public static double calculateRegularization(DoubleMatrix[] thetas, final int m, NetworkConfiguration conf) { double regularization = 0d; // only calculate the regularization term if lambda is not 0 if (conf.lambda != 0d) { for (DoubleMatrix theta : thetas) { regularization += (theta.slice(0, theta.getRowCount(), 1, theta.getColumnCount())).pow(2).sum(); } regularization = (conf.lambda / (2.0d * m)) * regularization; } return regularization; }
public static DoubleMatrix[] backwardPropagate(DoubleMatrix y, DoubleMatrix[] thetas, DoubleMatrix[] ax, DoubleMatrix[] zx, NetworkConfiguration conf) { // now backpropagate the error backwards by calculating the deltas. // also here we are following the math equations and nulling out the 0th // entry. DoubleMatrix[] deltaX = new DoubleMatrix[conf.layerSizes.length]; // set the last delta to the difference of outcome and prediction deltaX[deltaX.length - 1] = ax[conf.layerSizes.length - 1].subtract(y); // compute the deltas onto the input layer for (int i = (conf.layerSizes.length - 2); i > 0; i--) { DoubleMatrix slice = thetas[i].slice(0, thetas[i].getRowCount(), 1, thetas[i].getColumnCount()); deltaX[i] = multiply(deltaX[i + 1], slice, false, false, conf); // apply the gradient of the activations deltaX[i] = deltaX[i].multiplyElementWise(conf.activations[i] .gradient(zx[i])); } return deltaX; }
/** * Creates a weight matrix that can be used for unsupervised weight * initialization in the {@link MultilayerPerceptron}. * * @param outputLayerSize the size of the classification layer on top of this * RBM. * @return the {@link WeightMatrix} that maps layers to the weights. */ public WeightMatrix[] getNeuralNetworkWeights(int outputLayerSize) { WeightMatrix[] toReturn = new WeightMatrix[this.weights.length + 1]; // translate the matrices for (int i = 0; i < weights.length; i++) { toReturn[i] = new WeightMatrix(this.weights[i].slice(1, weights[i].getRowCount(), 0, weights[i].getColumnCount())); } // add a last layer on top of it toReturn[toReturn.length - 1] = new WeightMatrix( toReturn[toReturn.length - 2].getWeights().getRowCount(), outputLayerSize); return toReturn; }
public static void calculateGradients(DoubleMatrix[] thetas, DoubleMatrix[] thetaGradients, DoubleMatrix[] ax, DoubleMatrix[] deltaX, final int m, NetworkConfiguration conf) { // calculate the gradients of the weights for (int i = 0; i < thetaGradients.length; i++) { DoubleMatrix gradDXA = multiply(deltaX[i + 1], ax[i], true, false, conf); if (m != 1) { thetaGradients[i] = gradDXA.divide(m); } else { thetaGradients[i] = gradDXA; } if (conf.lambda != 0d) { thetaGradients[i] = thetaGradients[i].add((thetas[i] .multiply(conf.lambda / m))); // subtract the regularized bias DoubleVector regBias = thetas[i] .slice(0, thetas[i].getRowCount(), 0, 1).multiply(conf.lambda / m) .getColumnVector(0); thetaGradients[i].setColumnVector(0, regBias); } } }