/**
 * CLI entry point: runs the standard MEKA evaluation harness on a fresh BPNN.
 * (The trailing brace closes the enclosing class.)
 */
public static void main(String args[]) throws Exception {
	ProblemTransformationMethod.evaluation(new BPNN(), args);
} }
/**
 * Update - A single training epoch.
 * Performs one backpropagation step per training example (one-example
 * mini-batches) and accumulates the per-example errors.
 * @param X_ input matrix, one row per example
 * @param Y_ target matrix, one row per example
 * @return the total error accumulated over this epoch
 */
public double update(double X_[][], double Y_[][]) throws Exception {
	double error = 0.0;
	final int numExamples = X_.length;
	for(int n = 0; n < numExamples; n++) {
		error += this.backPropagate(new double[][]{X_[n]}, new double[][]{Y_[n]});
	}
	return error;
}
/**
 * Forward Pass - Given input X_, get output Y_.
 * @param X_ input (one row per example, no bias column)
 * @return Y_ output: the activation of the network's final layer
 */
public double[][] popY(double X_[][]) {
	Matrix activations[] = forwardPass(X_);
	// The last entry of the activation stack is the network output.
	return activations[activations.length - 1].getArray();
}
/** * Train - Train for I iterations. * I is not necessarily m_E (yet)! */ public double train(double[][] X_, double[][] Y_, int I) throws Exception { if (getDebug()) { System.out.println("BPNN train; For "+I+" epochs ..."); } int N = X_.length; boolean breakEarly = (I < 0) ? true : false; I = Math.abs(I); double E_ = Double.MAX_VALUE; double E = 0.0; for(int e = 0; e < I; e++) { E = update(X_,Y_); if (breakEarly && E > E_) { if (getDebug()) System.out.println(" early stopped at epcho "+e+" ... "); break; // positive gradient } E_ = E; } if (getDebug()) System.out.println("Done."); return E; }
@Override public void buildClassifier(Instances D) throws Exception { testCapabilities(D); double X_[][] = MLUtils.getXfromD(D); double Y_[][] = MLUtils.getYfromD(D); r = new Random(m_Seed); if (this.W == null) { if (getDebug()) System.out.println("initialize weights ..."); int h[] = new int[]{m_H}; // TODO: parameterize this int d = X_[0].length; int L = D.classIndex(); initWeights(d,L,h); } // else ... probably pre-initialized, continue ... else if (getDebug()) System.out.println("weights already preset, continue ..."); train(X_,Y_,m_E); }
// Wrap a fresh BPNN, seed it with the pre-trained weight stack W (note: the
// preset matrices themselves are modified during training), train it, then
// print the thresholded (0.5) predictions for the training inputs.
// NOTE(review): train(X_,Y_) is called without an epoch count — presumably an
// overload that falls back to the default m_E; confirm against the class.
m_Classifier = new BPNN();
((BPNN)m_Classifier).presetWeights(W,L); // this W will be modified
((BPNN)m_Classifier).train(X_,Y_); // could also have called buildClassifier(D)
double Ypred[][] = ((BPNN)m_Classifier).popY(X_);
System.out.println("Y = \n"+ MatrixUtils.toString(MatrixUtils.threshold(Ypred, 0.5)));
/** * InitWeights - Initialize a BPNN of H.length hidden layers with H[0], H[1], etc hidden units in each layer (W will be random, and of the corresponding dimensions). * @param d number of visible units * @param L number of labels (output units) * @param H number of units in hidden layers, H.length = number of hidden layers. CURRENTLY LIMITED TO 1. */ public void initWeights(int d, int L, int H[]) throws Exception { int numHidden = H.length; if (getDebug()) { System.out.println("Initializing "+(H.length)+" hidden Layers ..."); System.out.println("d = "+d); System.out.println("L = "+L); } // We need weights for Z to Y, as well as from X to Z Matrix W[] = new Matrix[H.length+1]; int h = H[0]; H = new int[]{d,h,L}; // Hidden layers System.out.println(""+Arrays.toString(H)); for(int n = 0; n < H.length-1; n++) { W[n] = MatrixUtils.randomn(H[n] + 1, H[n + 1], r).timesEquals(0.1); if (getDebug()) System.out.println("W["+n+"] = "+(H[n]+1)+" x "+H[n+1]); } //setWeights(W, L); this.W = W; makeMomentumMatrices(); }
/**
 * Forward Pass - Given input x_, get output y_.
 * Single-example convenience wrapper around popY.
 * @param x_ input vector
 * @return y_ output vector
 */
public double[] popy(double x_[]) {
	double batchOfOne[][] = new double[][]{x_};
	return popY(batchOfOne)[0];
}
/**
 * Returns the network's output activations for the given instance.
 * @param xy the instance (attribute values; labels are ignored here)
 * @return the predicted distribution over the labels
 */
@Override
public double[] distributionForInstance(Instance xy) throws Exception {
	return popy(MLUtils.getxfromInstance(xy));
}
/**
 * Preset Weights - Initialize a BPNN with (pre-trained) weight matrices W (which also determines X dimensions).
 * @param W pre-trained weight matrix (should include bias weights, assume W[-1]-1 hidden units in penultimate layer not including bias])
 * @param L the number of labels (for making the final matrix)
 */
public void presetWeights(Matrix W[], int L) throws Exception {
	// Fixed seed (0) so the randomly-initialized output layer is reproducible.
	r = new Random(0);
	// Copy the pre-trained matrices, leaving one extra slot for a fresh output layer.
	this.W = new Matrix[W.length+1];
	for(int l = 0; l < W.length; l++) {
		this.W[l] = W[l];
	}
	// NOTE(review): h is taken from W[1]'s row count (minus bias). That only
	// matches the penultimate layer when W.length == 2 — presumably intended
	// as the last matrix (W[W.length-1]); confirm against callers.
	int h = W[1].getRowDimension()-1;
	// Fresh small random output-layer weights: (h hidden units + bias) -> L labels.
	this.W[W.length] = MatrixUtils.randomn(h + 1, L, r).timesEquals(0.1);
	makeMomentumMatrices();
}
/**
 * Forward Pass - Given input X_, get output of all layers Z[0]...
 * Propagates X_ through every weight matrix: hidden activations get a bias
 * column appended; the final (output) layer does not.
 * @param X_ input (no bias included)
 * @return output Z[] = {X,Z1,Z2,...,Y}
 */
public Matrix[] forwardPass(double X_[][]) {

	int numW = W.length; 			// number of weight matrices
	Matrix Z[] = new Matrix[numW+1];

	// input activations (with a bias column appended)
	Z[0] = new Matrix(MatrixUtils.addBias(X_));

	// hidden layer(s): Z[i] = sigma(Z[i-1] * W[i-1]), then append bias column
	int i = 1;
	for(i = 1; i < numW; i++) {
		if (getDebug()) System.out.print("DO: ["+i+"] "+ MatrixUtils.getDim(Z[i - 1].getArray())+" * "+ MatrixUtils.getDim(W[i - 1].getArray())+" => ");
		Matrix A_z = Z[i-1].times(W[i-1]); // A = X * W1 = Z[n-1] * W[n-1]
		Z[i] = MatrixUtils.sigma(A_z);
		Z[i] = MatrixUtils.addBias(Z[i]); // ACTIVATIONS Z[n] = sigma(A) =
		if (getDebug()) System.out.println("==: "+ MatrixUtils.getDim(A_z.getArray()));
	}

	// output layer: same propagation, but NO bias column on the final output
	// (i holds numW here, so Z[i-1]/W[i-1] address the last layer)
	if (getDebug()) System.out.print("DX: ["+i+"] "+ MatrixUtils.getDim(Z[i - 1].getArray())+" * "+ MatrixUtils.getDim(W[i - 1].getArray())+" => ");
	Matrix A_y = Z[i-1].times(W[i-1]); // A = X * W1 = Z[n-1] * W[n-1]
	if (getDebug()) System.out.println("==: "+ MatrixUtils.getDim(A_y.getArray()));
	Z[numW] = MatrixUtils.sigma(A_y); // ACTIVATIONS Z[n] = sigma(A) =

	return Z;
}
@Override public void buildClassifier(Instances D) throws Exception { testCapabilities(D); double X_[][] = MLUtils.getXfromD(D); double Y_[][] = MLUtils.getYfromD(D); r = new Random(m_Seed); if (this.W == null) { if (getDebug()) System.out.println("initialize weights ..."); int h[] = new int[]{m_H}; // TODO: parameterize this int d = X_[0].length; int L = D.classIndex(); initWeights(d,L,h); } // else ... probably pre-initialized, continue ... else if (getDebug()) System.out.println("weights already preset, continue ..."); train(X_,Y_,m_E); }
// Wrap a fresh BPNN, seed it with the pre-trained weight stack W (note: the
// preset matrices themselves are modified during training), train it, then
// print the thresholded (0.5) predictions for the training inputs.
// NOTE(review): train(X_,Y_) is called without an epoch count — presumably an
// overload that falls back to the default m_E; confirm against the class.
m_Classifier = new BPNN();
((BPNN)m_Classifier).presetWeights(W,L); // this W will be modified
((BPNN)m_Classifier).train(X_,Y_); // could also have called buildClassifier(D)
double Ypred[][] = ((BPNN)m_Classifier).popY(X_);
System.out.println("Y = \n"+ MatrixUtils.toString(MatrixUtils.threshold(Ypred, 0.5)));
/** * Train - Train for I iterations. * I is not necessarily m_E (yet)! */ public double train(double[][] X_, double[][] Y_, int I) throws Exception { if (getDebug()) { System.out.println("BPNN train; For "+I+" epochs ..."); } int N = X_.length; boolean breakEarly = (I < 0) ? true : false; I = Math.abs(I); double E_ = Double.MAX_VALUE; double E = 0.0; for(int e = 0; e < I; e++) { E = update(X_,Y_); if (breakEarly && E > E_) { if (getDebug()) System.out.println(" early stopped at epcho "+e+" ... "); break; // positive gradient } E_ = E; } if (getDebug()) System.out.println("Done."); return E; }
/** * InitWeights - Initialize a BPNN of H.length hidden layers with H[0], H[1], etc hidden units in each layer (W will be random, and of the corresponding dimensions). * @param d number of visible units * @param L number of labels (output units) * @param H number of units in hidden layers, H.length = number of hidden layers. CURRENTLY LIMITED TO 1. */ public void initWeights(int d, int L, int H[]) throws Exception { int numHidden = H.length; if (getDebug()) { System.out.println("Initializing "+(H.length)+" hidden Layers ..."); System.out.println("d = "+d); System.out.println("L = "+L); } // We need weights for Z to Y, as well as from X to Z Matrix W[] = new Matrix[H.length+1]; int h = H[0]; H = new int[]{d,h,L}; // Hidden layers System.out.println(""+Arrays.toString(H)); for(int n = 0; n < H.length-1; n++) { W[n] = MatrixUtils.randomn(H[n] + 1, H[n + 1], r).timesEquals(0.1); if (getDebug()) System.out.println("W["+n+"] = "+(H[n]+1)+" x "+H[n+1]); } //setWeights(W, L); this.W = W; makeMomentumMatrices(); }
/**
 * Forward Pass - Given input x_, get output y_.
 * Single-example convenience wrapper around popY.
 * @param x_ input vector
 * @return y_ output vector
 */
public double[] popy(double x_[]) {
	double batchOfOne[][] = new double[][]{x_};
	return popY(batchOfOne)[0];
}
/**
 * Returns the network's output activations for the given instance.
 * @param xy the instance (attribute values; labels are ignored here)
 * @return the predicted distribution over the labels
 */
@Override
public double[] distributionForInstance(Instance xy) throws Exception {
	return popy(MLUtils.getxfromInstance(xy));
}
/**
 * Preset Weights - Initialize a BPNN with (pre-trained) weight matrices W (which also determines X dimensions).
 * @param W pre-trained weight matrix (should include bias weights, assume W[-1]-1 hidden units in penultimate layer not including bias])
 * @param L the number of labels (for making the final matrix)
 */
public void presetWeights(Matrix W[], int L) throws Exception {
	// Fixed seed (0) so the randomly-initialized output layer is reproducible.
	r = new Random(0);
	// Copy the pre-trained matrices, leaving one extra slot for a fresh output layer.
	this.W = new Matrix[W.length+1];
	for(int l = 0; l < W.length; l++) {
		this.W[l] = W[l];
	}
	// NOTE(review): h is taken from W[1]'s row count (minus bias). That only
	// matches the penultimate layer when W.length == 2 — presumably intended
	// as the last matrix (W[W.length-1]); confirm against callers.
	int h = W[1].getRowDimension()-1;
	// Fresh small random output-layer weights: (h hidden units + bias) -> L labels.
	this.W[W.length] = MatrixUtils.randomn(h + 1, L, r).timesEquals(0.1);
	makeMomentumMatrices();
}
/**
 * Forward Pass - Given input X_, get output of all layers Z[0]...
 * Propagates X_ through every weight matrix: hidden activations get a bias
 * column appended; the final (output) layer does not.
 * @param X_ input (no bias included)
 * @return output Z[] = {X,Z1,Z2,...,Y}
 */
public Matrix[] forwardPass(double X_[][]) {

	int numW = W.length; 			// number of weight matrices
	Matrix Z[] = new Matrix[numW+1];

	// input activations (with a bias column appended)
	Z[0] = new Matrix(MatrixUtils.addBias(X_));

	// hidden layer(s): Z[i] = sigma(Z[i-1] * W[i-1]), then append bias column
	int i = 1;
	for(i = 1; i < numW; i++) {
		if (getDebug()) System.out.print("DO: ["+i+"] "+ MatrixUtils.getDim(Z[i - 1].getArray())+" * "+ MatrixUtils.getDim(W[i - 1].getArray())+" => ");
		Matrix A_z = Z[i-1].times(W[i-1]); // A = X * W1 = Z[n-1] * W[n-1]
		Z[i] = MatrixUtils.sigma(A_z);
		Z[i] = MatrixUtils.addBias(Z[i]); // ACTIVATIONS Z[n] = sigma(A) =
		if (getDebug()) System.out.println("==: "+ MatrixUtils.getDim(A_z.getArray()));
	}

	// output layer: same propagation, but NO bias column on the final output
	// (i holds numW here, so Z[i-1]/W[i-1] address the last layer)
	if (getDebug()) System.out.print("DX: ["+i+"] "+ MatrixUtils.getDim(Z[i - 1].getArray())+" * "+ MatrixUtils.getDim(W[i - 1].getArray())+" => ");
	Matrix A_y = Z[i-1].times(W[i-1]); // A = X * W1 = Z[n-1] * W[n-1]
	if (getDebug()) System.out.println("==: "+ MatrixUtils.getDim(A_y.getArray()));
	Z[numW] = MatrixUtils.sigma(A_y); // ACTIVATIONS Z[n] = sigma(A) =

	return Z;
}
/**
 * Forward Pass - Given input X_, get output Y_.
 * @param X_ input (one row per example, no bias column)
 * @return Y_ output: the activation of the network's final layer
 */
public double[][] popY(double X_[][]) {
	Matrix activations[] = forwardPass(X_);
	// The last entry of the activation stack is the network output.
	return activations[activations.length - 1].getArray();
}