.seed(12345) .activation(Activation.LEAKYRELU) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.1))// To configure: .updater(Nesterovs.builder().momentum(0.9).build()) .l2(1e-4)
.seed(12345) .l2(0.001) .weightInit(WeightInit.XAVIER) .updater(new RmsProp(0.1)) .list()
.weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.02))// To configure: .updater(Nesterovs.builder().momentum(0.9).build()) .l2(1e-4)
public static void main(String[] args){ //Generate the training data DataSetIterator iterator = getTrainingData(batchSize,rng); //Create the network int numInput = 2; int numOutputs = 1; MultiLayerNetwork net = new MultiLayerNetwork(new NeuralNetConfiguration.Builder() .seed(seed) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER) .updater(new Sgd(learningRate)) .list() .layer(0, new OutputLayer.Builder(LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY) .nIn(numInput).nOut(numOutputs).build()) .pretrain(false).backprop(true).build() ); net.init(); net.setListeners(new ScoreIterationListener(1)); for( int i=0; i<nEpochs; i++ ){ iterator.reset(); net.fit(iterator); } final INDArray input = Nd4j.create(new double[] { 0.111111, 0.3333333333333 }, new int[] { 1, 2 }); INDArray out = net.output(input, false); System.out.println(out); }
public static void main(String[] args) throws Exception { //Define a simple ComputationGraph: ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder() .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(0.01, 0.9)) .graphBuilder() .addInputs("in") .addLayer("layer0", new DenseLayer.Builder().nIn(4).nOut(3).activation(Activation.TANH).build(), "in") .addLayer("layer1", new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD).activation(Activation.SOFTMAX).nIn(3).nOut(3).build(), "layer0") .setOutputs("layer1") .backprop(true).pretrain(false).build(); ComputationGraph net = new ComputationGraph(conf); net.init(); //Save the model File locationToSave = new File("model/MyComputationGraph.zip"); //Where to save the network. Note: the file is in .zip format - can be opened externally boolean saveUpdater = true; //Updater: i.e., the state for Momentum, RMSProp, Adagrad etc. Save this if you want to train your network more in the future ModelSerializer.writeModel(net, locationToSave, saveUpdater); //Load the model ComputationGraph restored = ModelSerializer.restoreComputationGraph(locationToSave); System.out.println("Saved and loaded parameters are equal: " + net.params().equals(restored.params())); System.out.println("Saved and loaded configurations are equal: " + net.getConfiguration().equals(restored.getConfiguration())); }
/** Returns the network configuration, 2 hidden DenseLayers of size 50. */ private static MultiLayerConfiguration getDeepDenseLayerNetworkConfiguration() { final int numHiddenNodes = 100; return new NeuralNetConfiguration.Builder() .seed(seed) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs(learningRate, 0.9)) .list() .layer(0, new DenseLayer.Builder().nIn(numInputs).nOut(numHiddenNodes) .activation(Activation.RELU).build()) .layer(1, new DenseLayer.Builder().nIn(numHiddenNodes).nOut(numHiddenNodes) .activation(Activation.RELU).build()) .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.MSE) .activation(Activation.IDENTITY) .nIn(numHiddenNodes).nOut(numOutputs).build()) .pretrain(false).backprop(true).build(); }
public MultiLayerConfiguration conf() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) .learningRate(0.01).seed(12345).regularization(true).l2(0.001).weightInit(WeightInit.XAVIER) .updater(new RmsProp()).list() .layer(0, new GravesLSTM.Builder().nIn(inputShape[1]).nOut(256).activation(Activation.TANH) .build()) .layer(1, new GravesLSTM.Builder().nOut(256).activation(Activation.TANH).build()) .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) //MCXENT + softmax for classification .nOut(totalUniqueCharacters).build()) .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(50).tBPTTBackwardLength(50) .pretrain(false).backprop(true).build(); return conf; }
.seed(seed) .l2(0.0005) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs.Builder().learningRate(.01).build()) .biasUpdater(new Nesterovs.Builder().learningRate(0.02).build())
.seed(seed) .l2(0.0005) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs.Builder().learningRate(.01).build()) .biasUpdater(new Nesterovs.Builder().learningRate(0.02).build())
.seed(seed) .l2(0.0005) .weightInit(WeightInit.XAVIER) .updater(new Nesterovs.Builder().learningRate(.01).build()) .biasUpdater(new Nesterovs.Builder().learningRate(0.02).build())
.seed(12345) .l2(0.001) .weightInit(WeightInit.XAVIER) .updater(new RmsProp.Builder().learningRate(0.1).build()) .list()
.updater(new Adam.Builder().learningRate(2e-2).build()) .l2(1e-5) .weightInit(WeightInit.XAVIER) .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(1.0) .list()
.l2(1e-4) .updater(new AMSGrad(lrSchedule)) .weightInit(WeightInit.RELU) .graphBuilder() .addInputs("input")
.weightInit(WeightInit.RELU) .activation(Activation.LEAKYRELU) .updater(Updater.ADADELTA)
public static MultiLayerNetwork lenetModel() { /** * Revisde Lenet Model approach developed by ramgo2 achieves slightly above random * Reference: https://gist.github.com/ramgo2/833f12e92359a2da9e5c2fb6333351c5 **/ MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) .l2(0.005) // tried 0.0001, 0.0005 .activation(Activation.RELU) .weightInit(WeightInit.XAVIER) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Nesterovs(0.0001,0.9)) .list() .layer(0, new ConvolutionLayer.Builder(new int[]{5, 5}, new int[]{1, 1}, new int[]{0, 0}).name("cnn1") .nIn(channels).nOut(50).biasInit(0).build()) .layer(1, new SubsamplingLayer.Builder(new int[]{2,2}, new int[]{2,2}).name("maxpool1").build()) .layer(2, new ConvolutionLayer.Builder(new int[]{5,5}, new int[]{5, 5}, new int[]{1, 1}).name("cnn2") .nOut(100).biasInit(0).build()) .layer(3, new SubsamplingLayer.Builder(new int[]{2,2}, new int[]{2,2}).name("maxpool2").build()) .layer(4, new DenseLayer.Builder().nOut(500).build()) .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(4) .activation(Activation.SOFTMAX) .build()) .backprop(true).pretrain(false) .setInputType(InputType.convolutional(height, width, channels)) .build(); return new MultiLayerNetwork(conf); }
.updater(Updater.NESTEROVS) .learningRate(learningRate) .weightInit(WeightInit.XAVIER_UNIFORM) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .regularization(true)
.updater(Updater.ADAM) .learningRate(learningRate) .weightInit(WeightInit.XAVIER) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .regularization(true)
public static ComputationGraphConfiguration getConf() { ComputationGraphConfiguration.GraphBuilder builder = new NeuralNetConfiguration.Builder() .seed(12345) .updater(new Adam(0.01)) .weightInit(WeightInit.RELU) .graphBuilder() .addInputs("in"); String[] poolNames = new String[ngramFilters.length]; int i = 0; for (int ngram : ngramFilters) { String filterName = String.format("ngram%d", ngram); poolNames[i] = String.format("pool%d", ngram); builder = builder.addLayer(filterName, new Convolution1DLayer.Builder() .nOut(numFilters) .kernelSize(ngram) .activation(Activation.RELU) .build(), "in") .addLayer(poolNames[i], new GlobalPoolingLayer.Builder(PoolingType.MAX).build(), filterName); i++; } return builder.addVertex("concat", new MergeVertex(), poolNames) .addLayer("predict", new DenseLayer.Builder().nOut(numClasses).dropOut(dropoutRetain) .activation(Activation.SOFTMAX).build(), "concat") .addLayer("loss", new LossLayer.Builder(LossFunctions.LossFunction.MCXENT).build(), "predict") .setOutputs("loss") .setInputTypes(InputType.recurrent(W2V_VECTOR_SIZE, 1000)) .build(); } }
private static MultiLayerConfiguration getConfiguration(){ int lstmLayerSize = 200; //Number of units in each LSTM layer int tbpttLength = 50; //Length for truncated backpropagation through time. i.e., do parameter updates ever 50 characters Map<Character, Integer> CHAR_TO_INT = SparkLSTMCharacterExample.getCharToInt(); int nIn = CHAR_TO_INT.size(); int nOut = CHAR_TO_INT.size(); //Set up network configuration: MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .updater(new Nesterovs(0.1)) .seed(12345) .l2(0.001) .weightInit(WeightInit.XAVIER) .list() .layer(0, new LSTM.Builder().nIn(nIn).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification .nIn(lstmLayerSize).nOut(nOut).build()) .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength) .pretrain(false).backprop(true) .build(); return conf; } }
public static MultiLayerConfiguration lenetModelConf() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .seed(seed) .l2(0.005) .activation(Activation.RELU) .weightInit(WeightInit.XAVIER) .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT) .updater(new Nesterovs(0.0001, 0.9)) .list() .layer(0, new ConvolutionLayer.Builder(new int[]{5, 5}, new int[]{1, 1}, new int[]{0, 0}).name("cnn1") .nIn(channels).nOut(50).biasInit(0).build()) .layer(1, new SubsamplingLayer.Builder(new int[]{2,2}, new int[]{2,2}).name("maxpool1").build()) .layer(2, new ConvolutionLayer.Builder(new int[]{5,5}, new int[]{5, 5}, new int[]{1, 1}).name("cnn2") .nOut(100).biasInit(0).build()) .layer(3, new SubsamplingLayer.Builder(new int[]{2,2}, new int[]{2,2}).name("maxpool2").build()) .layer(4, new DenseLayer.Builder().nOut(500).build()) .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD) .nOut(4) .activation(Activation.SOFTMAX) .build()) .backprop(true).pretrain(false) .setInputType(InputType.convolutional(height, width, channels)) .build(); return conf; } public static void saveModel(FileSystem fs, Model model ) throws Exception{