.activation(Activation.LEAKYRELU)
.updater(Updater.ADADELTA)
.convolutionMode(ConvolutionMode.Same)
.regularization(true).dropOut(0.2)
.learningRate(learnRate)
.updater(new RmsProp(0.1, 0.96, 0.001)).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0.0, 0.5)).regularization(true).l2(5e-5).miniBatch(true)
.convolutionMode(ConvolutionMode.Truncate).graphBuilder();
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.updater(new Adam(0.1, 0.9, 0.999, 0.01)).weightInit(WeightInit.RELU).regularization(true)
.l2(5e-5).learningRate(0.1).miniBatch(true).convolutionMode(ConvolutionMode.Same)
.graphBuilder();
// Resolve hyperparameter-space values into concrete builder settings
builder.adamVarDecay(adamVarDecay.getValue(values));
if (convolutionMode != null)
    builder.convolutionMode(convolutionMode.getValue(values));
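The getValue(values) calls above map a point in a hyperparameter search space to concrete builder settings, guarding the optional convolutionMode space with a null check. Below is a self-contained, hypothetical sketch of that resolve-then-configure pattern; ParameterSpace, FixedValue, and DemoBuilder are illustrative stand-ins, not the actual Arbiter API.

import org.deeplearning4j.nn.conf.ConvolutionMode;

// Hypothetical sketch of the resolve-then-configure pattern shown above.
// ParameterSpace, FixedValue and DemoBuilder are stand-ins, not real Arbiter classes.
public class ParameterSpaceSketch {

    interface ParameterSpace<T> {
        T getValue(double[] values); // map a point in the search space to a concrete value
    }

    static final class FixedValue<T> implements ParameterSpace<T> {
        private final T value;
        FixedValue(T value) { this.value = value; }
        public T getValue(double[] values) { return value; }
    }

    static final class DemoBuilder {
        DemoBuilder adamVarDecay(double d) { System.out.println("adamVarDecay = " + d); return this; }
        DemoBuilder convolutionMode(ConvolutionMode m) { System.out.println("convolutionMode = " + m); return this; }
    }

    public static void main(String[] args) {
        ParameterSpace<Double> adamVarDecay = new FixedValue<>(0.999);
        ParameterSpace<ConvolutionMode> convolutionMode = new FixedValue<>(ConvolutionMode.Same);
        double[] values = new double[0]; // a candidate point drawn by the search strategy

        DemoBuilder builder = new DemoBuilder();
        builder.adamVarDecay(adamVarDecay.getValue(values));
        if (convolutionMode != null)
            builder.convolutionMode(convolutionMode.getValue(values));
    }
}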
.activation(Activation.RELU).optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.updater(Updater.NESTEROVS).learningRate(1e-2).biasLearningRate(1e-2 * 2).regularization(true)
.convolutionMode(ConvolutionMode.Same)
.optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
.updater(new AdaDelta()).regularization(false)
.convolutionMode(ConvolutionMode.Same).list()
.updater(new RmsProp(0.1, 0.96, 0.001)).weightInit(WeightInit.DISTRIBUTION)
.dist(new NormalDistribution(0.0, 0.5)).regularization(true).l1(1e-7).l2(5e-5).miniBatch(true)
.convolutionMode(ConvolutionMode.Truncate).graphBuilder();
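The fragments ending in graphBuilder() hand the shared settings over to a ComputationGraphConfiguration.GraphBuilder, which then needs named inputs, layers, and outputs before build() is called. A minimal sketch of that continuation, assuming the usual org.deeplearning4j / org.nd4j imports; the layer names, sizes, and wiring are chosen here purely for illustration.

ComputationGraphConfiguration conf = new NeuralNetConfiguration.Builder()
                .updater(new RmsProp(0.1, 0.96, 0.001))
                .convolutionMode(ConvolutionMode.Truncate)
                .graphBuilder()
                // names, sizes and wiring below are illustrative only
                .addInputs("input")
                .addLayer("dense", new DenseLayer.Builder().nIn(784).nOut(256)
                                .activation(Activation.RELU).build(), "input")
                .addLayer("out", new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                .nIn(256).nOut(10).activation(Activation.SOFTMAX).build(), "dense")
                .setOutputs("out")
                .build();
ComputationGraph net = new ComputationGraph(conf);
net.init();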
public MultiLayerConfiguration conf() {
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                    .trainingWorkspaceMode(workspaceMode)
                    .inferenceWorkspaceMode(workspaceMode)
                    .seed(seed)
                    .iterations(iterations)
                    .activation(Activation.IDENTITY)
                    .weightInit(WeightInit.XAVIER)
                    .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
                    .updater(new AdaDelta())
                    .regularization(false)
                    .convolutionMode(ConvolutionMode.Same)
                    .list()
                    // block 1
                    .layer(0, new ConvolutionLayer.Builder(new int[] {5, 5}, new int[] {1, 1}).name("cnn1")
                                    .nIn(inputShape[0]).nOut(20).activation(Activation.RELU).build())
                    .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2},
                                    new int[] {2, 2}).name("maxpool1").build())
                    // block 2
                    .layer(2, new ConvolutionLayer.Builder(new int[] {5, 5}, new int[] {1, 1}).name("cnn2").nOut(50)
                                    .activation(Activation.RELU).build())
                    .layer(3, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX, new int[] {2, 2},
                                    new int[] {2, 2}).name("maxpool2").build())
                    // fully connected
                    .layer(4, new DenseLayer.Builder().name("ffn1").activation(Activation.RELU).nOut(500).build())
                    // output (the original LeNet used radial basis function output units; softmax is used here)
                    .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT).name("output")
                                    .nOut(numLabels).activation(Activation.SOFTMAX).build())
                    .setInputType(InputType.convolutionalFlat(inputShape[2], inputShape[1], inputShape[0]))
                    .backprop(true).pretrain(false)
                    .build();
    return conf;
}
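Assuming the surrounding class supplies workspaceMode, seed, iterations, inputShape, and numLabels, the returned MultiLayerConfiguration is typically wrapped in a network and initialized as below. This is a usage sketch, not part of the original class; trainIterator is a hypothetical DataSetIterator supplied by the caller.

MultiLayerNetwork network = new MultiLayerNetwork(conf());
network.init();
// network.fit(trainIterator);   // trainIterator: a DataSetIterator provided by the caller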
.convolutionMode(ConvolutionMode.Same)
.l2(1e-4)
.updater(new AMSGrad(lrSchedule))
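In the last fragment, lrSchedule is an ISchedule, which AMSGrad accepts directly in place of a fixed learning rate. A minimal sketch of how such a fragment could sit inside a complete configuration, assuming a recent DL4J version and the usual imports; the MapSchedule breakpoints, layer sizes, and 28x28 single-channel input shape are invented for illustration.

ISchedule lrSchedule = new MapSchedule.Builder(ScheduleType.ITERATION)
                .add(0, 1e-3)      // start at 1e-3
                .add(1000, 1e-4)   // decay after 1000 iterations
                .build();

MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
                .convolutionMode(ConvolutionMode.Same)
                .l2(1e-4)
                .updater(new AMSGrad(lrSchedule))
                .list()
                .layer(0, new ConvolutionLayer.Builder(new int[] {3, 3}, new int[] {1, 1})
                                .nOut(16).activation(Activation.RELU).build())
                .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
                                .nOut(10).activation(Activation.SOFTMAX).build())
                .setInputType(InputType.convolutionalFlat(28, 28, 1))
                .build();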