.weightInit(WeightInit.RELU) .layer(3, new DenseLayer.Builder() .activation(Activation.RELU) .nIn(490) .build()) .layer(4, new LSTM.Builder() .activation(Activation.SOFTSIGN) .nIn(50) .nOut(50) .weightInit(WeightInit.XAVIER) .updater(new AdaGrad.Builder().learningRate(0.008).build()) .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue) .gradientNormalizationThreshold(10) .build()) .layer(5, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX)
switch (arguments.rnnKind) { case CUDNN_LSTM: rnnBuilder = new LSTM.Builder(); ((LSTM.Builder) rnnBuilder).nOut(numHiddenNodes).learningRateDecayPolicy(LEARNING_RATE_POLICY); break; case DL4J_BidirectionalGraves:
// Value handed to addLayer as its trailing argument: "indel" when i == 0,
// otherwise "lstmindel_" followed by (i - 1).
String lstmPreviousLayerName = i == 0 ? "indel" : "lstmindel_" + (i - 1);
// nIn for the LSTM built below: numLSTMInputs when i == 0, otherwise
// numLSTMIndelHiddenNodes (the same value used as nOut below).
int numLSTMInputNodes = i == 0 ? numLSTMInputs : numLSTMIndelHiddenNodes;
// Adds an LSTM layer under lstmLayerName with the sizes chosen above.
// NOTE(review): the trailing argument is presumably the name of this layer's input
// in a graph builder -- confirm against the declared type of `build`.
build.addLayer(lstmLayerName, new LSTM.Builder()
        .nIn(numLSTMInputNodes)
        .nOut(numLSTMIndelHiddenNodes)
        .build(), lstmPreviousLayerName);
.updater(new RmsProp(0.1)) .list() .layer(0, new LSTM.Builder().nIn(CHAR_TO_INT.size()).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification .nIn(lstmLayerSize).nOut(nOut).build())
.l2(0.001) .weightInit(WeightInit.XAVIER) .updater(new RmsProp.Builder().learningRate(0.1).build()) .list() .layer(0, new LSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize) .activation(Activation.TANH).build()) .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize) .activation(Activation.TANH).build()) .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification .nIn(lstmLayerSize).nOut(nOut).build())
.gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(1.0) .list() .layer(0, new LSTM.Builder().nIn(vectorSize).nOut(256) .activation(Activation.TANH).build()) .layer(1, new RnnOutputLayer.Builder().activation(Activation.SOFTMAX) .lossFunction(LossFunctions.LossFunction.MCXENT).nIn(256).nOut(2).build())
private static MultiLayerConfiguration getConfiguration(){ int lstmLayerSize = 200; //Number of units in each LSTM layer int tbpttLength = 50; //Length for truncated backpropagation through time. i.e., do parameter updates ever 50 characters Map<Character, Integer> CHAR_TO_INT = SparkLSTMCharacterExample.getCharToInt(); int nIn = CHAR_TO_INT.size(); int nOut = CHAR_TO_INT.size(); //Set up network configuration: MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .updater(new Nesterovs(0.1)) .seed(12345) .l2(0.001) .weightInit(WeightInit.XAVIER) .list() .layer(0, new LSTM.Builder().nIn(nIn).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification .nIn(lstmLayerSize).nOut(nOut).build()) .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength) .pretrain(false).backprop(true) .build(); return conf; } }