// NOTE(review): fragment of a NeuralNetConfiguration builder chain — the enclosing method and the
// earlier layers are outside this view. Reformatted only: in the collapsed original, the inline
// "//" comment swallowed the rest of the line, commenting out the nIn/nOut calls.
// Layer 2: RnnOutputLayer with MCXENT loss + softmax — the standard pairing for multi-class
// classification. Truncated BPTT uses the same segment length (tbpttLength) for the forward
// and backward passes. .pretrain(false)/.backprop(true) is legacy DL4J API (removed in later versions).
.layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification
        .nIn(lstmLayerSize).nOut(nOut).build())
.backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength)
.pretrain(false).backprop(true)
.build();
// NOTE(review): tail of a builder chain (fragment) — the enclosing builder is not visible here.
// Truncated BPTT with forward and backward segment length of V_NFRAMES / 5 time steps
// (presumably one fifth of the video frame count — confirm against V_NFRAMES's declaration).
.backpropType(BackpropType.TruncatedBPTT)
.tBPTTForwardLength(V_NFRAMES / 5)
.tBPTTBackwardLength(V_NFRAMES / 5)
.build();
private static MultiLayerConfiguration getConfiguration(){ int lstmLayerSize = 200; //Number of units in each LSTM layer int tbpttLength = 50; //Length for truncated backpropagation through time. i.e., do parameter updates ever 50 characters Map<Character, Integer> CHAR_TO_INT = SparkLSTMCharacterExample.getCharToInt(); int nIn = CHAR_TO_INT.size(); int nOut = CHAR_TO_INT.size(); //Set up network configuration: MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .updater(new Nesterovs(0.1)) .seed(12345) .l2(0.001) .weightInit(WeightInit.XAVIER) .list() .layer(0, new LSTM.Builder().nIn(nIn).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation(Activation.TANH).build()) .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification .nIn(lstmLayerSize).nOut(nOut).build()) .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength) .pretrain(false).backprop(true) .build(); return conf; } }
// NOTE(review): fragment of an if/else — the condition (presumably something like
// 'if (truncatedBPTT > 0)') and the matching tBPTTForwardLength call are outside this view.
.tBPTTBackwardLength(truncatedBPTT);
else
    // No truncation requested: fall back to standard (full-sequence) backpropagation.
    listBuilder.backpropType(BackpropType.Standard);
/**
 * Builds {@code unFrozenSubsetMLN}: a helper {@link MultiLayerNetwork} containing only the
 * layers of {@code origMLN} that come AFTER the last frozen layer, with parameters copied
 * over from the original network.
 * <p>
 * When {@code applyFrozen} is set, layers 0..{@code frozenTill} of the original network are
 * first wrapped in {@link FrozenLayer} in place.
 * <p>
 * NOTE(review): assumes {@code frozenInputLayer} has a sensible initial value (e.g. -1) when
 * no layer is frozen — not visible from here, confirm at the field declaration. Also note the
 * input pre-processors are copied wholesale from the original config; their layer indices may
 * not line up with the subset network — verify against callers.
 */
private void initHelperMLN() {
    if (applyFrozen) {
        // Wrap layers 0..frozenTill of the original network in FrozenLayer, in place.
        org.deeplearning4j.nn.api.Layer[] layers = origMLN.getLayers();
        for (int i = frozenTill; i >= 0; i--) { //unchecked?
            layers[i] = new FrozenLayer(layers[i]);
        }
        origMLN.setLayers(layers);
    }
    // Find the index of the LAST frozen layer (loop keeps overwriting on each match).
    for (int i = 0; i < origMLN.getnLayers(); i++) {
        if (origMLN.getLayer(i) instanceof FrozenLayer) {
            frozenInputLayer = i;
        }
    }
    // Collect per-layer configurations for everything after the frozen prefix.
    List<NeuralNetConfiguration> allConfs = new ArrayList<>();
    for (int i = frozenInputLayer + 1; i < origMLN.getnLayers(); i++) {
        allConfs.add(origMLN.getLayer(i).conf());
    }
    // Rebuild a MultiLayerConfiguration for the unfrozen tail, carrying over the original
    // network's backprop / pretrain / TBPTT settings and input pre-processors.
    MultiLayerConfiguration c = origMLN.getLayerWiseConfigurations();
    unFrozenSubsetMLN = new MultiLayerNetwork(new MultiLayerConfiguration.Builder().backprop(c.isBackprop())
            .inputPreProcessors(c.getInputPreProcessors()).pretrain(c.isPretrain())
            .backpropType(c.getBackpropType()).tBPTTForwardLength(c.getTbpttFwdLength())
            .tBPTTBackwardLength(c.getTbpttBackLength()).confs(allConfs).build());
    unFrozenSubsetMLN.init();
    //copy over params
    // Layer i of the original maps to layer (i - frozenInputLayer - 1) of the helper network.
    for (int i = frozenInputLayer + 1; i < origMLN.getnLayers(); i++) {
        unFrozenSubsetMLN.getLayer(i - frozenInputLayer - 1).setParams(origMLN.getLayer(i).params());
    }
    //unFrozenSubsetMLN.setListeners(origMLN.getListeners());
}
// NOTE(review): tail of a MultiLayerConfiguration.Builder chain (fragment) — the enclosing
// method and the forward-length call are outside this view. Sets the backward TBPTT length
// and input type, then copies workspace and cache modes from the shared globalConfig before
// building from the accumulated per-layer configuration list.
.tBPTTBackwardLength(tbpttBackLength).setInputType(this.inputType)
.trainingWorkspaceMode(globalConfig.trainingWorkspaceMode).cacheMode(globalConfig.cacheMode)
.inferenceWorkspaceMode(globalConfig.inferenceWorkspaceMode).confs(list).build();
public MultiLayerConfiguration conf() { MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder() .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1) .learningRate(0.01).seed(12345).regularization(true).l2(0.001).weightInit(WeightInit.XAVIER) .updater(new RmsProp()).list() .layer(0, new GravesLSTM.Builder().nIn(inputShape[1]).nOut(256).activation(Activation.TANH) .build()) .layer(1, new GravesLSTM.Builder().nOut(256).activation(Activation.TANH).build()) .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT) .activation(Activation.SOFTMAX) //MCXENT + softmax for classification .nOut(totalUniqueCharacters).build()) .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(50).tBPTTBackwardLength(50) .pretrain(false).backprop(true).build(); return conf; }
/**
 * Convenience setter for truncated BPTT: applies the same segment length to both the
 * forward and the backward pass.
 * <p>
 * Only applicable when {@code backpropType(BackpropType.TruncatedBPTT)} is used.
 * See Sutskever's PhD thesis for background on truncated backpropagation through time:
 * http://www.cs.utoronto.ca/~ilya/pubs/ilya_sutskever_phd_thesis.pdf
 *
 * @param bpttLength segment length in time steps; must be &gt; 0
 * @return this builder, for method chaining
 */
public Builder tBPTTLength(int bpttLength) {
    this.tBPTTForwardLength(bpttLength);
    return this.tBPTTBackwardLength(bpttLength);
}
// NOTE(review): fragment of a NeuralNetConfiguration builder chain (duplicate of an earlier
// snippet) — the enclosing method is outside this view. Reformatted only: in the collapsed
// original, the inline "//" comment swallowed the rest of the line.
// Layer 2: RnnOutputLayer with MCXENT loss + softmax for multi-class classification;
// truncated BPTT uses tbpttLength for both forward and backward segment lengths.
.layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification
        .nIn(lstmLayerSize).nOut(nOut).build())
.backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength)
.pretrain(false).backprop(true)
.build();