private static MultiLayerConfiguration getConfiguration() {
    int lstmLayerSize = 200;    //Number of units in each LSTM layer
    int tbpttLength = 50;       //Length for truncated backpropagation through time, i.e., do parameter updates every 50 characters

    Map<Character, Integer> CHAR_TO_INT = SparkLSTMCharacterExample.getCharToInt();
    int nIn = CHAR_TO_INT.size();
    int nOut = CHAR_TO_INT.size();

    //Set up network configuration:
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .updater(new Nesterovs(0.1))
        .seed(12345)
        .l2(0.001)
        .weightInit(WeightInit.XAVIER)
        .list()
        .layer(0, new LSTM.Builder().nIn(nIn).nOut(lstmLayerSize).activation(Activation.TANH).build())
        .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation(Activation.TANH).build())
        .layer(2, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification
            .nIn(lstmLayerSize).nOut(nOut).build())
        .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength)
        .pretrain(false).backprop(true)
        .build();
    return conf;
}
}
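A minimal usage sketch, not part of the source: the returned configuration would typically be wrapped in a MultiLayerNetwork and initialized before training (trainIter is an assumed DataSetIterator name).

// Hypothetical usage of getConfiguration() above:
MultiLayerNetwork net = new MultiLayerNetwork(getConfiguration());
net.init();
// net.fit(trainIter);   //trainIter: a DataSetIterator supplying the character data (assumed)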
protected INDArray createUserWeightMatrix(NeuralNetConfiguration conf, INDArray weightParamView,
                                          boolean initializeParameters) {
    FeedForwardLayer layerConf = (FeedForwardLayer) conf.getLayer();
    if (initializeParameters) {
        Distribution dist = Distributions.createDistribution(layerConf.getDist());
        return createWeightMatrix(numUsers, layerConf.getNOut(), layerConf.getWeightInit(), dist,
                weightParamView, true);
    } else {
        return createWeightMatrix(numUsers, layerConf.getNOut(), null, null, weightParamView, false);
    }
}
public static MultiLayerConfiguration lenetModelConf() {
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .seed(seed)
        .l2(0.005)
        .activation(Activation.RELU)
        .weightInit(WeightInit.XAVIER)
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .updater(new Nesterovs(0.0001, 0.9))
        .list()
        .layer(0, new ConvolutionLayer.Builder(new int[]{5, 5}, new int[]{1, 1}, new int[]{0, 0}).name("cnn1")
            .nIn(channels).nOut(50).biasInit(0).build())
        .layer(1, new SubsamplingLayer.Builder(new int[]{2, 2}, new int[]{2, 2}).name("maxpool1").build())
        .layer(2, new ConvolutionLayer.Builder(new int[]{5, 5}, new int[]{5, 5}, new int[]{1, 1}).name("cnn2")
            .nOut(100).biasInit(0).build())
        .layer(3, new SubsamplingLayer.Builder(new int[]{2, 2}, new int[]{2, 2}).name("maxpool2").build())
        .layer(4, new DenseLayer.Builder().nOut(500).build())
        .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
            .nOut(4)
            .activation(Activation.SOFTMAX)
            .build())
        .backprop(true).pretrain(false)
        .setInputType(InputType.convolutional(height, width, channels))
        .build();
    return conf;
}
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
    .seed(12345)
    .l2(0.001) //l2 regularization on all layers
    .updater(new AdaGrad.Builder().learningRate(0.04).build())
    .list()
    .layer(0, new ConvolutionLayer.Builder(10, 10)
        .nIn(3) //3 channels: RGB
        .nOut(30)
        .stride(4, 4)
        .activation(Activation.RELU)
        .weightInit(WeightInit.RELU)
        .build()) //Output: (130-10+0)/4+1 = 31 -> 31*31*30
    .layer(1, new SubsamplingLayer.Builder(SubsamplingLayer.PoolingType.MAX)
        .kernelSize(3, 3)
        .stride(2, 2).build()) //(31-3+0)/2+1 = 15
    .layer(2, new ConvolutionLayer.Builder(3, 3)
        .nIn(30)
        .nOut(10)
        .stride(2, 2)
        .activation(Activation.RELU)
        .weightInit(WeightInit.RELU)
        .build()) //Output: (15-3+0)/2+1 = 7 -> 7*7*10 = 490
    .layer(3, new DenseLayer.Builder()
        .activation(Activation.RELU)
        .nIn(490)
        .nOut(50)
        .weightInit(WeightInit.RELU)
        .updater(new AdaGrad.Builder().learningRate(0.01).build())
        .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
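The output-size comments above follow the standard convolution arithmetic. A small illustrative helper, not part of the source, that reproduces those numbers:

//Sketch of the output-size formula used in the comments above:
//out = (in - kernel + 2*padding) / stride + 1
static int convOutputSize(int in, int kernel, int padding, int stride) {
    return (in - kernel + 2 * padding) / stride + 1;
}
//convOutputSize(130, 10, 0, 4) == 31; convOutputSize(31, 3, 0, 2) == 15; convOutputSize(15, 3, 0, 2) == 7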
@Override
protected void trainModel() throws LibrecException {
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .iterations(1)
        .updater(Updater.NESTEROVS)
        .learningRate(learningRate)
        .weightInit(WeightInit.XAVIER_UNIFORM)
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .regularization(true)
        .l2(lambdaReg)
        .list()
        .layer(0, new DenseLayer.Builder().nIn(inputDim).nOut(hiddenDim)
            .activation(Activation.fromString(hiddenActivation))
            .biasInit(0.1)
            .build())
        .layer(1, new OutputLayer.Builder(new AutoRecLossFunction()).nIn(hiddenDim).nOut(inputDim)
            .activation(Activation.fromString(outputActivation))
            .biasInit(0.1)
            .build())
        .pretrain(false).backprop(true)
        .build();
ComputationGraphConfiguration config = new NeuralNetConfiguration.Builder()
    .weightInit(WeightInit.RELU)
    .activation(Activation.LEAKYRELU)
    .updater(Updater.ADADELTA)
    .convolutionMode(ConvolutionMode.Same)
    .regularization(true).dropOut(0.2)
    .learningRate(learnRate)
    .graphBuilder()
    .addInputs("input")
    .addLayer("cnn3", new ConvolutionLayer.Builder()
        .kernelSize(3, vectorSize)
        .stride(1, vectorSize)
        .nIn(1)
        .nOut(cnnLayerFeatureMaps)
        .build(), "input")
    .addLayer("cnn4", new ConvolutionLayer.Builder()
        .kernelSize(4, vectorSize)
        .stride(1, vectorSize)
        .nIn(1)
        .nOut(cnnLayerFeatureMaps)
        .build(), "input")
    .addLayer("cnn5", new ConvolutionLayer.Builder()
        .kernelSize(5, vectorSize)
        .stride(1, vectorSize)
        .nIn(1)
        .nOut(cnnLayerFeatureMaps)
        .build(), "input")
    .addVertex("merge", new MergeVertex(), "cnn3", "cnn4", "cnn5") //Perform depth concatenation
    .addLayer("globalPool", new GlobalPoolingLayer.Builder()
@Override
protected void trainModel() throws LibrecException {
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .seed(6)
        .iterations(1)
        .updater(Updater.ADAM)
        .learningRate(learningRate)
        .weightInit(WeightInit.XAVIER)
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .regularization(true)
        .l2(lambdaReg)
        .list()
        .layer(0, new CDAELayer.Builder().nIn(inputDim).nOut(hiddenDim)
            .activation(Activation.fromString(hiddenActivation))
            .setNumUsers(numUsers)
            .build())
        .layer(1, new OutputLayer.Builder().nIn(hiddenDim).nOut(inputDim)
            .lossFunction(LossFunctions.LossFunction.SQUARED_LOSS)
            .activation(Activation.fromString(outputActivation))
            .build())
        .pretrain(false).backprop(true)
        .build();
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
    .seed(12345)
    .activation(Activation.LEAKYRELU)
    .weightInit(WeightInit.XAVIER)
    .updater(new Nesterovs(0.1)) //To configure: .updater(Nesterovs.builder().momentum(0.9).build())
    .l2(1e-4)
    .list()
    .layer(0, new DenseLayer.Builder().nIn(28 * 28).nOut(500).build())
    .layer(1, new DenseLayer.Builder().nIn(500).nOut(100).build())
    .layer(2, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
        .activation(Activation.SOFTMAX).nIn(100).nOut(10).build())
    .pretrain(false).backprop(true)
    .build();
public static ComputationGraphConfiguration getConf() {
    ComputationGraphConfiguration.GraphBuilder builder = new NeuralNetConfiguration.Builder()
        .seed(12345)
        .updater(new Adam(0.01))
        .weightInit(WeightInit.RELU)
        .graphBuilder()
        .addInputs("in");

    String[] poolNames = new String[ngramFilters.length];
    int i = 0;
    for (int ngram : ngramFilters) {
        String filterName = String.format("ngram%d", ngram);
        poolNames[i] = String.format("pool%d", ngram);
        builder = builder.addLayer(filterName, new Convolution1DLayer.Builder()
                .nOut(numFilters)
                .kernelSize(ngram)
                .activation(Activation.RELU)
                .build(), "in")
            .addLayer(poolNames[i], new GlobalPoolingLayer.Builder(PoolingType.MAX).build(), filterName);
        i++;
    }

    return builder.addVertex("concat", new MergeVertex(), poolNames)
        .addLayer("predict", new DenseLayer.Builder().nOut(numClasses).dropOut(dropoutRetain)
            .activation(Activation.SOFTMAX).build(), "concat")
        .addLayer("loss", new LossLayer.Builder(LossFunctions.LossFunction.MCXENT).build(), "predict")
        .setOutputs("loss")
        .setInputTypes(InputType.recurrent(W2V_VECTOR_SIZE, 1000))
        .build();
}
}
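A minimal usage sketch, not from the source: the graph configuration above would be instantiated and initialized like any DL4J computation graph.

//Hypothetical usage of getConf() above:
ComputationGraph net = new ComputationGraph(getConf());
net.init();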
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
    .updater(new Adam.Builder().learningRate(2e-2).build())
    .l2(1e-5)
    .weightInit(WeightInit.XAVIER)
    .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(1.0)
    .list()
    .layer(0, new LSTM.Builder().nIn(vectorSize).nOut(256)
        .activation(Activation.TANH).build())
    .layer(1, new RnnOutputLayer.Builder().activation(Activation.SOFTMAX)
        .lossFunction(LossFunctions.LossFunction.MCXENT).nIn(256).nOut(2).build())
    .pretrain(false).backprop(true).build();
    .add(7, 5e-4).build();   //Tail of the learning-rate schedule (lrSchedule) used by the AMSGrad updater below

ComputationGraphConfiguration.GraphBuilder b = new NeuralNetConfiguration.Builder()
    .convolutionMode(ConvolutionMode.Same)
    .l2(1e-4)
    .updater(new AMSGrad(lrSchedule))
    .weightInit(WeightInit.RELU)
    .graphBuilder()
    .addInputs("input");

DarknetHelper.addLayers(b, 5, 2, 256, 512, 2);   //8x8 out

b.addLayer("convolution2d_6", new ConvolutionLayer.Builder(1, 1)
        .nIn(512)
        .nOut(TinyImageNetFetcher.NUM_LABELS)
        .weightInit(WeightInit.XAVIER)
        .stride(1, 1)
        .activation(Activation.IDENTITY)
        .build(), "maxpooling2d_5")
    .addLayer("globalpooling", new GlobalPoolingLayer.Builder(PoolingType.AVG).build(), "convolution2d_6")
    .addLayer("loss", new LossLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
        .activation(Activation.SOFTMAX).build(), "globalpooling")
    .setOutputs("loss");   //"loss" is the sole graph output

ComputationGraphConfiguration conf = b.build();
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = super.init(conf, paramsView, initializeParams);
    FeedForwardLayer layerConf = (FeedForwardLayer) conf.getLayer();
    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut();
    int nWeightParams = nIn * nOut;
    int nUserWeightParams = numUsers * nOut;
    //User weights are stored after the standard weights and biases in the flattened parameter view
    INDArray userWeightView = paramsView.get(NDArrayIndex.point(0),
        NDArrayIndex.interval(nWeightParams + nOut, nWeightParams + nOut + nUserWeightParams));
    params.put(USER_WEIGHT_KEY, this.createUserWeightMatrix(conf, userWeightView, initializeParams));
    conf.addVariable(USER_WEIGHT_KEY);
    return params;
}
public Map<String, INDArray> getGradientsFromFlattened(NeuralNetConfiguration conf, INDArray gradientView) {
    Map<String, INDArray> out = super.getGradientsFromFlattened(conf, gradientView);
    FeedForwardLayer layerConf = (FeedForwardLayer) conf.getLayer();
    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut();
    int nWeightParams = nIn * nOut;
    int nUserWeightParams = numUsers * nOut;
    //Gradient view uses the same flattened layout as the parameter view: [W | b | user weights]
    INDArray userWeightGradientView = gradientView.get(NDArrayIndex.point(0),
            NDArrayIndex.interval(nWeightParams + nOut, nWeightParams + nOut + nUserWeightParams))
        .reshape('f', numUsers, nOut);
    out.put(USER_WEIGHT_KEY, userWeightGradientView);
    return out;
}
}
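Both methods above slice the same flattened ordering. An illustrative sketch of the offsets, derived from the interval arithmetic in the code (variable names are hypothetical):

//Offsets into the flattened view, assuming layout [W (nIn*nOut) | b (nOut) | userW (numUsers*nOut)]:
int wStart = 0;                          //standard weights
int bStart = nIn * nOut;                 //biases follow the weights
int uwStart = nIn * nOut + nOut;         //user weights follow the biases
int uwEnd = uwStart + numUsers * nOut;   //matches the interval used above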
public static void saveModel(FileSystem fs, Model model) throws Exception {
    String json = null;
    if (model instanceof MultiLayerNetwork) {
        json = ((MultiLayerNetwork) model).getLayerWiseConfigurations().toJson();
    } else if (model instanceof ComputationGraph) {
        json = ((ComputationGraph) model).getConfiguration().toJson();
    }
    byte[] bytes = json.getBytes();
    FSDataOutputStream out = fs.create(new Path(modelPath));
    out.write(bytes);
    out.hsync();
    out.close();
    fs.close();
}
}
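A hypothetical call site, with names assumed rather than taken from the source:

//Hypothetical usage, assuming a default Hadoop configuration and a trained network:
FileSystem fs = FileSystem.get(new org.apache.hadoop.conf.Configuration());
saveModel(fs, trainedNetwork);   //trainedNetwork: an assumed MultiLayerNetwork or ComputationGraph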
if (conf.isUseDropConnect() && training && conf.getLayer().getDropOut() > 0) {
    W = Dropout.applyDropConnect(this, CDAEParamInitializer.WEIGHT_KEY);
@Override
public double calcL2(boolean backpropParamsOnly) {
    if (!this.conf.isUseRegularization()) {
        return 0.0D;
    } else {
        double l2Sum = 0.0D;
        double l2Norm;
        if (this.conf.getL2ByParam("W") > 0.0D) {
            l2Norm = this.getParam("W").norm2Number().doubleValue();
            l2Sum += 0.5D * this.conf.getL2ByParam("W") * l2Norm * l2Norm;
        }
        if (this.conf.getL2ByParam("uw") > 0.0D) {
            l2Norm = this.getParam("uw").norm2Number().doubleValue();
            l2Sum += 0.5D * this.conf.getL2ByParam("uw") * l2Norm * l2Norm;
        }
        if (this.conf.getL2ByParam("b") > 0.0D) {
            l2Norm = this.getParam("b").norm2Number().doubleValue();
            l2Sum += 0.5D * this.conf.getL2ByParam("b") * l2Norm * l2Norm;
        }
        return l2Sum;
    }
}
}
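Each guarded branch above accumulates the standard L2 penalty for one parameter array: for parameters $\theta$ with coefficient $\lambda$, the contribution is

$\frac{\lambda}{2}\,\lVert \theta \rVert_2^2 = \frac{\lambda}{2}\sum_i \theta_i^2$

which the code computes as 0.5D * l2Coeff * l2Norm * l2Norm.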
public static MultiLayerNetwork lenetModel() {
    /**
     * Revised LeNet model approach developed by ramgo2; achieves slightly above random.
     * Reference: https://gist.github.com/ramgo2/833f12e92359a2da9e5c2fb6333351c5
     **/
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .seed(seed)
        .l2(0.005) //tried 0.0001, 0.0005
        .activation(Activation.RELU)
        .weightInit(WeightInit.XAVIER)
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .updater(new Nesterovs(0.0001, 0.9))
        .list()
        .layer(0, new ConvolutionLayer.Builder(new int[]{5, 5}, new int[]{1, 1}, new int[]{0, 0}).name("cnn1")
            .nIn(channels).nOut(50).biasInit(0).build())
        .layer(1, new SubsamplingLayer.Builder(new int[]{2, 2}, new int[]{2, 2}).name("maxpool1").build())
        .layer(2, new ConvolutionLayer.Builder(new int[]{5, 5}, new int[]{5, 5}, new int[]{1, 1}).name("cnn2")
            .nOut(100).biasInit(0).build())
        .layer(3, new SubsamplingLayer.Builder(new int[]{2, 2}, new int[]{2, 2}).name("maxpool2").build())
        .layer(4, new DenseLayer.Builder().nOut(500).build())
        .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
            .nOut(4)
            .activation(Activation.SOFTMAX)
            .build())
        .backprop(true).pretrain(false)
        .setInputType(InputType.convolutional(height, width, channels))
        .build();
    return new MultiLayerNetwork(conf);
}
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
    .seed(12345)
    .l2(0.001)
    .weightInit(WeightInit.XAVIER)
    .updater(new RmsProp.Builder().learningRate(0.1).build())
    .list()
    .layer(0, new LSTM.Builder().nIn(iter.inputColumns()).nOut(lstmLayerSize)
        .activation(Activation.TANH).build())
    .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize)
        .activation(Activation.TANH).build())
    .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification
        .nIn(lstmLayerSize).nOut(nOut).build())
    .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength)
    .pretrain(false).backprop(true)
    .build();
MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
    .seed(12345)
    .l2(0.001)
    .weightInit(WeightInit.XAVIER)
    .updater(new RmsProp(0.1))
    .list()
    .layer(0, new LSTM.Builder().nIn(CHAR_TO_INT.size()).nOut(lstmLayerSize).activation(Activation.TANH).build())
    .layer(1, new LSTM.Builder().nIn(lstmLayerSize).nOut(lstmLayerSize).activation(Activation.TANH).build())
    .layer(2, new RnnOutputLayer.Builder(LossFunction.MCXENT).activation(Activation.SOFTMAX) //MCXENT + softmax for classification
        .nIn(lstmLayerSize).nOut(nOut).build())
    .backpropType(BackpropType.TruncatedBPTT).tBPTTForwardLength(tbpttLength).tBPTTBackwardLength(tbpttLength)
    .pretrain(false).backprop(true)
    .build();