public static ComputationGraphConfiguration getConf() {
    ComputationGraphConfiguration.GraphBuilder builder = new NeuralNetConfiguration.Builder()
        .seed(12345)
        .updater(new Adam(0.01))
        .weightInit(WeightInit.RELU)
        .graphBuilder()
        .addInputs("in");

    String[] poolNames = new String[ngramFilters.length];
    int i = 0;
    for (int ngram : ngramFilters) {
        String filterName = String.format("ngram%d", ngram);
        poolNames[i] = String.format("pool%d", ngram);
        builder = builder.addLayer(filterName, new Convolution1DLayer.Builder()
                .nOut(numFilters)
                .kernelSize(ngram)
                .activation(Activation.RELU)
                .build(), "in")
            .addLayer(poolNames[i], new GlobalPoolingLayer.Builder(PoolingType.MAX).build(), filterName);
        i++;
    }

    return builder.addVertex("concat", new MergeVertex(), poolNames)
        .addLayer("predict", new DenseLayer.Builder().nOut(numClasses).dropOut(dropoutRetain)
            .activation(Activation.SOFTMAX).build(), "concat")
        .addLayer("loss", new LossLayer.Builder(LossFunctions.LossFunction.MCXENT).build(), "predict")
        .setOutputs("loss")
        .setInputTypes(InputType.recurrent(W2V_VECTOR_SIZE, 1000))
        .build();
}
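A minimal usage sketch for the configuration above, assuming the enclosing class defines ngramFilters, numFilters, numClasses, dropoutRetain, and W2V_VECTOR_SIZE; trainIter is a hypothetical iterator and is only shown commented out:

import org.deeplearning4j.nn.graph.ComputationGraph;

// Sketch: build and initialize the graph from getConf(); field names come from the method above.
ComputationGraph net = new ComputationGraph(getConf());
net.init();
// trainIter would be a DataSetIterator yielding [W2V_VECTOR_SIZE x timeSteps] sequences (hypothetical):
// net.fit(trainIter);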
@Override
public InputType getOutputType(int layerIndex, InputType... vertexInputs) throws InvalidInputTypeException {
    if (vertexInputs.length != 1)
        throw new InvalidInputTypeException("Invalid input type: cannot get last time step of more than 1 input");
    if (vertexInputs[0].getType() != InputType.Type.RNN) {
        throw new InvalidInputTypeException(
            "Invalid input type: cannot get subset of non RNN input (got: " + vertexInputs[0] + ")");
    }
    return InputType.feedForward(((InputType.InputTypeRecurrent) vertexInputs[0]).getSize());
}
        .activation(Activation.SOFTMAX)
        .build())
    .setInputType(InputType.convolutionalFlat(28, 28, 1)) // See note below
    .backprop(true).pretrain(false).build();

MultiLayerNetwork model = new MultiLayerNetwork(conf);
@Override
public InputType getOutputType(InputType inputType) {
    if (inputType == null || inputType.getType() != InputType.Type.CNN) {
        throw new IllegalStateException("Invalid input type: Expected input of type CNN, got " + inputType);
    }
    InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType;
    int outSize = c.getDepth() * c.getHeight() * c.getWidth();
    return InputType.recurrent(outSize);
}
@Override
public InputType getOutputType(InputType inputType) {
    if (inputType == null || inputType.getType() != InputType.Type.CNN) {
        throw new IllegalStateException("Invalid input type: Expected input of type CNN, got " + inputType);
    }
    InputType.InputTypeConvolutional c = (InputType.InputTypeConvolutional) inputType;
    int outSize = c.getDepth() * c.getHeight() * c.getWidth();
    return InputType.feedForward(outSize);
}
@Override
public InputType getOutputType(int layerIndex, InputType... vertexInputs) throws InvalidInputTypeException {
    if (vertexInputs.length != 1)
        throw new InvalidInputTypeException("Invalid input type: cannot duplicate more than 1 input");

    int tsLength = 1; // TODO work this out properly

    if (vertexInputs[0].getType() == InputType.Type.FF) {
        return InputType.recurrent(((InputType.InputTypeFeedForward) vertexInputs[0]).getSize(), tsLength);
    } else if (vertexInputs[0].getType() == InputType.Type.CNNFlat) {
        return InputType.recurrent(((InputType.InputTypeConvolutionalFlat) vertexInputs[0]).getFlattenedSize(), tsLength);
    } else {
        throw new InvalidInputTypeException(
            "Invalid input type: cannot duplicate to time series non feed forward (or CNN flat) input (got: "
                + vertexInputs[0] + ")");
    }
}
public static MultiLayerConfiguration lenetModelConf() {
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .seed(seed)
        .l2(0.005)
        .activation(Activation.RELU)
        .weightInit(WeightInit.XAVIER)
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .updater(new Nesterovs(0.0001, 0.9))
        .list()
        .layer(0, new ConvolutionLayer.Builder(new int[]{5, 5}, new int[]{1, 1}, new int[]{0, 0}).name("cnn1")
            .nIn(channels).nOut(50).biasInit(0).build())
        .layer(1, new SubsamplingLayer.Builder(new int[]{2, 2}, new int[]{2, 2}).name("maxpool1").build())
        .layer(2, new ConvolutionLayer.Builder(new int[]{5, 5}, new int[]{5, 5}, new int[]{1, 1}).name("cnn2")
            .nOut(100).biasInit(0).build())
        .layer(3, new SubsamplingLayer.Builder(new int[]{2, 2}, new int[]{2, 2}).name("maxpool2").build())
        .layer(4, new DenseLayer.Builder().nOut(500).build())
        .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
            .nOut(4)
            .activation(Activation.SOFTMAX)
            .build())
        .backprop(true).pretrain(false)
        .setInputType(InputType.convolutional(height, width, channels))
        .build();
    return conf;
}

public static void saveModel(FileSystem fs, Model model) throws Exception {
@Override
public InputType getOutputType(InputType inputType) {
    if (inputType == null || inputType.getType() != InputType.Type.RNN) {
        throw new IllegalStateException("Invalid input: expected input of type RNN, got " + inputType);
    }
    InputType.InputTypeRecurrent rnn = (InputType.InputTypeRecurrent) inputType;
    return InputType.feedForward(rnn.getSize());
}
@Override
public InputPreProcessor getPreProcessorForInputType(InputType inputType) {
    if (inputType.getType() == InputType.Type.CNNFlat) {
        InputType.InputTypeConvolutionalFlat i = (InputType.InputTypeConvolutionalFlat) inputType;
        return new FeedForwardToCnnPreProcessor(i.getHeight(), i.getWidth(), i.getDepth());
    }
    return null;
}
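For context, a sketch (not the library's generated configuration verbatim) of how such a preprocessor can be attached by hand instead of relying on setInputType(...); the 28x28x1 dimensions and layer sizes are illustrative assumptions:

// Sketch: manually registering the FF-to-CNN preprocessor in front of a first convolutional layer.
// Dimensions (28 x 28 x 1) and layer sizes are assumptions for illustration only.
MultiLayerConfiguration ppConf = new NeuralNetConfiguration.Builder()
    .list()
    .layer(0, new ConvolutionLayer.Builder(5, 5).nIn(1).nOut(20).activation(Activation.RELU).build())
    .layer(1, new OutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
        .nIn(20 * 24 * 24).nOut(10).activation(Activation.SOFTMAX).build())
    .inputPreProcessor(0, new FeedForwardToCnnPreProcessor(28, 28, 1))
    .inputPreProcessor(1, new CnnToFeedForwardPreProcessor(24, 24, 20))
    .build();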
/**
 * InputType for recurrent neural network (time series) data
 *
 * @param size The size of the activations
 * @return
 */
public static InputType recurrent(int size) {
    return new InputTypeRecurrent(size);
}
/**
 * InputType for feed forward network data
 *
 * @param size The size of the activations
 */
public static InputType feedForward(int size) {
    return new InputTypeFeedForward(size);
}
/**
 * Input type for convolutional (CNN) data, that is, 4d with shape [miniBatchSize, depth, height, width].
 * For CNN data that has been flattened, use {@link #convolutionalFlat(int, int, int)}
 *
 * @param height Height of the input
 * @param width  Width of the input
 * @param depth  Depth, or number of channels
 * @return
 */
public static InputType convolutional(int height, int width, int depth) {
    return new InputTypeConvolutional(height, width, depth);
}
public static InputType[] inferInputTypes(INDArray... inputArrays) {
    InputType[] out = new InputType[inputArrays.length];
    for (int i = 0; i < inputArrays.length; i++) {
        out[i] = inferInputType(inputArrays[i]);
    }
    return out;
}
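A short usage sketch of input-type inference with ND4J arrays; the shapes below are illustrative assumptions:

import org.nd4j.linalg.factory.Nd4j;

// Sketch: rank-2 arrays infer as feed forward, rank-3 as recurrent (shapes are illustrative).
INDArray ffExample  = Nd4j.create(32, 784);        // [miniBatchSize, size]
INDArray rnnExample = Nd4j.create(32, 300, 100);   // [miniBatchSize, size, timeSeriesLength]
InputType[] types = InputType.inferInputTypes(ffExample, rnnExample);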
@Override
public InputType getOutputType(int layerIndex, InputType... vertexInputs) throws InvalidInputTypeException {
    return InputType.feedForward(1);
}
/**
 * Input type for convolutional (CNN) data, where the data is in flattened (row vector) format.
 * Expect data with shape [miniBatchSize, height * width * depth]. For CNN data in 4d format,
 * use {@link #convolutional(int, int, int)}
 *
 * @param height Height of the (unflattened) data represented by this input type
 * @param width  Width of the (unflattened) data represented by this input type
 * @param depth  Depth of the (unflattened) data represented by this input type
 * @return
 */
public static InputType convolutionalFlat(int height, int width, int depth) {
    return new InputTypeConvolutionalFlat(height, width, depth);
}
@Override
public InputType getOutputType(InputType inputType) {
    if (inputType == null || (inputType.getType() != InputType.Type.FF
            && inputType.getType() != InputType.Type.CNNFlat)) {
        throw new IllegalStateException("Invalid input: expected input of type FeedForward, got " + inputType);
    }

    if (inputType.getType() == InputType.Type.FF) {
        InputType.InputTypeFeedForward ff = (InputType.InputTypeFeedForward) inputType;
        return InputType.recurrent(ff.getSize());
    } else {
        InputType.InputTypeConvolutionalFlat cf = (InputType.InputTypeConvolutionalFlat) inputType;
        return InputType.recurrent(cf.getFlattenedSize());
    }
}
public static MultiLayerNetwork lenetModel() {
    /**
     * Revised LeNet model approach developed by ramgo2; achieves slightly above random accuracy.
     * Reference: https://gist.github.com/ramgo2/833f12e92359a2da9e5c2fb6333351c5
     **/
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .seed(seed)
        .l2(0.005) // tried 0.0001, 0.0005
        .activation(Activation.RELU)
        .weightInit(WeightInit.XAVIER)
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .updater(new Nesterovs(0.0001, 0.9))
        .list()
        .layer(0, new ConvolutionLayer.Builder(new int[]{5, 5}, new int[]{1, 1}, new int[]{0, 0}).name("cnn1")
            .nIn(channels).nOut(50).biasInit(0).build())
        .layer(1, new SubsamplingLayer.Builder(new int[]{2, 2}, new int[]{2, 2}).name("maxpool1").build())
        .layer(2, new ConvolutionLayer.Builder(new int[]{5, 5}, new int[]{5, 5}, new int[]{1, 1}).name("cnn2")
            .nOut(100).biasInit(0).build())
        .layer(3, new SubsamplingLayer.Builder(new int[]{2, 2}, new int[]{2, 2}).name("maxpool2").build())
        .layer(4, new DenseLayer.Builder().nOut(500).build())
        .layer(5, new OutputLayer.Builder(LossFunctions.LossFunction.NEGATIVELOGLIKELIHOOD)
            .nOut(4)
            .activation(Activation.SOFTMAX)
            .build())
        .backprop(true).pretrain(false)
        .setInputType(InputType.convolutional(height, width, channels))
        .build();

    return new MultiLayerNetwork(conf);
}
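A hedged usage sketch for the model above; trainIter and numEpochs are hypothetical and appear only in comments:

import org.deeplearning4j.optimize.listeners.ScoreIterationListener;

// Sketch: initialize the LeNet-style model and attach a score listener before training.
MultiLayerNetwork network = lenetModel();
network.init();
network.setListeners(new ScoreIterationListener(10));
// trainIter would be a DataSetIterator of [channels x height x width] images with 4 label classes (hypothetical):
// for (int epoch = 0; epoch < numEpochs; epoch++) network.fit(trainIter);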
/**
 * InputType for recurrent neural network (time series) data
 *
 * @param size             The size of the activations
 * @param timeSeriesLength Length of the input time series
 * @return
 */
public static InputType recurrent(int size, int timeSeriesLength) {
    return new InputTypeRecurrent(size, timeSeriesLength);
}