public int numParams(NeuralNetConfiguration conf) {
    FeedForwardLayer layerConf = (FeedForwardLayer) conf.getLayer();
    // Standard dense-layer parameters, plus an additional [numUsers x nOut] user weight matrix
    return super.numParams(conf) + numUsers * layerConf.getNOut();
}
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    Map<String, INDArray> params = super.init(conf, paramsView, initializeParams);
    FeedForwardLayer layerConf = (FeedForwardLayer) conf.getLayer();
    int nIn = layerConf.getNIn();
    int nOut = layerConf.getNOut();
    int nWeightParams = nIn * nOut;
    int nUserWeightParams = numUsers * nOut;
    // The user weight matrix occupies the slice of the flattened view after the standard weights and biases
    INDArray userWeightView = paramsView.get(NDArrayIndex.point(0),
                    NDArrayIndex.interval(nWeightParams + nOut, nWeightParams + nOut + nUserWeightParams));
    params.put(USER_WEIGHT_KEY, this.createUserWeightMatrix(conf, userWeightView, initializeParams));
    conf.addVariable(USER_WEIGHT_KEY);
    return params;
}
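For context, a minimal sketch (not from the source) of the flattened parameter layout that the interval offsets above assume, i.e. [W | b | UW]; the sizes below are placeholders:

int nIn = 10, nOut = 4, numUsers = 3;
int wOffset = 0;                               // W:  nIn * nOut input weights
int bOffset = nIn * nOut;                      // b:  nOut biases
int uwOffset = nIn * nOut + nOut;              // UW: numUsers * nOut user weights
int totalParams = uwOffset + numUsers * nOut;  // must agree with numParams(conf) above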
@Override
public double calcL2(boolean backpropParamsOnly) {
    if (!this.conf.isUseRegularization()) {
        return 0.0;
    }
    double l2Sum = 0.0;
    double l2Norm;
    if (this.conf.getL2ByParam("W") > 0.0) {
        l2Norm = this.getParam("W").norm2Number().doubleValue();
        l2Sum += 0.5 * this.conf.getL2ByParam("W") * l2Norm * l2Norm;
    }
    if (this.conf.getL2ByParam("uw") > 0.0) {
        l2Norm = this.getParam("uw").norm2Number().doubleValue();
        l2Sum += 0.5 * this.conf.getL2ByParam("uw") * l2Norm * l2Norm;
    }
    if (this.conf.getL2ByParam("b") > 0.0) {
        l2Norm = this.getParam("b").norm2Number().doubleValue();
        l2Sum += 0.5 * this.conf.getL2ByParam("b") * l2Norm * l2Norm;
    }
    return l2Sum;
}
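A quick worked example of the 0.5 * l2 * ||W||^2 term above, with made-up values:

double l2 = 0.01, norm = 3.0;                 // suppose getL2ByParam("W") = 0.01 and ||W||_2 = 3.0
System.out.println(0.5 * l2 * norm * norm);   // prints 0.045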
if (conf.isUseDropConnect() && training && conf.getLayer().getDropOut() > 0) {
    // DropConnect: randomly zero entries of the weight matrix during training
    W = Dropout.applyDropConnect(this, CDAEParamInitializer.WEIGHT_KEY);
}
NeuralNetConfiguration layerNNC = origNNC.clone();
editedModel.getLayerWiseConfigurations().getConf(i).resetVariables();
layers[i].setConf(layerNNC);
layers[i] = new FrozenLayer(layers[i]);

if (origNNC.getVariables() != null) {
    List<String> vars = origNNC.variables(true);
    origNNC.clearVariables();
    layerNNC.clearVariables();
    for (String s : vars) {
        // Re-register each variable with zero L1/L2 and learning rate: frozen parameters are never updated
        origNNC.variables(false).add(s);
        origNNC.getL1ByParam().put(s, 0.0);
        origNNC.getL2ByParam().put(s, 0.0);
        origNNC.getLearningRateByParam().put(s, 0.0);
        layerNNC.variables(false).add(s);
        layerNNC.getL1ByParam().put(s, 0.0);
        layerNNC.getL2ByParam().put(s, 0.0);
        layerNNC.getLearningRateByParam().put(s, 0.0);
    }
}

// Also wrap the layer configuration itself (once per layer, not per variable),
// so the frozen state survives serialization
Layer origLayerConf = editedModel.getLayerWiseConfigurations().getConf(i).getLayer();
Layer newLayerConf = new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer(origLayerConf);
newLayerConf.setLayerName(origLayerConf.getLayerName());
editedModel.getLayerWiseConfigurations().getConf(i).setLayer(newLayerConf);
if (currentLayer.numParams() > 0) {
    paramShape = "";
    in = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNIn());
    out = String.valueOf(((FeedForwardLayer) currentLayer.conf().getLayer()).getNOut());
    Set<String> paraNames = currentLayer.conf().getLearningRateByParam().keySet();
    for (String aP : paraNames) {
        String paramS = ArrayUtils.toString(currentLayer.paramTable().get(aP).shape());
double decayRate = layer.conf().getLrPolicyDecayRate();
double lr = conf.getLearningRateByParam(variable);
double newLr = lr;
if (conf.getLayer() instanceof BaseLayer) {
    BaseLayer baseLayer = (BaseLayer) conf.getLayer();
    switch (conf.getLearningRatePolicy()) {
        case Exponential:
            newLr = lr * Math.pow(decayRate, iteration);
            break;
        case Inverse:
            newLr = lr / Math.pow((1 + decayRate * iteration), conf.getLrPolicyPower());
            break;
        case Step:
            newLr = lr * Math.pow(decayRate, Math.floor(iteration / conf.getLrPolicySteps()));
            break;
        case TorchStep:
            if (iteration > 1 && conf.getLrPolicySteps() % iteration == 0) {
                newLr = lr * decayRate;
            }
            break;
        case Poly:
            newLr = lr * Math.pow((1 - ((double) iteration) / conf.getNumIterations()), conf.getLrPolicyPower());
            break;
        case Sigmoid:
            newLr = lr / (1 + Math.exp(-decayRate * (iteration - conf.getLrPolicySteps())));
            break;
        case Schedule:
            // newLr comes from the configured iteration -> learning rate schedule (elided here)
            break;
        default:
            break;
    }
    conf.setLearningRateByParam(variable, newLr);
    // Nesterov momentum follows its own schedule; newMomentum is produced by the (elided) momentum policy
    if (baseLayer.getIUpdater() instanceof Nesterovs) {
        baseLayer.setMomentum(newMomentum);
    }
}
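As a standalone sanity check on the Step policy above (all values made up):

double lr0 = 0.1, decay = 0.5, steps = 100;
int iter = 250;
System.out.println(lr0 * Math.pow(decay, Math.floor(iter / steps)));   // 0.1 * 0.5^2 = 0.025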
LearningRatePolicy lp1 = layer1.conf().getLearningRatePolicy();
LearningRatePolicy lp2 = layer2.conf().getLearningRatePolicy();
if (lp1 != lp2) {
    return false;
}
double lr1 = layer1.conf().getLearningRateByParam(param1);
double lr2 = layer2.conf().getLearningRateByParam(param2);
if (lr1 != lr2) {
    return false;
}
double dr1 = layer1.conf().getLrPolicyDecayRate();
double dr2 = layer2.conf().getLrPolicyDecayRate();
boolean lrConfigEqual;
switch (lp1) {
    case Exponential:
        lrConfigEqual = dr1 == dr2;
        break;
    case Inverse:
        lrConfigEqual = dr1 == dr2 && layer1.conf().getLrPolicyPower() == layer2.conf().getLrPolicyPower();
        break;
    case Poly:
        lrConfigEqual = layer1.conf().getLrPolicyPower() == layer2.conf().getLrPolicyPower();
        break;
    case Sigmoid:
        lrConfigEqual = dr1 == dr2 && layer1.conf().getLrPolicySteps() == layer2.conf().getLrPolicySteps();
        break;
    case Step:
        lrConfigEqual = dr1 == dr2 && layer1.conf().getLrPolicySteps() == layer2.conf().getLrPolicySteps();
        break;
    case TorchStep:
        lrConfigEqual = layer1.conf().getLrPolicyPower() == layer2.conf().getLrPolicyPower();
        break;
    case Schedule:
        // Schedules live on the layer config itself; compare the per-iteration maps
        BaseLayer bl1 = (BaseLayer) layer1.conf().getLayer();
        BaseLayer bl2 = (BaseLayer) layer2.conf().getLayer();
        lrConfigEqual = Objects.equals(bl1.getLearningRateSchedule(), bl2.getLearningRateSchedule());
        break;
    default:
        lrConfigEqual = true;
        break;
}
@Override
public Map<String, INDArray> init(NeuralNetConfiguration conf, INDArray paramsView, boolean initializeParams) {
    FrozenLayer fl = (FrozenLayer) conf.getLayer();
    Layer innerLayer = fl.getLayer();
    ParamInitializer initializer = innerLayer.initializer();
    // Temporarily swap in the wrapped layer so its own initializer sees the config it expects, then restore
    conf.setLayer(innerLayer);
    Map<String, INDArray> m = initializer.init(conf, paramsView, initializeParams);
    conf.setLayer(fl);
    return m;
}
public String toJson() {
    try {
        return NeuralNetConfiguration.mapper().writeValueAsString(this);
    } catch (JsonProcessingException e) {
        throw new RuntimeException(e);
    }
}
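A minimal round-trip sketch, assuming this toJson() lives on MultiLayerConfiguration (layer sizes are placeholders; classes from org.deeplearning4j.nn.conf and org.deeplearning4j.nn.conf.layers):

MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .list()
        .layer(0, new DenseLayer.Builder().nIn(4).nOut(3).build())
        .layer(1, new OutputLayer.Builder().nIn(3).nOut(2).build())
        .build();
String json = conf.toJson();
MultiLayerConfiguration restored = MultiLayerConfiguration.fromJson(json);
// restored.equals(conf) should hold if serialization is lossless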
try {
    MultiLayerConfiguration multiLayerConf =
                    MultiLayerConfiguration.fromJson(FileUtils.readFileToString(new File(modelPath)));
    FeedForwardLayer outputLayer =
                    (FeedForwardLayer) multiLayerConf.getConf(multiLayerConf.getConfs().size() - 1).getLayer();

    NeuralNetConfiguration conf = NeuralNetConfiguration.fromJson(FileUtils.readFileToString(new File(modelPath)));
    LayerFactory factory = LayerFactories.getFactory(conf);
    int numParams = factory.initializer().numParams(conf, true);
@Override
public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf,
                Collection<IterationListener> iterationListeners, int layerIndex, INDArray layerParamsView,
                boolean initializeParams) {
    //Need to be able to instantiate a layer, from a config - for JSON -> net type situations
    org.deeplearning4j.nn.api.Layer underlying = layer.instantiate(getInnerConf(conf), iterationListeners,
                    layerIndex, layerParamsView, initializeParams);

    NeuralNetConfiguration nncUnderlying = underlying.conf();
    if (nncUnderlying.variables() != null) {
        List<String> vars = nncUnderlying.variables(true);
        nncUnderlying.clearVariables();
        conf.clearVariables();
        for (String s : vars) {
            // Register each variable on both configs with zero L1/L2 and learning rate: frozen params never change
            conf.variables(false).add(s);
            conf.getL1ByParam().put(s, 0.0);
            conf.getL2ByParam().put(s, 0.0);
            conf.getLearningRateByParam().put(s, 0.0);
            nncUnderlying.variables(false).add(s);
            nncUnderlying.getL1ByParam().put(s, 0.0);
            nncUnderlying.getL2ByParam().put(s, 0.0);
            nncUnderlying.getLearningRateByParam().put(s, 0.0);
        }
    }
    return new org.deeplearning4j.nn.layers.FrozenLayer(underlying);
}
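Hypothetical usage of the configuration-side wrapper seen here and in the surrounding snippets (the DenseLayer settings are placeholders):

DenseLayer dense = new DenseLayer.Builder().nIn(10).nOut(5).build();
org.deeplearning4j.nn.conf.layers.misc.FrozenLayer frozen =
                new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer(dense);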
@Override
public void applyLearningRateScoreDecay() {
    // Scale every per-parameter learning rate by the decay rate (plus a small epsilon)
    for (Map.Entry<String, Double> lrPair : conf.getLearningRateByParam().entrySet()) {
        conf.setLearningRateByParam(lrPair.getKey(),
                        lrPair.getValue() * (conf.getLrPolicyDecayRate() + Nd4j.EPS_THRESHOLD));
    }
}
/**
 * Set up the local DBN instance based on the conf params.
 */
@Override
public void setup(Configuration conf) {
    NeuralNetConfiguration conf2 = NeuralNetConfiguration.fromJson(conf.get(NEURAL_NET_CONF));
    neuralNetwork = conf2.getLayerFactory().create(conf2);
}
private void decreaseLearningRate(ComputationGraph computationGraph) {
    for (Layer layer : computationGraph.getLayers()) {
        if (!layer.conf().getLearningRateByParam().isEmpty()) {
            for (Map.Entry<String, Double> lrPair : layer.conf().getLearningRateByParam().entrySet()) {
                // Halve each per-parameter learning rate (epsilon keeps it strictly positive)
                final double rate = lrPair.getValue() * (0.5 + Nd4j.EPS_THRESHOLD);
                layer.conf().setLearningRateByParam(lrPair.getKey(), rate);
            }
        }
    }
}
/**
 * Apply L1 and L2 regularization, if necessary. Note that L1/L2 may differ for different layers in the same block.
 *
 * @param layer        The layer to apply L1/L2 to
 * @param paramName    Parameter name in the given layer
 * @param gradientView Gradient view array for the layer + param
 * @param paramsView   Parameter view array for the layer + param
 */
public void postApply(Layer layer, String paramName, INDArray gradientView, INDArray paramsView) {
    NeuralNetConfiguration conf = layer.conf();
    //TODO: do this for multiple contiguous params/layers (fewer, larger ops)

    double l2 = conf.getL2ByParam(paramName);
    if (conf.isUseRegularization() && l2 > 0) {
        // L2: gradientView += params * l2, i.e. dC/dw = dC0/dw + (lambda/n) * w, where C0 is the pre-L2 cost.
        // Done as a level-1 axpy, equivalent to gradientView.addi(paramsView.mul(l2)) but without the allocation.
        int length = gradientView.length();
        Nd4j.getBlasWrapper().level1().axpy(length, l2, paramsView, gradientView);
    }
    if (conf.isUseRegularization() && conf.getL1ByParam(paramName) > 0) {
        // L1: gradientView += sign(params) * l1
        gradientView.addi(Transforms.sign(paramsView, true).muli(conf.getL1ByParam(paramName)));
    }
}
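A standalone sketch (not from the source) checking that the axpy call above matches the allocating form mentioned in the comment; the shapes and the 0.01 coefficient are placeholders:

INDArray params = Nd4j.rand(1, 5);
INDArray grad = Nd4j.rand(1, 5);
INDArray expected = grad.add(params.mul(0.01));   // gradientView.addi(paramsView.mul(l2))
Nd4j.getBlasWrapper().level1().axpy(grad.length(), 0.01, params, grad);
// expected.equalsWithEps(grad, 1e-6) should now be true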
/**
 * JSON model configuration passed in.
 * If you are entering a MultiLayerConfiguration JSON, your file name MUST contain '_multi';
 * otherwise it will be processed as a regular NeuralNetConfiguration.
 *
 * Takes in a JSON file path, checks the path for the MultiLayer indicator,
 * reads the JSON file to a string, and creates the neural net configuration from it.
 */
@Override
public <E> E value(String value) throws Exception {
    boolean isMultiLayer = value.contains("_multi");
    String json = FileUtils.readFileToString(new File(value));
    if (isMultiLayer) {
        return (E) MultiLayerConfiguration.fromJson(json);
    } else {
        return (E) NeuralNetConfiguration.fromJson(json);
    }
}
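Hypothetical usage of the converter above; 'handler' and both file paths are placeholders:

MultiLayerConfiguration multi = handler.value("/models/net_multi.json");   // name contains '_multi'
NeuralNetConfiguration single = handler.value("/models/net.json");         // treated as a single configuration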
Layer origLayerConf = currLayerVertex.getLayerConf().getLayer();
Layer newLayerConf = new org.deeplearning4j.nn.conf.layers.misc.FrozenLayer(origLayerConf);
newLayerConf.setLayerName(origLayerConf.getLayerName());

NeuralNetConfiguration newNNC = currLayerVertex.getLayerConf().clone();
currLayerVertex.setLayerConf(newNNC);
currLayerVertex.getLayerConf().setLayer(newLayerConf);

List<String> vars = currLayerVertex.getLayerConf().variables(true);
currLayerVertex.getLayerConf().clearVariables();
for (String s : vars) {
    // Zero out L1/L2 and learning rate so the frozen vertex's parameters are never updated
    newNNC.variables(false).add(s);
    newNNC.getL1ByParam().put(s, 0.0);
    newNNC.getL2ByParam().put(s, 0.0);
    newNNC.getLearningRateByParam().put(s, 0.0);
}