@Override
public double calcL2(boolean backpropParamsOnly) {
    // No L2 penalty at all unless regularization is enabled on this configuration.
    if (!conf.isUseRegularization())
        return 0.0;

    // Accumulate 0.5 * lambda * ||param||_2^2 for each regularized parameter set,
    // in the same order as before: weights, "uw", then bias.
    double sum = 0.0;
    for (String key : new String[] {"W", "uw", "b"}) {
        double lambda = conf.getL2ByParam(key);
        if (lambda > 0.0) {
            // norm2Number() is sqrt(sum x_i^2); squaring recovers the sum of squares.
            double norm = getParam(key).norm2Number().doubleValue();
            sum += 0.5 * lambda * norm * norm;
        }
    }
    return sum;
}
}
@Override public double calcL2(boolean backpropParamsOnly) { if (!conf.isUseRegularization()) return 0.0; //L2 norm: sqrt( sum_i x_i^2 ) -> want sum squared weights, so l2 norm squared double l2Sum = 0.0; if (conf.getL2ByParam(DefaultParamInitializer.WEIGHT_KEY) > 0.0) { double l2Norm = getParam(DefaultParamInitializer.WEIGHT_KEY).norm2Number().doubleValue(); l2Sum += 0.5 * conf.getL2ByParam(DefaultParamInitializer.WEIGHT_KEY) * l2Norm * l2Norm; } if (conf.getL2ByParam(DefaultParamInitializer.BIAS_KEY) > 0.0) { double l2Norm = getParam(DefaultParamInitializer.BIAS_KEY).norm2Number().doubleValue(); l2Sum += 0.5 * conf.getL2ByParam(DefaultParamInitializer.BIAS_KEY) * l2Norm * l2Norm; } return l2Sum; }
@Override
public double calcL2(boolean backpropParamsOnly) {
    if (!conf.isUseRegularization())
        return 0.0;

    // Sum 0.5 * lambda * ||p||_2^2 over every parameter that has a positive L2 coefficient.
    double total = 0.0;
    for (String key : paramTable().keySet()) {
        double coeff = conf.getL2ByParam(key);
        if (coeff > 0) {
            double norm = getParam(key).norm2Number().doubleValue();
            total += 0.5 * coeff * norm * norm;
        }
    }
    return total;
}
@Override
public double calcL2(boolean backpropParamsOnly) {
    if (!conf.isUseRegularization())
        return 0.0;

    // Delegate the backprop-parameter contribution to the superclass.
    double total = super.calcL2(true);
    if (backpropParamsOnly)
        return total;

    // When pretrain params are included, the visible bias contributes its own L2 term.
    double vbCoeff = conf.getL2ByParam(PretrainParamInitializer.VISIBLE_BIAS_KEY);
    if (vbCoeff > 0) {
        double norm = getParam(PretrainParamInitializer.VISIBLE_BIAS_KEY).norm2Number().doubleValue();
        total += 0.5 * vbCoeff * norm * norm;
    }
    return total;
}
@Override
public double calcL2(boolean backpropParamsOnly) {
    if (!conf.isUseRegularization())
        return 0.0;

    // Total penalty = sum over params of 0.5 * lambda * (L2 norm)^2, skipping lambda <= 0.
    double total = 0.0;
    for (String key : paramTable().keySet()) {
        double coeff = conf.getL2ByParam(key);
        if (coeff > 0) {
            double norm = getParam(key).norm2Number().doubleValue();
            total += 0.5 * coeff * norm * norm;
        }
    }
    return total;
}
@Override
public double calcL2(boolean backpropParamsOnly) {
    if (!conf.isUseRegularization())
        return 0.0;

    // Each parameter with a positive L2 coefficient adds 0.5 * lambda * ||p||_2^2.
    double total = 0.0;
    for (String key : paramTable().keySet()) {
        double coeff = conf.getL2ByParam(key);
        if (coeff > 0) {
            double norm = getParam(key).norm2Number().doubleValue();
            total += 0.5 * coeff * norm * norm;
        }
    }
    return total;
}
@Override
public double calcL2(boolean backpropParamsOnly) {
    if (!conf.isUseRegularization())
        return 0.0;

    // Walk every registered parameter; only those with lambda > 0 contribute
    // 0.5 * lambda * (squared L2 norm) to the total.
    double total = 0.0;
    for (String key : paramTable().keySet()) {
        double coeff = conf.getL2ByParam(key);
        if (coeff > 0) {
            double norm = getParam(key).norm2Number().doubleValue();
            total += 0.5 * coeff * norm * norm;
        }
    }
    return total;
}
@Override
public double calcL2(boolean backpropParamsOnly) {
    if (!conf.isUseRegularization())
        return 0.0;

    double total = 0.0;
    for (Map.Entry<String, INDArray> entry : paramTable().entrySet()) {
        String key = entry.getKey();
        double coeff = conf().getL2ByParam(key);
        // Skip unregularized params, and pretrain-only params when the caller
        // asked for backprop params only.
        boolean skip = coeff <= 0.0 || (backpropParamsOnly && isPretrainParam(key));
        if (!skip) {
            double norm = entry.getValue().norm2Number().doubleValue();
            total += 0.5 * coeff * norm * norm;
        }
    }
    return total;
}
@Override public org.deeplearning4j.nn.api.Layer instantiate(NeuralNetConfiguration conf, Collection<IterationListener> iterationListeners, int layerIndex, INDArray layerParamsView, boolean initializeParams) { //Need to be able to instantiate a layer, from a config - for JSON -> net type situations org.deeplearning4j.nn.api.Layer underlying = layer.instantiate(getInnerConf(conf), iterationListeners, layerIndex, layerParamsView, initializeParams); NeuralNetConfiguration nncUnderlying = underlying.conf(); if (nncUnderlying.variables() != null) { List<String> vars = nncUnderlying.variables(true); nncUnderlying.clearVariables(); conf.clearVariables(); for (String s : vars) { conf.variables(false).add(s); conf.getL1ByParam().put(s, 0.0); conf.getL2ByParam().put(s, 0.0); conf.getLearningRateByParam().put(s, 0.0); nncUnderlying.variables(false).add(s); nncUnderlying.getL1ByParam().put(s, 0.0); nncUnderlying.getL2ByParam().put(s, 0.0); nncUnderlying.getLearningRateByParam().put(s, 0.0); } } return new org.deeplearning4j.nn.layers.FrozenLayer(underlying); }
// Re-register variable s on the original configuration with L1, L2 and learning rate zeroed.
// NOTE(review): layerNNC only receives the L2 and learning-rate entries here - unlike origNNC it
// gets no variables(false).add(s) and no getL1ByParam() entry. Confirm those are handled in the
// surrounding (not shown) code; compare the fully symmetric handling in FrozenLayer.instantiate.
origNNC.variables(false).add(s); origNNC.getL1ByParam().put(s, 0.0); origNNC.getL2ByParam().put(s, 0.0); origNNC.getLearningRateByParam().put(s, 0.0); layerNNC.getL2ByParam().put(s, 0.0); layerNNC.getLearningRateByParam().put(s, 0.0);
/** * Apply L1 and L2 regularization, if necessary. Note that L1/L2 may differ for different layers in the same block * * @param layer The layer to apply L1/L2 to * @param paramName Parameter name in the given layer * @param gradientView Gradient view array for the layer + param * @param paramsView Parameter view array for the layer + param */ public void postApply(Layer layer, String paramName, INDArray gradientView, INDArray paramsView) { NeuralNetConfiguration conf = layer.conf(); //TODO: do this for multiple contiguous params/layers (fewer, larger ops) double l2 = conf.getL2ByParam(paramName); if (conf.isUseRegularization() && l2 > 0) { //This can be an axpy op, saving an allocation... //gradientView += params * l2 i.e., dC/dw = dC0/dw + lambda/n * w where C0 is pre-l2 cost function //Equivalent to gradientView.addi(paramsView.mul(conf.getL2ByParam(paramName))); int length = gradientView.length(); Nd4j.getBlasWrapper().level1().axpy(length, l2, paramsView, gradientView); } if (conf.isUseRegularization() && conf.getL1ByParam(paramName) > 0) { gradientView.addi(Transforms.sign(paramsView, true).muli(conf.getL1ByParam(paramName))); } }
// Register variable s on the new configuration with L1, L2 and learning rate all set to 0.0
// (enclosing loop over variable names is outside this view).
newNNC.variables(false).add(s); newNNC.getL1ByParam().put(s, 0.0); newNNC.getL2ByParam().put(s, 0.0); newNNC.getLearningRateByParam().put(s, 0.0);