@Override
public double calcL2(boolean backpropParamsOnly) {
    if (!this.conf.isUseRegularization()) {
        return 0.0;
    }

    double l2Sum = 0.0;
    double l2Norm;
    if (this.conf.getL2ByParam("W") > 0.0) {
        l2Norm = this.getParam("W").norm2Number().doubleValue();
        l2Sum += 0.5 * this.conf.getL2ByParam("W") * l2Norm * l2Norm;
    }
    if (this.conf.getL2ByParam("uw") > 0.0) {
        l2Norm = this.getParam("uw").norm2Number().doubleValue();
        l2Sum += 0.5 * this.conf.getL2ByParam("uw") * l2Norm * l2Norm;
    }
    if (this.conf.getL2ByParam("b") > 0.0) {
        l2Norm = this.getParam("b").norm2Number().doubleValue();
        l2Sum += 0.5 * this.conf.getL2ByParam("b") * l2Norm * l2Norm;
    }
    return l2Sum;
}
@Override
public double calcL1(boolean backpropParamsOnly) {
    if (!conf.isUseRegularization())
        return 0.0;

    double l1Sum = 0.0;
    for (Map.Entry<String, INDArray> entry : paramTable().entrySet()) {
        double l1 = conf.getL1ByParam(entry.getKey());
        if (l1 > 0) {
            double norm1 = getParam(entry.getKey()).norm1Number().doubleValue();
            l1Sum += l1 * norm1;
        }
    }
    return l1Sum;
}
@Override
public double calcL2(boolean backpropParamsOnly) {
    if (!conf.isUseRegularization())
        return 0.0;

    double l2Sum = 0.0;
    for (Map.Entry<String, INDArray> entry : paramTable().entrySet()) {
        double l2 = conf.getL2ByParam(entry.getKey());
        if (l2 > 0) {
            double norm2 = getParam(entry.getKey()).norm2Number().doubleValue();
            l2Sum += 0.5 * l2 * norm2 * norm2;
        }
    }
    return l2Sum;
}
@Override
public double calcL1(boolean backpropParamsOnly) {
    if (!conf.isUseRegularization())
        return 0.0;

    double l1Sum = 0.0;
    for (Map.Entry<String, INDArray> e : paramTable().entrySet()) {
        double l1 = conf().getL1ByParam(e.getKey());
        if (l1 <= 0.0 || (backpropParamsOnly && isPretrainParam(e.getKey()))) {
            continue;
        }
        l1Sum += l1 * e.getValue().norm1Number().doubleValue();
    }
    return l1Sum;
}
@Override
public double calcL2(boolean backpropParamsOnly) {
    if (!conf.isUseRegularization())
        return 0.0;

    double l2Sum = 0.0;
    for (Map.Entry<String, INDArray> e : paramTable().entrySet()) {
        double l2 = conf().getL2ByParam(e.getKey());
        if (l2 <= 0.0 || (backpropParamsOnly && isPretrainParam(e.getKey()))) {
            continue;
        }
        double l2Norm = e.getValue().norm2Number().doubleValue();
        l2Sum += 0.5 * l2 * l2Norm * l2Norm;
    }
    return l2Sum;
}
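The per-parameter loops above all reduce to the standard penalties L1 = lambda1 * sum_i |w_i| and L2 = 0.5 * lambda2 * sum_i w_i^2; the term 0.5 * l2 * norm2 * norm2 is simply 0.5 * lambda2 * ||w||_2 squared. The following stand-alone sketch (plain Java, hypothetical names and coefficients, not taken from the library code) shows the same arithmetic on a toy parameter table:

import java.util.LinkedHashMap;
import java.util.Map;

public class RegularizationPenaltySketch {
    public static void main(String[] args) {
        // Hypothetical parameter table: one flat array of weights per parameter key
        Map<String, double[]> paramTable = new LinkedHashMap<>();
        paramTable.put("W", new double[] {0.5, -1.0, 2.0});
        paramTable.put("b", new double[] {0.1, -0.2});

        double lambda1 = 1e-4;   // hypothetical L1 coefficient (conf.getL1ByParam equivalent)
        double lambda2 = 5e-4;   // hypothetical L2 coefficient (conf.getL2ByParam equivalent)

        double l1Sum = 0.0;
        double l2Sum = 0.0;
        for (double[] param : paramTable.values()) {
            double norm1 = 0.0;        // sum_i |w_i|  == norm1Number()
            double sumSquares = 0.0;   // sum_i w_i^2  == (norm2Number())^2
            for (double w : param) {
                norm1 += Math.abs(w);
                sumSquares += w * w;
            }
            l1Sum += lambda1 * norm1;
            l2Sum += 0.5 * lambda2 * sumSquares;
        }
        System.out.println("L1 penalty: " + l1Sum + ", L2 penalty: " + l2Sum);
    }
}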
@Override
public double calcL1(boolean backpropParamsOnly) {
    if (!conf.isUseRegularization())
        return 0.0;

    double l1Sum = super.calcL1(true);
    if (backpropParamsOnly)
        return l1Sum;   // visible bias is a pretraining parameter; exclude it when only backprop params are requested
    if (conf.getL1ByParam(PretrainParamInitializer.VISIBLE_BIAS_KEY) > 0) {
        l1Sum += conf.getL1ByParam(PretrainParamInitializer.VISIBLE_BIAS_KEY)
                        * getParam(PretrainParamInitializer.VISIBLE_BIAS_KEY).norm1Number().doubleValue();
    }
    return l1Sum;
}
@Override
public double calcL2(boolean backpropParamsOnly) {
    if (!conf.isUseRegularization())
        return 0.0;

    double l2Sum = super.calcL2(true);
    if (backpropParamsOnly)
        return l2Sum;   // visible bias is a pretraining parameter; exclude it when only backprop params are requested
    if (conf.getL2ByParam(PretrainParamInitializer.VISIBLE_BIAS_KEY) > 0) {
        double l2Norm = getParam(PretrainParamInitializer.VISIBLE_BIAS_KEY).norm2Number().doubleValue();
        l2Sum += 0.5 * conf.getL2ByParam(PretrainParamInitializer.VISIBLE_BIAS_KEY) * l2Norm * l2Norm;
    }
    return l2Sum;
}
@Override
public double calcL1(boolean backpropParamsOnly) {
    if (!conf.isUseRegularization())
        return 0.0;

    double l1Sum = 0.0;
    if (conf.getL1ByParam(DefaultParamInitializer.WEIGHT_KEY) > 0.0) {
        l1Sum += conf.getL1ByParam(DefaultParamInitializer.WEIGHT_KEY)
                        * getParam(DefaultParamInitializer.WEIGHT_KEY).norm1Number().doubleValue();
    }
    if (conf.getL1ByParam(DefaultParamInitializer.BIAS_KEY) > 0.0) {
        l1Sum += conf.getL1ByParam(DefaultParamInitializer.BIAS_KEY)
                        * getParam(DefaultParamInitializer.BIAS_KEY).norm1Number().doubleValue();
    }
    return l1Sum;
}
@Override
public double calcL2(boolean backpropParamsOnly) {
    if (!conf.isUseRegularization())
        return 0.0;

    //L2 norm: sqrt( sum_i x_i^2 ) -> we want the sum of squared weights, i.e. the L2 norm squared
    double l2Sum = 0.0;
    if (conf.getL2ByParam(DefaultParamInitializer.WEIGHT_KEY) > 0.0) {
        double l2Norm = getParam(DefaultParamInitializer.WEIGHT_KEY).norm2Number().doubleValue();
        l2Sum += 0.5 * conf.getL2ByParam(DefaultParamInitializer.WEIGHT_KEY) * l2Norm * l2Norm;
    }
    if (conf.getL2ByParam(DefaultParamInitializer.BIAS_KEY) > 0.0) {
        double l2Norm = getParam(DefaultParamInitializer.BIAS_KEY).norm2Number().doubleValue();
        l2Sum += 0.5 * conf.getL2ByParam(DefaultParamInitializer.BIAS_KEY) * l2Norm * l2Norm;
    }
    return l2Sum;
}
/**
 * Apply L1 and L2 regularization, if necessary. Note that L1/L2 may differ for different layers in the same block.
 *
 * @param layer        The layer to apply L1/L2 to
 * @param paramName    Parameter name in the given layer
 * @param gradientView Gradient view array for the layer + param
 * @param paramsView   Parameter view array for the layer + param
 */
public void postApply(Layer layer, String paramName, INDArray gradientView, INDArray paramsView) {
    NeuralNetConfiguration conf = layer.conf();

    //TODO: do this for multiple contiguous params/layers (fewer, larger ops)
    double l2 = conf.getL2ByParam(paramName);
    if (conf.isUseRegularization() && l2 > 0) {
        //This can be an axpy op, saving an allocation...
        //gradientView += params * l2    i.e., dC/dw = dC0/dw + lambda/n * w, where C0 is the pre-L2 cost function
        //Equivalent to: gradientView.addi(paramsView.mul(conf.getL2ByParam(paramName)));
        int length = gradientView.length();
        Nd4j.getBlasWrapper().level1().axpy(length, l2, paramsView, gradientView);
    }
    if (conf.isUseRegularization() && conf.getL1ByParam(paramName) > 0) {
        gradientView.addi(Transforms.sign(paramsView, true).muli(conf.getL1ByParam(paramName)));
    }
}
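For reference, the axpy call plus the Transforms.sign term amount to gradient += l2 * w + l1 * sign(w). The following stand-alone sketch (plain Java on flat arrays, hypothetical values, not the library implementation) shows the same update element by element:

import java.util.Arrays;

public class PostApplySketch {
    public static void main(String[] args) {
        double[] params = {0.5, -1.0, 2.0, 0.0};
        double[] gradient = {0.10, -0.20, 0.30, 0.05};   // dC0/dw, before regularization terms
        double l2 = 5e-4;   // hypothetical L2 coefficient for this parameter
        double l1 = 1e-4;   // hypothetical L1 coefficient for this parameter

        for (int i = 0; i < gradient.length; i++) {
            // axpy step: gradient += l2 * params   (dC/dw = dC0/dw + l2 * w)
            gradient[i] += l2 * params[i];
            // L1 step: gradient += l1 * sign(params)
            gradient[i] += l1 * Math.signum(params[i]);
        }
        System.out.println(Arrays.toString(gradient));
    }
}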
if (lv.getLayerConf().isUseRegularization() && dropout != 0.0) {
    throw new IllegalStateException("Must have dropout == 0.0 for gradient checks - got dropout = "
                    + dropout + " for layer " + layerCount);
}
if (n.isUseRegularization() && dropout != 0.0) {
    throw new IllegalStateException("Must have dropout == 0.0 for gradient checks - got dropout = "
                    + dropout + " for layer " + layerCount);
}
LayerValidation.generalValidation(l.getLayerName(), l, nnc.isUseRegularization(), useDropCon, dropOut, l2, l2Bias, l1, l1Bias, dist);