/**
 * Signum function of this ndarray
 *
 * @param toSign the ndarray to take the elementwise sign of
 * @return a copy of the input with the signum function applied
 */
public static INDArray sign(INDArray toSign) {
    return sign(toSign, true);
}
/**
 * Signum function of this ndarray
 *
 * @param toSign the ndarray to take the elementwise sign of
 * @return the result of applying signum; whether a copy is made follows Nd4j.copyOnOps
 */
public static INDArray sign(INDArray toSign) {
    return sign(toSign, Nd4j.copyOnOps);
}
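// Hedged usage sketch of the sign(...) overloads above (the example values are made up,
// not taken from the snippets): each element is mapped to -1, 0, or 1.
INDArray x = Nd4j.create(new double[] {-3.5, 0.0, 2.1});
INDArray s = Transforms.sign(x);   // [-1.0, 0.0, 1.0], typically returned as a copy
Transforms.sign(x, false);         // two-argument overload with dup = false works in place on x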
/**
 * Cast the given tensor to integer values by dropping the fractional part of each element
 * (truncation toward zero).
 *
 * @param tensor    the tensor to truncate
 * @param duplicate whether to operate on a copy (true) or in place (false)
 * @return the truncated tensor
 */
public static INDArray castToInteger(INDArray tensor, boolean duplicate) {
    INDArray tensorToDropFractionOn = duplicate ? tensor.dup() : tensor;
    // Truncate toward zero: floor(|x|) * sign(x)
    INDArray sign = Transforms.sign(tensorToDropFractionOn);
    Transforms.floor(Transforms.abs(tensorToDropFractionOn, false), false).muli(sign);
    return tensorToDropFractionOn;
}
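// Illustrative call of the castToInteger helper above (the input values are assumptions,
// not taken from the snippet). Truncation is toward zero, so -2.7 maps to -2.0 rather than
// -3.0, and the original array is left untouched when duplicate == true.
INDArray vals = Nd4j.create(new double[] {1.9, -2.7, 0.4});
INDArray truncated = castToInteger(vals, true);   // [1.0, -2.0, 0.0]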
@Override
public void update(INDArray gradient, String paramType) {
    INDArray yGrads = gradient;
    // Update per-element gains based on agreement between the gradient sign and the current increment sign
    gains = gains.add(.2).muli(sign(yGrads)).neqi(sign(yIncs))
            .addi(gains.mul(0.8).muli(sign(yGrads)).neqi(sign(yIncs)));
    // Clamp gains from below at minGain
    BooleanIndexing.applyWhere(gains, Conditions.lessThan(minGain), new Value(minGain));
    INDArray gradChange = gains.mul(yGrads);
    if (useAdaGrad) {
        // Lazily initialize AdaGrad state on first use
        if (adaGrad == null) {
            adaGrad = new AdaGrad(gradient.shape(), learningRate);
            adaGrad.setStateViewArray(Nd4j.zeros(gradient.shape()).reshape(1, gradChange.length()),
                    gradChange.shape(), gradient.ordering(), true);
        }
        gradChange = adaGrad.getGradient(gradChange, 0);
    } else {
        gradChange.muli(learningRate);
    }
    // Momentum step: yIncs = momentum * yIncs - gradChange, then apply the increments to the embedding Y
    yIncs.muli(momentum).subi(gradChange);
    Y.addi(yIncs);
}
@Override
public void update(INDArray gradient, String paramType) {
    // Run the update inside a workspace (or a dummy one when workspaces are disabled)
    MemoryWorkspace workspace = workspaceMode == WorkspaceMode.NONE
            ? new DummyWorkspace()
            : Nd4j.getWorkspaceManager().getWorkspaceForCurrentThread(
                    workspaceConfigurationExternal, workspaceExternal);
    try (MemoryWorkspace ws = workspace.notifyScopeEntered()) {
        INDArray yGrads = gradient;
        // Update per-element gains based on agreement between the gradient sign and the current increment sign
        gains = gains.add(.2).muli(sign(yGrads)).neqi(sign(yIncs))
                .addi(gains.mul(0.8).muli(sign(yGrads)).neqi(sign(yIncs)));
        // Clamp gains from below at minGain
        BooleanIndexing.applyWhere(gains, Conditions.lessThan(minGain), new Value(minGain));
        INDArray gradChange = gains.mul(yGrads);
        if (useAdaGrad) {
            // Lazily initialize AdaGrad state on first use
            if (adaGrad == null) {
                // FIXME: int cast
                adaGrad = new AdaGrad(ArrayUtil.toInts(gradient.shape()), learningRate);
                adaGrad.setStateViewArray(Nd4j.zeros(gradient.shape()).reshape(1, gradChange.length()),
                        gradChange.shape(), gradient.ordering(), true);
            }
            gradChange = adaGrad.getGradient(gradChange, 0);
        } else {
            gradChange.muli(learningRate);
        }
        // Momentum step: yIncs = momentum * yIncs - gradChange, then apply the increments to the embedding Y
        yIncs.muli(momentum).subi(gradChange);
        Y.addi(yIncs);
    }
}
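// Scalar sketch of the gains/momentum step the two update(...) variants above apply
// elementwise. It follows the standard t-SNE gains heuristic (grow the gain additively when
// the gradient sign disagrees with the current increment, shrink it multiplicatively when
// they agree) rather than translating the chained ND4J calls line by line; all parameter
// names below are assumptions used only for illustration.
static double[] tsneScalarStep(double grad, double yInc, double gain, double y,
                               double learningRate, double momentum, double minGain) {
    gain = (Math.signum(grad) != Math.signum(yInc)) ? gain + 0.2 : gain * 0.8;
    gain = Math.max(gain, minGain);                  // clamp, as applyWhere(lessThan(minGain)) does
    double gradChange = learningRate * gain * grad;  // non-AdaGrad branch
    yInc = momentum * yInc - gradChange;             // yIncs.muli(momentum).subi(gradChange)
    return new double[] {y + yInc, yInc, gain};      // updated coordinate, increment, and gain
}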
/**
 * Apply L1 and L2 regularization, if necessary. Note that L1/L2 may differ for different layers in the same block.
 *
 * @param layer        The layer to apply L1/L2 to
 * @param paramName    Parameter name in the given layer
 * @param gradientView Gradient view array for the layer + param
 * @param paramsView   Parameter view array for the layer + param
 */
public void postApply(Layer layer, String paramName, INDArray gradientView, INDArray paramsView) {
    NeuralNetConfiguration conf = layer.conf();
    //TODO: do this for multiple contiguous params/layers (fewer, larger ops)
    double l2 = conf.getL2ByParam(paramName);
    if (conf.isUseRegularization() && l2 > 0) {
        //This can be an axpy op, saving an allocation...
        //gradientView += params * l2, i.e., dC/dw = dC0/dw + lambda/n * w where C0 is the pre-L2 cost function
        //Equivalent to gradientView.addi(paramsView.mul(conf.getL2ByParam(paramName)))
        int length = gradientView.length();
        Nd4j.getBlasWrapper().level1().axpy(length, l2, paramsView, gradientView);
    }
    if (conf.isUseRegularization() && conf.getL1ByParam(paramName) > 0) {
        gradientView.addi(Transforms.sign(paramsView, true).muli(conf.getL1ByParam(paramName)));
    }
}
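// Per-weight reading of postApply above (illustrative only; grad, w, l1 and l2 stand in for
// a single element of gradientView/paramsView and the per-parameter coefficients): the axpy
// call contributes l2 * w and the Transforms.sign term contributes l1 * sign(w).
static double regularizeGradient(double grad, double w, double l1, double l2) {
    return grad + l2 * w + l1 * Math.signum(w);
}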