/**
 * Static convenience wrapper around {@link INDArray#norm2(int...)}.
 *
 * @param compute   the array whose L2 norm is requested
 * @param dimension the dimension forwarded to {@code compute.norm2(...)}
 * @return whatever {@code compute.norm2(dimension)} returns
 */
public static INDArray norm2(INDArray compute, int dimension) {
    final INDArray result = compute.norm2(dimension);
    return result;
}
/**
 * Static convenience wrapper that calls {@code compute.norm2(Integer.MAX_VALUE)}.
 *
 * <p>NOTE(review): {@code Integer.MAX_VALUE} is presumably the library's sentinel for
 * "reduce over the whole array" - confirm against the INDArray documentation.
 *
 * @param compute the array whose L2 norm is requested
 * @return whatever {@code compute.norm2(Integer.MAX_VALUE)} returns
 */
public static INDArray norm2(INDArray compute) {
    final INDArray result = compute.norm2(Integer.MAX_VALUE);
    return result;
}
@Override public INDArray computeGradient(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) { if (labels.size(1) != preOutput.size(1)) { throw new IllegalArgumentException( "Labels array numColumns (size(1) = " + labels.size(1) + ") does not match output layer" + " number of outputs (nOut = " + preOutput.size(1) + ") "); } INDArray yhat = activationFn.getActivation(preOutput.dup(), true); INDArray yL2norm = labels.norm2(1); INDArray yhatL2norm = yhat.norm2(1); INDArray yhatL2normSq = yhatL2norm.mul(yhatL2norm); //Note: This is not really the L1 norm since I am not taking abs values INDArray yhatDotyL1norm = labels.mul(yhat).sum(1); INDArray dLda = labels.mulColumnVector(yhatL2normSq); dLda.subi(yhat.mulColumnVector(yhatDotyL1norm)); // transform vals to avoid nans before div yL2norm = Transforms.max(yL2norm, Nd4j.EPS_THRESHOLD, false); yhatL2norm = Transforms.max(yhatL2norm, Nd4j.EPS_THRESHOLD, false); yhatL2normSq = Transforms.max(yhatL2normSq, Nd4j.EPS_THRESHOLD, false); dLda.diviColumnVector(yL2norm); dLda.diviColumnVector(yhatL2norm.mul(yhatL2normSq)); dLda.muli(-1); //dL/dz INDArray gradients = activationFn.backprop(preOutput, dLda).getFirst(); //TODO loss functions with params if (mask != null) { gradients.muliColumnVector(mask); } return gradients; }
/**
 * Returns the L2 norm of {@code compute} by delegating to {@code compute.norm2(dimension)}.
 *
 * @param compute   the array to reduce
 * @param dimension the dimension passed straight through to {@code norm2}
 * @return the array produced by {@code compute.norm2(dimension)}
 */
public static INDArray norm2(INDArray compute, int dimension) {
    final INDArray reduced = compute.norm2(dimension);
    return reduced;
}
/**
 * Returns the L2 norm of {@code compute} by delegating to
 * {@code compute.norm2(Integer.MAX_VALUE)}.
 *
 * <p>NOTE(review): {@code Integer.MAX_VALUE} looks like a "all dimensions" marker -
 * verify against the INDArray API before relying on it.
 *
 * @param compute the array to reduce
 * @return the array produced by {@code compute.norm2(Integer.MAX_VALUE)}
 */
public static INDArray norm2(INDArray compute) {
    final INDArray reduced = compute.norm2(Integer.MAX_VALUE);
    return reduced;
}
/**
 * Signals termination once the L2 norm of the array in {@code otherParams[0]} drops
 * below {@code gradientTolerance}.
 *
 * @param cost        current cost (not read by this condition)
 * @param oldCost     previous cost (not read by this condition)
 * @param otherParams extra parameters; element 0 is cast to {@link INDArray}
 * @return {@code true} when the norm is strictly less than {@code gradientTolerance}
 */
@Override
public boolean terminate(double cost, double oldCost, Object[] otherParams) {
    final INDArray candidate = (INDArray) otherParams[0];
    final INDArray normArray = candidate.norm2(Integer.MAX_VALUE);
    final double magnitude = normArray.getDouble(0);
    return magnitude < gradientTolerance;
}
}
// L2 norm of (input - centersForExamples), reduced along dimension 1.
// The second statement multiplies the result by itself via muli and discards the return value,
// so it relies on muli updating norm2DifferenceSquared in place; afterwards the variable holds
// the squared norm, matching its name.
INDArray norm2DifferenceSquared = input.sub(centersForExamples).norm2(1); norm2DifferenceSquared.muli(norm2DifferenceSquared);
@Override public INDArray doForward(boolean training) { if (!canDoForward()) throw new IllegalStateException("Cannot do forward pass: inputs not set (L2NormalizeVertex " + vertexName + " idx " + vertexIndex + ")"); // L2 norm along all dimensions except 0, unless user-specified // x / |x|2 INDArray x = inputs[0]; int[] dimensions = getDimensions(x); INDArray xNorm2 = x.norm2(dimensions); Transforms.max(xNorm2, eps, false); if (x.rank() == 2) { return x.divColumnVector(xNorm2); } else { INDArray out = Nd4j.createUninitialized(x.shape(), x.ordering()); return Nd4j.getExecutioner().execAndReturn(new BroadcastDivOp(x, xNorm2, out, 0)); } }
@Override public INDArray computeGradient(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) { if (labels.size(1) != preOutput.size(1)) { throw new IllegalArgumentException("Labels array numColumns (size(1) = " + labels.size(1) + ") does not match output layer" + " number of outputs (nOut = " + preOutput.size(1) + ") "); } INDArray yhat = activationFn.getActivation(preOutput.dup(), true); INDArray yL2norm = labels.norm2(1); INDArray yhatL2norm = yhat.norm2(1); INDArray yhatL2normSq = yhatL2norm.mul(yhatL2norm); //Note: This is not really the L1 norm since I am not taking abs values INDArray yhatDotyL1norm = labels.mul(yhat).sum(1); INDArray dLda = labels.mulColumnVector(yhatL2normSq); dLda.subi(yhat.mulColumnVector(yhatDotyL1norm)); // transform vals to avoid nans before div yL2norm = Transforms.max(yL2norm, Nd4j.EPS_THRESHOLD, false); yhatL2norm = Transforms.max(yhatL2norm, Nd4j.EPS_THRESHOLD, false); yhatL2normSq = Transforms.max(yhatL2normSq, Nd4j.EPS_THRESHOLD, false); dLda.diviColumnVector(yL2norm); dLda.diviColumnVector(yhatL2norm.mul(yhatL2normSq)); dLda.muli(-1); //dL/dz INDArray gradients = activationFn.backprop(preOutput, dLda).getFirst(); //TODO loss functions with params if (mask != null) { gradients.muliColumnVector(mask); } return gradients; }
// norm is the L2 norm of x over the dimensions chosen by getDimensions(x).
// norm3 is norm raised to the power 3.0; the last argument is true (presumably "dup", i.e. norm is
// left unmodified by the pow call - confirm against Transforms.pow).
// norm3 is computed BEFORE norm is floored at eps, so it is derived from the unclamped norm.
// NOTE(review): norm3 itself is not clamped in the statements visible here - confirm it is
// guarded before being used as a divisor.
int[] dimensions = getDimensions(x); INDArray norm = x.norm2(dimensions); INDArray norm3 = Transforms.pow(norm, 3.0, true); Transforms.max(norm, eps, false); // in case of div/0