@Override
public void exec() {
    Nd4j.getExecutioner().exec(new Sigmoid(x, z));
    z.muli(Transforms.exp(x.neg(), false)).muli(y);
}
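A hedged aside on the chain above: since sigmoid(x) * exp(-x) = (1 / (1 + e^(-x))) * e^(-x) = 1 - sigmoid(x), the result z is (1 - sigmoid(x)) * y. The sketch below checks that identity numerically with plain ND4J calls; x and y are stand-in arrays and the class name is illustrative, not the surrounding class's fields.

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.ops.transforms.Transforms;

public class SigmoidNegCheck {
    public static void main(String[] args) {
        // Hypothetical values standing in for the x and y fields used above
        INDArray x = Nd4j.create(new double[] {-2.0, -0.5, 0.0, 0.5, 2.0});
        INDArray y = Nd4j.create(new double[] {1.0, -1.0, 1.0, -1.0, 1.0});

        // z = sigmoid(x) * exp(-x) * y, mirroring the op chain above
        INDArray z = Transforms.sigmoid(x).muli(Transforms.exp(x.neg(), false)).muli(y);

        // Algebraically, sigmoid(x) * exp(-x) = 1 - sigmoid(x), so z == (1 - sigmoid(x)) * y
        INDArray expected = Transforms.sigmoid(x).rsubi(1.0).muli(y);

        System.out.println(z);
        System.out.println(expected); // should match element-wise, up to floating point
    }
}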
BooleanIndexing.replaceWhere(bitMaskRowCol, 1.0, Conditions.greaterThan(0.0));
INDArray dLda = labels.neg().muli(bitMaskRowCol);
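For context (an inference, not stated in the fragment): this is the shape of the standard hinge-loss gradient. With L = sum of max(0, 1 - y*yhat), dL/dyhat = -y wherever 1 - y*yhat > 0 and 0 elsewhere, and bitMaskRowCol plays the role of that indicator. A toy sketch with illustrative values and an illustrative class name:

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.indexing.BooleanIndexing;
import org.nd4j.linalg.indexing.conditions.Conditions;

public class HingeGradientSketch {
    public static void main(String[] args) {
        // Toy +/-1 labels and raw scores (hypothetical values)
        INDArray labels = Nd4j.create(new double[] {1, -1, 1, -1});
        INDArray scores = Nd4j.create(new double[] {2.0, -0.3, 0.4, 1.5});

        // margin = 1 - y * yhat; the hinge gradient is -y where margin > 0, else 0
        INDArray margin = labels.mul(scores).rsubi(1.0);
        INDArray mask = margin.dup();
        BooleanIndexing.replaceWhere(mask, 0.0, Conditions.lessThan(0.0));
        BooleanIndexing.replaceWhere(mask, 1.0, Conditions.greaterThan(0.0));

        // Same expression as the fragment above
        INDArray dLda = labels.neg().muli(mask);
        System.out.println(dLda); // [0, 1, -1, 1] for these values
    }
}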
@Override
public INDArray computeGradient(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    if (labels.size(1) != preOutput.size(1)) {
        throw new IllegalArgumentException("Labels array numColumns (size(1) = " + labels.size(1)
                        + ") does not match output layer number of outputs (nOut = " + preOutput.size(1) + ")");
    }
    INDArray scoreArr = scoreArray(labels, preOutput, activationFn, mask);

    //Bit mask is 0 where 1-sigma(y*yhat) is negative, 1 where it is positive
    INDArray bitMaskRowCol = scoreArr.dup();
    BooleanIndexing.replaceWhere(bitMaskRowCol, 0.0, Conditions.lessThan(0.0));
    BooleanIndexing.replaceWhere(bitMaskRowCol, 1.0, Conditions.greaterThan(0.0));

    INDArray dLda = scoreArr.muli(2).muli(labels.neg());
    dLda.muli(bitMaskRowCol);

    if (mask != null && LossUtil.isPerOutputMasking(dLda, mask)) {
        //For *most* activation functions we don't actually need to mask dL/da in addition to masking dL/dz later,
        //but some, like softmax, require both (due to dL/dz_i being a function of dL/da_j, for i != j).
        //We could add a special case for softmax (activationFn instanceof ActivationSoftmax), but that would be
        //error prone - though it would buy us a tiny bit of performance.
        LossUtil.applyMask(dLda, mask);
    }

    INDArray gradients = activationFn.backprop(preOutput, dLda).getFirst(); //TODO activation functions with params

    if (mask != null) {
        LossUtil.applyMask(gradients, mask);
    }
    return gradients;
}
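If this method belongs to ND4J's LossSquaredHinge (an assumption; the class isn't named here, but the factor of 2 and the +/-1 labels fit the squared hinge), it can be exercised directly through the ILossFunction API. A minimal, hypothetical usage sketch with illustrative values and class name:

import org.nd4j.linalg.activations.IActivation;
import org.nd4j.linalg.activations.impl.ActivationIdentity;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.lossfunctions.impl.LossSquaredHinge;

public class SquaredHingeGradientUsage {
    public static void main(String[] args) {
        // Two examples, one output each; squared hinge expects +/-1 labels
        INDArray labels = Nd4j.create(new double[][] {{1}, {-1}});
        INDArray preOutput = Nd4j.create(new double[][] {{0.4}, {0.3}});

        IActivation identity = new ActivationIdentity();
        // No per-output mask in this sketch (last argument null)
        INDArray grad = new LossSquaredHinge().computeGradient(labels, preOutput, identity, null);
        System.out.println(grad); // gradient w.r.t. pre-output, zero wherever the margin is satisfied
    }
}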
public static INDArray subi(INDArray left, INDArray right) {
    return applyInlineOperation(
            left, right,
            a -> a.neg(),
            INDArray::subi,
            INDArray::addi,
            (l, r, result, dims) -> Broadcast.sub(l, r, result, Ints.toArray(dims)),
            (l, r, result, dims) -> Broadcast.add(l, r, result, Ints.toArray(dims)));
}
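The broadcasting branches above delegate to Broadcast.sub / Broadcast.add. A minimal sketch of what the Broadcast.sub call does for a 2-D matrix and a row vector; shapes, values, and the wrapper class name are illustrative, and the import location is the usual one in ND4J:

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Broadcast;
import org.nd4j.linalg.factory.Nd4j;

public class BroadcastSubSketch {
    public static void main(String[] args) {
        INDArray m = Nd4j.create(new double[][] {{1, 2, 3}, {4, 5, 6}});
        INDArray row = Nd4j.create(new double[] {1, 1, 1});

        // Subtract the row vector from every row of m, writing the result back into m.
        // Dimension 1 tells the op which axis of m the vector lines up with.
        Broadcast.sub(m, row, m, 1);
        System.out.println(m); // [[0, 1, 2], [3, 4, 5]]
    }
}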
/**
 * Computes a Gaussian kernel
 * given a vector of squared distances
 *
 * @param d    the squared distances
 * @param beta the precision (multiplier on the squared distances)
 * @return the pair (H, P), where P has been normalised to sum to 1
 */
public Pair<Double, INDArray> hBeta(INDArray d, double beta) {
    INDArray P = exp(d.neg().muli(beta));
    double sumP = P.sumNumber().doubleValue();
    double logSumP = FastMath.log(sumP);
    Double H = logSumP + ((beta * (d.mul(P).sumNumber().doubleValue())) / sumP);
    P.divi(sumP);
    return new Pair<>(H, P);
}
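A self-contained sketch that walks the same steps as hBeta with toy numbers; the wrapper class and values are illustrative, and exp here is spelled out as Transforms.exp, which the method above presumably imports statically:

import org.apache.commons.math3.util.FastMath;
import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;
import org.nd4j.linalg.ops.transforms.Transforms;

public class HBetaSketch {
    public static void main(String[] args) {
        // Toy squared distances to a point's neighbours, and a trial precision beta
        INDArray d = Nd4j.create(new double[] {0.5, 1.0, 2.0, 4.0});
        double beta = 1.0;

        // Unnormalised Gaussian affinities P = exp(-beta * d)
        INDArray p = Transforms.exp(d.neg().muli(beta));
        double sumP = p.sumNumber().doubleValue();

        // H = log(sum P) + beta * sum(d * P) / sum(P)
        double h = FastMath.log(sumP) + beta * d.mul(p).sumNumber().doubleValue() / sumP;

        // Normalise P to a probability distribution
        p.divi(sumP);

        System.out.println("H = " + h);
        System.out.println("P = " + p);
    }
}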
INDArray[] out2 = new INDArray[2];
out2[0] = epsilon;
out2[1] = epsilon.neg();
return new Pair<>(null, out2);
case Product:
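A hedged note on this branch: the epsilon / epsilon.neg() pair just before case Product suggests this is the Subtract case of an element-wise vertex (an inference, not stated in the fragment). With out = a - b element-wise, d(out)/da = 1 and d(out)/db = -1, so dL/da = epsilon and dL/db = -epsilon, which is exactly what out2[0] and out2[1] carry.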
@Override
public Pair<Gradient, INDArray[]> doBackward(boolean tbptt) {
    if (!canDoBackward())
        throw new IllegalStateException("Cannot do backward pass: error not set");

    INDArray a = inputs[0];
    INDArray b = inputs[1];
    INDArray out = doForward(tbptt);
    Transforms.max(out, eps, false); // in case of 0

    INDArray dLdlambda = epsilon;      //dL/dlambda aka 'epsilon' - from layer above
    INDArray sNegHalf = out.rdiv(1.0); //s^(-1/2) = 1.0 / s^(1/2) = 1.0 / out
    INDArray diff = a.sub(b);
    INDArray first = dLdlambda.mul(sNegHalf); //Column vector for all cases

    INDArray dLda;
    INDArray dLdb;
    if (a.rank() == 2) {
        //2d case (MLPs etc)
        dLda = diff.muliColumnVector(first);
        dLdb = dLda.neg();
    } else {
        //RNN and CNN case - broadcast along dimension 0
        dLda = Nd4j.getExecutioner().execAndReturn(new BroadcastMulOp(diff, first, diff, 0));
        dLdb = dLda.neg();
    }

    return new Pair<>(null, new INDArray[] {dLda, dLdb});
}
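Reading doForward as the per-example L2 distance out = ||a - b||_2 (the s^(-1/2) comment implies out = sqrt(s) with s the sum of squared differences), the gradients are dL/da = epsilon * (a - b) / out and dL/db = -dL/da, which is what the column-vector multiply above computes row by row. A single-row sketch with illustrative values and an illustrative class name:

import org.nd4j.linalg.api.ndarray.INDArray;
import org.nd4j.linalg.factory.Nd4j;

public class L2VertexGradientSketch {
    public static void main(String[] args) {
        // One example (row) per input; hypothetical values
        INDArray a = Nd4j.create(new double[] {1.0, 2.0, 2.0});
        INDArray b = Nd4j.create(new double[] {0.0, 0.0, 0.0});
        double epsilon = 1.0; // dL/d(distance) coming from the layer above

        INDArray diff = a.sub(b);
        double dist = diff.norm2Number().doubleValue(); // ||a - b||_2 = 3 here

        // dL/da = epsilon * (a - b) / ||a - b||_2, and dL/db is its negation,
        // mirroring dLda = diff * (epsilon / out) and dLdb = dLda.neg() above
        INDArray dLda = diff.mul(epsilon / dist);
        INDArray dLdb = dLda.neg();

        System.out.println(dLda); // [1/3, 2/3, 2/3]
        System.out.println(dLdb); // [-1/3, -2/3, -2/3]
    }
}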