/**
 * Multiplication by a column vector, copy-on-write: duplicates this array and
 * multiplies the copy in place, so this array is NOT modified.
 * (Previous Javadoc incorrectly described this as "in place addition".)
 *
 * @param columnVector the column vector to multiply by
 * @return a new array holding the result of the multiplication
 */
@Override
public INDArray mulColumnVector(INDArray columnVector) {
    return dup().muliColumnVector(columnVector);
}
/** * * @param to * @param mask */ public static void applyMask(INDArray to, INDArray mask) { //Two possibilities exist: it's *per example* masking, or it's *per output* masking //These cases have different mask shapes. Per example: column vector. Per output: same shape as score array if (mask.isColumnVectorOrScalar()) { to.muliColumnVector(mask); } else if (Arrays.equals(to.shape(), mask.shape())) { to.muli(mask); } else { throw new IllegalStateException("Invalid mask array: per-example masking should be a column vector, " + "per output masking arrays should be the same shape as the labels array. Mask shape: " + Arrays.toString(mask.shape()) + ", output shape: " + Arrays.toString(to.shape())); } } }
/**
 * Per-element squared-error score: (labels - activation(preOutput))^2,
 * scaled element-wise by the trainMask field and, when given, by a
 * per-example mask column vector.
 *
 * @param labels       target values
 * @param preOutput    pre-activation network output
 * @param activationFn activation to apply to preOutput
 * @param mask         optional per-example mask (column vector); may be null
 * @return the masked squared-error score array
 */
private INDArray scoreArray(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    INDArray predictions = activationFn.getActivation(preOutput.dup(), true);
    //abs(labels - yhat), then squared by multiplying with itself
    INDArray absDiff = Transforms.abs(labels.sub(predictions));
    INDArray result = absDiff.mul(absDiff).mul(trainMask);
    if (mask != null) {
        result.muliColumnVector(mask);
    }
    return result;
}
@Override
public void exec() {
    //Softmax forward pass over x
    INDArray softmaxed = Nd4j.getExecutioner().execAndReturn(new OldSoftMax(x));
    //NOTE(review): muli is in-place and returns 'this', so 'mulled' ALIASES 'softmaxed'.
    //Every subsequent op on one also affects the other — verify this is intentional.
    INDArray mulled = softmaxed.muli(y);
    //Sum over the last dimension (per-row sum of softmax*y)
    INDArray summed = mulled.sum(-1);
    //Broadcast-multiply each row by its sum
    softmaxed.muliColumnVector(summed);
    //NOTE(review): because mulled == softmaxed (aliasing above), this computes a - a;
    //the intended softmax gradient s*y - s*sum(s*y) would require 'mulled' to be a copy
    //(softmaxed.mul(y)) — confirm against the op's contract and where the result (z) is stored.
    mulled.subi(softmaxed);
}
@Override public INDArray computeGradient(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) { INDArray output = activationFn.getActivation(preOutput.dup(), true); INDArray yMinusyHat = labels.sub(output); INDArray dldyhat = yMinusyHat.mul(-2); INDArray gradients = activationFn.backprop(preOutput.dup(), dldyhat).getFirst(); gradients = gradients.mul(trainMask); //multiply with masks, always if (mask != null) { gradients.muliColumnVector(mask); } return gradients; }
/**
 * Builds per-window keep-probabilities for Bernoulli under-sampling of the majority
 * class, targeting a fraction of {@code targetMinorityDist} minority examples.
 * NOTE(review): labelMask may be mutated in place and returned (muli is in-place)
 * in the all-majority branch — callers should not rely on labelMask being unchanged.
 *
 * @param minorityLabels     binary labels; nonzero presumably marks the minority class — TODO confirm encoding
 * @param labelMask          existing label mask (1 = use window, 0 = skip)
 * @param targetMinorityDist desired post-sampling fraction of minority examples
 * @return per-window probabilities/mask values for Bernoulli sampling
 */
private INDArray calculateBernoulli(INDArray minorityLabels, INDArray labelMask, double targetMinorityDist) {
    //Indicator arrays for masked minority and majority windows
    INDArray minorityClass = minorityLabels.dup().muli(labelMask);
    INDArray majorityClass = Transforms.not(minorityLabels).muli(labelMask);

    //all minorityLabel class, keep masks as is
    //presence of minoriy class and donotmask minority windows set to true return label as is
    if (majorityClass.sumNumber().intValue() == 0
            || (minorityClass.sumNumber().intValue() > 0 && donotMaskMinorityWindows))
        return labelMask;

    //all majority class and set to not mask all majority windows sample majority class by 1-targetMinorityDist
    //NOTE(review): mutates labelMask in place
    if (minorityClass.sumNumber().intValue() == 0 && !maskAllMajorityWindows)
        return labelMask.muli(1 - targetMinorityDist);

    //Probabilities to be used for bernoulli sampling
    INDArray minoritymajorityRatio = minorityClass.sum(1).div(majorityClass.sum(1));
    INDArray majorityBernoulliP = minoritymajorityRatio.muli(1 - targetMinorityDist).divi(targetMinorityDist);
    //if minority ratio is already met round down to 1.0
    BooleanIndexing.replaceWhere(majorityBernoulliP, 1.0, Conditions.greaterThan(1.0));
    //Majority windows keep their Bernoulli probability; minority windows keep probability 1
    return majorityClass.muliColumnVector(majorityBernoulliP).addi(minorityClass);
}
+ "; per-output masking is not " + "supported for LossCosineProximity"); scoreArr.muliColumnVector(mask);
/**
 * Gradient of the cosine-proximity loss w.r.t. the pre-activation output.
 * Implements d/dyhat of (y . yhat) / (||y|| * ||yhat||) via the quotient rule,
 * then backprops through the activation and applies the per-example mask.
 *
 * @param labels       target values; must have the same column count as preOutput
 * @param preOutput    pre-activation network output
 * @param activationFn activation used on preOutput
 * @param mask         optional per-example mask (column vector); may be null
 * @return gradient array, same shape as preOutput
 * @throws IllegalArgumentException if labels and preOutput column counts differ
 */
@Override
public INDArray computeGradient(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    if (labels.size(1) != preOutput.size(1)) {
        throw new IllegalArgumentException(
                "Labels array numColumns (size(1) = " + labels.size(1) + ") does not match output layer"
                        + " number of outputs (nOut = " + preOutput.size(1) + ") ");
    }
    INDArray yhat = activationFn.getActivation(preOutput.dup(), true);
    //Per-row L2 norms of labels and predictions
    INDArray yL2norm = labels.norm2(1);
    INDArray yhatL2norm = yhat.norm2(1);
    INDArray yhatL2normSq = yhatL2norm.mul(yhatL2norm);

    //Note: This is not really the L1 norm since I am not taking abs values
    INDArray yhatDotyL1norm = labels.mul(yhat).sum(1);

    //Numerator of the quotient rule: y*||yhat||^2 - yhat*(y . yhat), broadcast per row
    INDArray dLda = labels.mulColumnVector(yhatL2normSq);
    dLda.subi(yhat.mulColumnVector(yhatDotyL1norm));

    // transform vals to avoid nans before div
    yL2norm = Transforms.max(yL2norm, Nd4j.EPS_THRESHOLD, false);
    yhatL2norm = Transforms.max(yhatL2norm, Nd4j.EPS_THRESHOLD, false);
    yhatL2normSq = Transforms.max(yhatL2normSq, Nd4j.EPS_THRESHOLD, false);

    //Denominator: ||y|| * ||yhat||^3, applied per row
    dLda.diviColumnVector(yL2norm);
    dLda.diviColumnVector(yhatL2norm.mul(yhatL2normSq));
    //Negate: loss minimizes the negative of the cosine proximity
    dLda.muli(-1);

    //dL/dz
    INDArray gradients = activationFn.backprop(preOutput, dLda).getFirst(); //TODO loss functions with params

    if (mask != null) {
        gradients.muliColumnVector(mask);
    }

    return gradients;
}
/**
 * Gradient of the (1 - F_beta measure) loss w.r.t. the pre-activation output.
 * Uses the precomputed numerator/denominator of the F-measure; handles both the
 * single binary output case and the two-column softmax case (prob(class1) only).
 *
 * @param labels       target values
 * @param preOutput    pre-activation network output
 * @param activationFn activation used on preOutput
 * @param mask         optional per-example mask (column vector); may be null
 * @return gradient array, same shape as preOutput
 */
@Override
public INDArray computeGradient(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    double[] d = computeScoreNumDenom(labels, preOutput, activationFn, mask, false);
    double numerator = d[0];
    double denominator = d[1];

    if (numerator == 0.0 && denominator == 0.0) {
        //Zero score -> zero gradient
        return Nd4j.create(preOutput.shape());
    }

    //Second term of the quotient-rule derivative of numerator/denominator
    double secondTerm = numerator / (denominator * denominator);

    INDArray dLdOut;
    if (labels.size(1) == 1) {
        //Single binary output case
        dLdOut = labels.mul(1 + beta * beta).divi(denominator).subi(secondTerm);
    } else {
        //Softmax case: the getColumn(1) here is to account for the fact that we're using prob(class1)
        // only in the score function; column(1) is equivalent to output for the single output case
        dLdOut = Nd4j.create(labels.shape());
        dLdOut.getColumn(1).assign(labels.getColumn(1).mul(1 + beta * beta).divi(denominator).subi(secondTerm));
    }

    //Negate relative to description in paper, as we want to *minimize* 1.0-fMeasure, which is equivalent to
    // maximizing fMeasure
    dLdOut.negi();

    INDArray dLdPreOut = activationFn.backprop(preOutput, dLdOut).getFirst();

    if (mask != null) {
        dLdPreOut.muliColumnVector(mask);
    }

    return dLdPreOut;
}
/**
 * Applies the per-example mask (maskArray field, a column vector) to the given
 * array in place, zeroing out rows for masked examples.
 *
 * @param to array to mask; modified in place
 */
protected void applyMask(INDArray to) {
    to.muliColumnVector(maskArray);
}
/**
 * Multiplication by a column vector, copy-on-write: duplicates this array and
 * multiplies the copy in place, so this array is NOT modified.
 * (Previous Javadoc incorrectly described this as "in place addition".)
 *
 * @param columnVector the column vector to multiply by
 * @return a new array holding the result of the multiplication
 */
@Override
public INDArray mulColumnVector(INDArray columnVector) {
    return dup().muliColumnVector(columnVector);
}
/**
 * Passes the incoming epsilon straight through (this layer has no parameters),
 * applying the per-example mask when present.
 *
 * @param epsilon gradient from the layer above
 * @return empty gradient plus the (possibly masked) copy of epsilon
 */
@Override
public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) {
    //Copy so the caller's epsilon is never mutated by the in-place masking below
    INDArray passthrough = epsilon.dup();
    if (maskArray != null) {
        passthrough.muliColumnVector(maskArray);
    }
    return new Pair<>(new DefaultGradient(), passthrough);
}
@Override protected void applyMask(INDArray to) { //For output layers: can be either per-example masking, or per- if (maskArray.isColumnVector()) { to.muliColumnVector(maskArray); } else if (Arrays.equals(to.shape(), maskArray.shape())) { to.muli(maskArray); } else { throw new IllegalStateException("Invalid mask array: per-example masking should be a column vector, " + "per output masking arrays should be the same shape as the output/labels arrays. Mask shape: " + Arrays.toString(maskArray.shape()) + ", output shape: " + Arrays.toString(to.shape()) + layerId()); } }
/**
 * Applies the configured activation function to a copy of the input, masking
 * per example when a mask array is set.
 *
 * @param training whether the layer is in training mode
 * @return the activated (and possibly masked) output
 */
@Override
public INDArray activate(boolean training) {
    //dup() so the layer's stored input is left untouched
    INDArray activated = layerConf().getActivationFn().getActivation(input.dup(), training);
    if (maskArray != null) {
        activated.muliColumnVector(maskArray);
    }
    return activated;
}
@Override public INDArray activate(boolean training) { INDArray rows = preOutput(training); //INDArray ret = Nd4j.getExecutioner().execAndReturn(Nd4j.getOpFactory().createTransform(conf.getLayer().getActivationFunction(), rows)); INDArray ret = layerConf().getActivationFn().getActivation(rows, training); if (maskArray != null) { ret.muliColumnVector(maskArray); } return ret; }
@Override public INDArray computeGradient(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) { INDArray output = activationFn.getActivation(preOutput.dup(), true); INDArray dlDx = computeDlDx(labels, output); //Everything below remains the same output = activationFn.backprop(preOutput.dup(), dlDx).getFirst(); //multiply with masks, always if(mask != null) { output.muliColumnVector(mask); } return output; }
@Override public INDArray computeGradient(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) { INDArray output = activationFn.getActivation(preOutput.dup(), true); INDArray dlDx = computeDlDx(labels, output); //Everything below remains the same output = activationFn.backprop(preOutput.dup(), dlDx).getFirst(); //multiply with masks, always if(mask != null) { output.muliColumnVector(mask); } return output; }
@Override public Pair<Gradient, INDArray> backpropGradient(INDArray epsilon) { INDArray delta = layerConf().getActivationFn().backprop(input.dup(), epsilon).getFirst(); //TODO handle activation function params if (maskArray != null) { delta.muliColumnVector(maskArray); } Gradient ret = new DefaultGradient(); return new Pair<>(ret, delta); }
/**
 * Builds per-window keep-probabilities for Bernoulli under-sampling of the majority
 * class, targeting a fraction of {@code targetMinorityDist} minority examples.
 * NOTE(review): labelMask may be mutated in place and returned (muli is in-place)
 * in the all-majority branch — callers should not rely on labelMask being unchanged.
 *
 * @param minorityLabels     binary labels; nonzero presumably marks the minority class — TODO confirm encoding
 * @param labelMask          existing label mask (1 = use window, 0 = skip)
 * @param targetMinorityDist desired post-sampling fraction of minority examples
 * @return per-window probabilities/mask values for Bernoulli sampling
 */
private INDArray calculateBernoulli(INDArray minorityLabels, INDArray labelMask, double targetMinorityDist) {
    //Indicator arrays for masked minority and majority windows
    INDArray minorityClass = minorityLabels.dup().muli(labelMask);
    INDArray majorityClass = Transforms.not(minorityLabels).muli(labelMask);

    //all minorityLabel class, keep masks as is
    //presence of minoriy class and donotmask minority windows set to true return label as is
    if (majorityClass.sumNumber().intValue() == 0
            || (minorityClass.sumNumber().intValue() > 0 && donotMaskMinorityWindows))
        return labelMask;

    //all majority class and set to not mask all majority windows sample majority class by 1-targetMinorityDist
    //NOTE(review): mutates labelMask in place
    if (minorityClass.sumNumber().intValue() == 0 && !maskAllMajorityWindows)
        return labelMask.muli(1-targetMinorityDist);

    //Probabilities to be used for bernoulli sampling
    INDArray minoritymajorityRatio = minorityClass.sum(1).div(majorityClass.sum(1));
    INDArray majorityBernoulliP = minoritymajorityRatio.muli(1 - targetMinorityDist).divi(targetMinorityDist);
    //if minority ratio is already met round down to 1.0
    BooleanIndexing.replaceWhere(majorityBernoulliP,1.0, Conditions.greaterThan(1.0));
    //Majority windows keep their Bernoulli probability; minority windows keep probability 1
    return majorityClass.muliColumnVector(majorityBernoulliP).addi(minorityClass);
}
/**
 * Computes a pairwise similarity matrix between node feature rows using a
 * Gaussian (RBF-style) kernel exp(-||f_i - f_j||^2 / sigma^2), restricted by the
 * adjacency matrix: columns of adjacencyMatrix act as 0/1 weights — TODO confirm
 * adjacency is binary; non-binary values would scale features, not just select them.
 *
 * @param features        node feature matrix (one row per node)
 * @param adjacencyMatrix square adjacency matrix (rows() nodes)
 * @return matrix of exp-kernel similarities, one row per node
 */
@Override
public INDArray ndOp(INDArray features, INDArray adjacencyMatrix) {
    //NOTE(review): sigma is hard-coded; consider making it configurable
    double sigma = 16;
    INDArray[] sumsOfSquareDiffs = new INDArray[adjacencyMatrix.rows()];
    for (int node = 0; node < adjacencyMatrix.rows(); node++) {
        INDArray column = adjacencyMatrix.getColumn(node);
        //Tile this node's feature row to all rows, masked by the adjacency column
        INDArray repeat = features.getRow(node).repeat(0, features.rows()).muliColumnVector(column);
        //Difference against every (masked) node's features
        INDArray sub = repeat.sub(features.mulColumnVector(column));
        //Squared distances summed over the feature dimension (axis 0 here — verify axis semantics)
        sumsOfSquareDiffs[node] = Transforms.pow(sub, 2).sum(0);
    }
    INDArray sumOfSquareDiffs = Nd4j.vstack(sumsOfSquareDiffs).muli(-(1d / Math.pow(sigma, 2)));
    return Transforms.exp(sumOfSquareDiffs);
}