/**
 * Reverse division
 *
 * @param other the ndarray to divide by this one (the result is other / this)
 * @return a new ndarray containing the result of the reverse division
 */
@Override
public INDArray rdiv(INDArray other) {
    return dup().rdivi(other);
}
/**
 * Reverse division
 *
 * @param other  the ndarray to divide by this one (the result is other / this)
 * @param result the result ndarray
 * @return the result ndarray
 */
@Override
public INDArray rdiv(INDArray other, INDArray result) {
    return dup().rdivi(other, result);
}
@Override
public IComplexNDArray rdiv(IComplexNumber n, IComplexNDArray result) {
    return dup().rdivi(n, result);
}

@Override
public IComplexNDArray rdiv(IComplexNumber n) {
    return dup().rdivi(n);
}
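// Usage sketch (not from the original source; values are hypothetical):
// rdiv is "reverse" division, so a.rdiv(b) computes b / a element-wise on a
// copy of a, while a.rdivi(b) performs the same division in place on a.
INDArray a = Nd4j.create(new double[] {2.0, 4.0, 8.0});
INDArray b = Nd4j.create(new double[] {16.0, 16.0, 16.0});
INDArray c = a.rdiv(b);  // c = [8.0, 4.0, 2.0]; a is left unchanged
a.rdivi(b);              // a is now [8.0, 4.0, 2.0]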
/**
 * Gets feature-specific learning rates.
 * Adagrad keeps a history of gradients being passed in; each gradient
 * passed in becomes adapted over time, hence the name adagrad.
 *
 * @param gradient  the gradient to get learning rates for
 * @param iteration the current iteration number
 * @return the feature-specific learning rates
 */
public INDArray getGradient(INDArray gradient, int iteration) {
    if (historicalGradient == null)
        throw new IllegalStateException("Updater has not been initialized with view state");

    historicalGradient.addi(gradient.mul(gradient));
    INDArray sqrtHistory = sqrt(historicalGradient.dup(gradientReshapeOrder), false).addi(epsilon);
    // lr * gradient / (sqrt(sumSquaredGradients) + epsilon)
    INDArray ret = gradient.muli(sqrtHistory.rdivi(learningRate));
    numIterations++;
    return ret;
}
/**
 * Applies the AdaGrad update to the gradient in place.
 * Adagrad keeps a history of gradients being passed in; each gradient
 * passed in becomes adapted over time, hence the name adagrad.
 *
 * @param gradient  the gradient to apply the update to
 * @param iteration the current iteration number
 * @param epoch     the current epoch number
 */
@Override
public void applyUpdater(INDArray gradient, int iteration, int epoch) {
    if (historicalGradient == null)
        throw new IllegalStateException("Updater has not been initialized with view state");

    double learningRate = config.getLearningRate(iteration, epoch);
    double epsilon = config.getEpsilon();

    historicalGradient.addi(gradient.mul(gradient));
    INDArray sqrtHistory = sqrt(historicalGradient.dup(gradientReshapeOrder), false).addi(epsilon);
    // lr * gradient / (sqrt(sumSquaredGradients) + epsilon)
    gradient.muli(sqrtHistory.rdivi(learningRate));
}
}
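// A minimal, self-contained sketch (hypothetical values; not part of the
// updater above) of the same AdaGrad rule, written without in-place aliasing:
//   state  <- state + g (*) g
//   update <- lr * g / (sqrt(state) + eps)
double lr = 0.01, eps = 1e-6;
INDArray grad  = Nd4j.create(new double[] {0.1, -0.2, 0.3});
INDArray state = Nd4j.zeros(3);                    // accumulated squared gradients
state.addi(grad.mul(grad));
INDArray denom  = Transforms.sqrt(state, true).addi(eps);
INDArray update = grad.mul(denom.rdiv(lr));        // denom.rdiv(lr) == lr / denom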
public INDArray getGradient(INDArray gradient, int slice, int[] shape) {
    boolean historicalInitialized = false;
    INDArray sqrtHistory;

    if (this.historicalGradient == null) {
        this.historicalGradient = Nd4j.zeros(shape).add(epsilon);
        historicalInitialized = true;
    } else if (!this.historicalGradient.isVector()
            && this.historicalGradient.slice(slice).length() != gradient.length())
        throw new IllegalArgumentException("Illegal gradient");

    if (historicalGradient.isVector())
        sqrtHistory = sqrt(historicalGradient);
    else
        sqrtHistory = !historicalInitialized ? sqrt(historicalGradient.slice(slice)) : historicalGradient;

    INDArray learningRates;
    try {
        learningRates = sqrtHistory.rdivi(learningRate);
    } catch (ArithmeticException ae) {
        learningRates = sqrtHistory.rdivi(learningRate + epsilon);
    }

    if (gradient.length() != learningRates.length())
        gradient.muli(learningRates.slice(slice));
    else
        gradient.muli(learningRates);

    this.historicalGradient.slice(slice).addi(gradient.mul(gradient));
    numIterations++;

    // ensure no zeros
    return gradient;
}
private INDArray scoreArray(INDArray labels, INDArray preOutput, IActivation activationFn, INDArray mask) {
    if (labels.size(1) != preOutput.size(1)) {
        throw new IllegalArgumentException(
                "Labels array numColumns (size(1) = " + labels.size(1) + ") does not match output layer"
                        + " number of outputs (nOut = " + preOutput.size(1) + ") ");
    }
    INDArray output = activationFn.getActivation(preOutput.dup(), true);

    // Clip output and labels to be between Nd4j.EPS_THRESHOLD and 1, i.e. a valid non-zero probability
    output = Transforms.min(Transforms.max(output, Nd4j.EPS_THRESHOLD, false), 1, false);
    labels = Transforms.min(Transforms.max(labels, Nd4j.EPS_THRESHOLD, true), 1, false);

    INDArray logRatio = Transforms.log(output.rdivi(labels), false); // labels / output, in place on output
    INDArray scoreArr = logRatio.muli(labels);                       // labels * log(labels / output)

    if (mask != null) {
        LossUtil.applyMask(scoreArr, mask);
    }
    return scoreArr;
}
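// Equivalent alias-free sketch (hypothetical arrays p and q, not from the
// loss class above): the chain output.rdivi(labels) -> log -> muli(labels)
// computes the per-element KL divergence term p * log(p / q).
INDArray p  = Nd4j.create(new double[] {0.7, 0.2, 0.1});  // plays the role of "labels"
INDArray q  = Nd4j.create(new double[] {0.5, 0.3, 0.2});  // plays the role of "output"
INDArray kl = p.mul(Transforms.log(p.div(q), false));     // p * log(p / q)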
// In place: gradient becomes alphat * (m / gradient); rdivi uses gradient as the divisor
gradient.rdivi(m).muli(alphat);
INDArray dLda = output.rdivi(labels).negi(); // -(labels / output), computed in place on output
/**
 * In place (element wise) division of two matrices
 *
 * @param other  the second ndarray to divide by
 * @param result the result ndarray
 * @return the result of the division
 */
@Override
public INDArray divi(INDArray other, INDArray result) {
    if (other.isScalar()) {
        return divi(other.getDouble(0), result);
    }
    if (isScalar()) {
        return other.rdivi(getDouble(0), result);
    }

    if (!Shape.shapeEquals(this.shape(), other.shape())) {
        int[] broadcastDimensions = Shape.getBroadcastDimensions(this.shape(), other.shape());
        Nd4j.getExecutioner().exec(new BroadcastDivOp(this, other, result, broadcastDimensions),
                broadcastDimensions);
        return result;
    }

    LinAlgExceptions.assertSameShape(other, result);
    Nd4j.getExecutioner().exec(new OldDivOp(this, other, result, length()));

    if (Nd4j.ENFORCE_NUMERICAL_STABILITY)
        Nd4j.clearNans(result);

    return result;
}
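// Usage sketch (hypothetical shapes and values): equal-shape operands take the
// OldDivOp path above; mismatched-but-broadcastable shapes dispatch to
// BroadcastDivOp instead.
INDArray x   = Nd4j.linspace(1, 6, 6).reshape(2, 3);
INDArray y   = Nd4j.valueArrayOf(new int[] {2, 3}, 2.0);
INDArray out = Nd4j.zeros(2, 3);
x.divi(y, out); // out = x / y element-wise; x is untouched since result != this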
@Override
public DoubleTensor reciprocalInPlace() {
    tensor.rdivi(1.0); // element-wise: x <- 1.0 / x
    return this;
}
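// Contract sketch (hypothetical values): rdivi(1.0) replaces every element x
// with 1.0 / x, which is exactly the in-place reciprocal used above.
INDArray t = Nd4j.create(new double[] {0.5, 2.0, 4.0});
t.rdivi(1.0); // t is now [2.0, 0.5, 0.25]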
@Override
public INDArray generateAtMean(INDArray preOutDistributionParams) {
    // Input: gamma = log(lambda) -> lambda = exp(gamma)
    // Mean for exponential distribution: 1 / lambda
    INDArray gamma = activationFn.getActivation(preOutDistributionParams.dup(), false);
    INDArray lambda = Transforms.exp(gamma, true);
    return lambda.rdivi(1.0); // mean = 1.0 / lambda
}
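// Equivalent closed form (a sketch reusing the local gamma from above): since
// lambda = exp(gamma), the mean 1/lambda is simply exp(-gamma), which avoids
// the intermediate division entirely.
INDArray mean = Transforms.exp(gamma.neg(), false); // exp(-gamma) == 1 / exp(gamma)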
@Override
public INDArray getVarCache() {
    INDArray ret;
    if (Nd4j.dataType() == DataBuffer.Type.HALF) {
        INDArray vc = varCache.convertToHalfs();
        ret = vc.mul(vc).rdivi(1.0).subi(eps);              // 1 / varCache^2 - eps
    } else {
        ret = varCache.mul(varCache).rdivi(1.0).subi(eps);  // 1 / varCache^2 - eps
    }
    if (Nd4j.dataType() == DataBuffer.Type.HALF) {
        // Buffer is FP32; convert back to half precision
        return ret.convertToHalfs();
    }
    return ret;
}
/**
 * Applies the AdaGrad update to the gradient in place.
 * Adagrad keeps a history of gradients being passed in; each gradient
 * passed in becomes adapted over time, hence the name adagrad.
 *
 * @param gradient  the gradient to apply the update to
 * @param iteration the current iteration number
 */
@Override
public void applyUpdater(INDArray gradient, int iteration) {
    if (historicalGradient == null)
        throw new IllegalStateException("Updater has not been initialized with view state");

    double learningRate = config.getLearningRate();
    double epsilon = config.getEpsilon();

    historicalGradient.addi(gradient.mul(gradient));
    INDArray sqrtHistory = sqrt(historicalGradient.dup(gradientReshapeOrder), false).addi(epsilon);
    // lr * gradient / (sqrt(sumSquaredGradients) + epsilon)
    gradient.muli(sqrtHistory.rdivi(learningRate));
}
}