/**
 * Raises every component of the wrapped vector to the given power.
 *
 * @param x the exponent applied element-wise.
 * @return the element-wise powered vector produced by the delegate.
 */
@Override
public DoubleVector pow(double x) {
  DoubleVector delegate = this.mainVector;
  return delegate.pow(x);
}
/**
 * Raises every component of the backing vector to the given power.
 *
 * @param x the exponent applied element-wise.
 * @return the element-wise powered vector produced by the delegate.
 */
@Override
public DoubleVector pow(double x) {
  DoubleVector delegate = vector;
  return delegate.pow(x);
}
/**
 * Delegates the element-wise power operation to the backing vector.
 *
 * @param x the exponent applied to each component.
 * @return the powered vector as returned by the delegate.
 */
@Override
public DoubleVector pow(double x) {
  final DoubleVector target = vector;
  return target.pow(x);
}
@Override public double measureDistance(DoubleVector vec1, DoubleVector vec2) { double lengthSquaredv1 = vec1.pow(2).sum(); double lengthSquaredv2 = vec2.pow(2).sum(); double dotProduct = vec2.dot(vec1); double denominator = Math.sqrt(lengthSquaredv1) * Math.sqrt(lengthSquaredv2); // correct for floating-point rounding errors if (denominator < dotProduct) { denominator = dotProduct; } return 1.0 - dotProduct / denominator; }
@Override public CostGradientTuple updateGradient(DoubleVector weights, DoubleVector gradient, double learningRate, long iteration, double cost) { if (l2 != 0d) { DoubleVector powered = weights.pow(2d); DoubleVector regGrad = weights.multiply(l2); // assume bias is on the first dimension powered.set(0, 0); regGrad.set(0, 0); cost += l2 * powered.sum() / 2d; gradient = gradient.add(regGrad); } return new CostGradientTuple(cost, gradient); } }
@Override public double measureDistance(DoubleVector vec1, DoubleVector vec2) { if (vec1.isSparse() || vec2.isSparse()) { return FastMath.sqrt(vec2.subtract(vec1).pow(2).sum()); } else { // dense vectors usually doesn't do a defensive copy, so it is faster than // the implementation above. return measureDistance(vec1.toArray(), vec2.toArray()); } }
@Override public double measureDistance(DoubleVector vec1, DoubleVector vec2) { if (vec1.isSparse() || vec2.isSparse()) { return FastMath.sqrt(vec2.subtract(vec1).pow(2).sum()); } else { // dense vectors usually doesn't do a defensive copy, so it is faster than // the implementation above. return measureDistance(vec1.toArray(), vec2.toArray()); } }
/**
 * Evaluates the logistic-regression cost and gradient at the given weights.
 * The hypothesis is sigmoid(x * theta); the loss comes from ERROR_FUNCTION,
 * averaged over the m training examples, with optional L2 regularization
 * controlled by lambda.
 *
 * @param theta the current weight vector (bias assumed at index 0).
 * @return the averaged (and optionally regularized) cost plus its gradient.
 */
@Override
public CostGradientTuple evaluateCost(DoubleVector theta) {
  // hypothesis for every training example: sigmoid(x * theta)
  DoubleVector activation = SIGMOID.get().apply(x.multiplyVectorRow(theta));
  // wrap as a 1-row matrix so the matrix-based loss function can consume it
  DenseDoubleMatrix hypo = new DenseDoubleMatrix(Arrays.asList(activation));
  double error = ERROR_FUNCTION.calculateLoss(y, hypo);
  DoubleMatrix loss = hypo.subtract(y);
  // average the loss over the m examples
  double j = error / m;
  // gradient = x^T * (hypothesis - y) / m
  DoubleVector gradient = xTransposed.multiplyVectorRow(loss.getRowVector(0))
      .divide(m);
  if (lambda != 0d) {
    DoubleVector reg = theta.multiply(lambda / m);
    // don't regularize the bias
    reg.set(0, 0d);
    gradient = gradient.add(reg);
    // NOTE(review): the cost penalty sums over ALL of theta (including the
    // bias at index 0, which the gradient excludes) and divides by m rather
    // than the conventional 2m — confirm this asymmetry is intentional.
    j += lambda * theta.pow(2).sum() / m;
  }
  return new CostGradientTuple(j, gradient);
}
}
/**
 * Adam-style gradient update: maintains exponentially decaying averages of
 * the gradient (first moment, {@code movingAvg}) and of the squared gradient
 * (second moment, {@code squaredGradient}), then returns a bias-corrected,
 * adaptively scaled gradient. This method mutates both moment fields, so it
 * is stateful across calls.
 *
 * @param theta the current weights (not read here).
 * @param gradient the raw gradient for this iteration.
 * @param learningRate unused here; the step size comes from {@code alpha}.
 * @param iteration the 0-based(?) iteration counter used for bias correction
 *          — NOTE(review): at iteration 0 the correction divides by zero and
 *          falls into the NaN fallback below; confirm the caller starts at 1.
 * @param cost passed through unchanged.
 * @return the cost and the scaled update direction.
 */
@Override
public CostGradientTuple updateGradient(DoubleVector theta,
    DoubleVector gradient, double learningRate, long iteration, double cost) {
  if (movingAvg == null) {
    // lazily initialize both moment estimates as zero vectors of the same
    // type and dimension as the incoming gradient
    movingAvg = gradient.deepCopy().multiply(0);
    squaredGradient = gradient.deepCopy().multiply(0);
  }
  // m_t = beta1 * m_{t-1} + (1 - beta1) * g_t
  DoubleVector oneMinusBeta1Grad = gradient.multiply(1d - movingAvgDecay);
  movingAvg = movingAvg.multiply(movingAvgDecay).add(oneMinusBeta1Grad);
  // v_t = beta2 * v_{t-1} + (1 - beta2) * g_t^2
  DoubleVector oneMinusBeta2GradSquared = gradient.pow(2d).multiply(
      1 - squaredDecay);
  squaredGradient = squaredGradient.multiply(squaredDecay).add(
      oneMinusBeta2GradSquared);
  // bias-corrected step size: alpha * sqrt(1 - beta2^t) / (1 - beta1^t)
  double beta1t = FastMath.pow(movingAvgDecay, iteration);
  double beta2t = FastMath.pow(squaredDecay, iteration);
  double alphat = alpha * FastMath.sqrt(1 - beta2t) / (1 - beta1t);
  // iteration == 0 makes (1 - beta1t) zero and alphat NaN; fall back to EPS
  if (Double.isNaN(alphat) || alphat == 0.0) {
    alphat = EPS;
  }
  // update direction: alphat * m_t / (sqrt(v_t) + eps)
  DoubleVector sqrtV = squaredGradient.sqrt().add(eps);
  gradient = movingAvg.multiply(alphat).divide(sqrtV);
  return new CostGradientTuple(cost, gradient);
}
.pow(2).sum() + this.getHiddenPriorProbability().subtract(hiddenPriorProbability) .pow(2).sum(); if (verbose) { LOG.info("Iteration " + iteration + " | Model difference: "
/**
 * Mean-normalizes the columns of the given matrix to zero mean and a
 * standard deviation of one.
 * <p>
 * A constant column has a standard deviation of zero; previously that made
 * the division below produce NaN for every entry. Such columns are now
 * divided by 1 instead, yielding an all-zero normalized column, while the
 * returned stddev vector still reports the true (zero) value.
 *
 * @param x the input matrix, read column by column.
 * @return a tuple of the normalized matrix (0 mean and stddev of 1) as well
 *         as the per-column mean and stddev vectors.
 */
public static Tuple3<DoubleMatrix, DoubleVector, DoubleVector> meanNormalizeColumns(
    DoubleMatrix x) {
  DenseDoubleMatrix toReturn = new DenseDoubleMatrix(x.getRowCount(),
      x.getColumnCount());
  final int length = x.getColumnCount();
  DoubleVector meanVector = new DenseDoubleVector(length);
  DoubleVector stddevVector = new DenseDoubleVector(length);
  for (int col = 0; col < length; col++) {
    DoubleVector column = x.getColumnVector(col);
    double mean = column.sum() / column.getLength();
    meanVector.set(col, mean);
    // population variance (divides by n, not n - 1)
    double var = column.subtract(mean).pow(2).sum() / column.getLength();
    stddevVector.set(col, Math.sqrt(var));
  }
  for (int col = 0; col < length; col++) {
    double stddev = stddevVector.get(col);
    // guard against a zero stddev (constant column): dividing by 1 leaves
    // the mean-subtracted zeros intact instead of producing NaN
    double divisor = stddev == 0d ? 1d : stddev;
    DoubleVector column = x.getColumnVector(col)
        .subtract(meanVector.get(col)).divide(divisor);
    toReturn.setColumn(col, column.toArray());
  }
  return new Tuple3<>(toReturn, meanVector, stddevVector);
}