/**
 * Initializes the node with a single vector, mainly used for initializing
 * the bottom level.
 */
ClusterNode(DoubleVector mean) {
  this.mean = mean.deepCopy();
  this.splitDistance = 0d;
}
/** Computes a = b + c * d element-wise. */
private void addMultInto(DoubleVector a, DoubleVector b, DoubleVector c,
    double d) {
  for (int i = 0; i < a.getDimension(); i++) {
    a.set(i, b.get(i) + c.get(i) * d);
  }
}
@Override
public double calculateLoss(DoubleVector y, DoubleVector hypothesis) {
  double sum = 0d;
  for (int col = 0; col < y.getDimension(); col++) {
    double diff = y.get(col) - hypothesis.get(col);
    sum += (diff * diff);
  }
  return sum;
}
/**
 * @return the normalized matrix (0 mean and stddev of 1) as well as the mean
 *         and the stddev.
 */
public static Tuple3<DoubleMatrix, DoubleVector, DoubleVector> meanNormalizeColumns(
    DoubleMatrix x) {
  DenseDoubleMatrix toReturn = new DenseDoubleMatrix(x.getRowCount(),
      x.getColumnCount());
  final int length = x.getColumnCount();
  DoubleVector meanVector = new DenseDoubleVector(length);
  DoubleVector stddevVector = new DenseDoubleVector(length);
  for (int col = 0; col < length; col++) {
    DoubleVector column = x.getColumnVector(col);
    double mean = column.sum() / column.getLength();
    meanVector.set(col, mean);
    double var = column.subtract(mean).pow(2).sum() / column.getLength();
    stddevVector.set(col, Math.sqrt(var));
  }
  for (int col = 0; col < length; col++) {
    DoubleVector column = x.getColumnVector(col)
        .subtract(meanVector.get(col)).divide(stddevVector.get(col));
    toReturn.setColumn(col, column.toArray());
  }
  return new Tuple3<>(toReturn, meanVector, stddevVector);
}
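/*
 * A minimal usage sketch. The 2x2 input values are made up for
 * illustration, and the double[][] constructor and Tuple3's
 * getFirst/getSecond/getThird accessors are assumed from this library.
 */
DoubleMatrix input = new DenseDoubleMatrix(new double[][] { { 1d, 10d },
    { 3d, 20d } });
Tuple3<DoubleMatrix, DoubleVector, DoubleVector> result = meanNormalizeColumns(input);
DoubleMatrix normalized = result.getFirst(); // zero-mean, unit-stddev columns
// result.getSecond() / result.getThird() hold the per-column mean/stddev,
// so unseen data can later be normalized with the same statistics.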
@Override
public DoubleVector multiply(DoubleVector v) {
  // element-wise multiplication; when v is sparse, positions where v is
  // zero yield zero anyway, so only the non-zero entries are visited
  DoubleVector newv = new DenseDoubleVector(this.getLength());
  if (v.isSparse()) {
    Iterator<DoubleVectorElement> iterateNonZero = v.iterateNonZero();
    while (iterateNonZero.hasNext()) {
      DoubleVectorElement next = iterateNonZero.next();
      newv.set(next.getIndex(), this.get(next.getIndex()) * next.getValue());
    }
  } else {
    for (int i = 0; i < v.getLength(); i++) {
      newv.set(i, this.get(i) * v.get(i));
    }
  }
  return newv;
}
DoubleVector newWeights = theta.subtract(gradient.multiply(learningRate));
double shrinkageVal = l1 * learningRate;
if (newWeights.isSparse()) {
  // iterate only over the non-zero weights; write into a copy so the
  // iterator never observes its own updates
  DoubleVector deepCopy = newWeights.deepCopy();
  Iterator<DoubleVectorElement> iterateNonZero = newWeights
      .iterateNonZero();
  while (iterateNonZero.hasNext()) {
    DoubleVectorElement next = iterateNonZero.next();
    addedCost += updateWeight(deepCopy, newWeights, shrinkageVal,
        next.getIndex(), next.getValue());
  }
  newWeights = deepCopy;
} else {
  // dense case: visit every weight, skipping the bias at index 0
  for (int i = 1; i < newWeights.getDimension(); i++) {
    addedCost += updateWeight(newWeights, newWeights, shrinkageVal, i,
        newWeights.get(i));
  }
}
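/*
 * Illustration only: the per-weight L1 shrinkage (soft-thresholding) step
 * that an updateWeight helper like the one above typically applies.
 * "shrink" is a hypothetical stand-in, not the original method.
 */
static double shrink(double weight, double shrinkageVal) {
  // pull the weight towards zero by shrinkageVal, clipping at zero so
  // weights can become exactly zero and the model stays sparse
  return weight > 0d ? Math.max(0d, weight - shrinkageVal)
      : Math.min(0d, weight + shrinkageVal);
}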
@Override
public DoubleVector calculateGradient(DoubleVector feature, DoubleVector y,
    DoubleVector hypothesis) {
  double error = y.subtract(hypothesis).sum();
  if (error != 0d) {
    DoubleVector result = feature.deepCopy();
    Iterator<DoubleVectorElement> iterateNonZero = feature.iterateNonZero();
    while (iterateNonZero.hasNext()) {
      DoubleVectorElement next = iterateNonZero.next();
      result.set(next.getIndex(),
          MathUtils.guardedLogarithm(next.getValue() + 1d) * error * -1d);
    }
    return result;
  }
  return new SequentialSparseDoubleVector(feature.getDimension());
}
@Override
public CostGradientTuple updateGradient(DoubleVector weights,
    DoubleVector gradient, double learningRate, long iteration, double cost) {
  if (l2 != 0d) {
    DoubleVector powered = weights.pow(2d);
    DoubleVector regGrad = weights.multiply(l2);
    // assume the bias is on the first dimension; it is not regularized
    powered.set(0, 0);
    regGrad.set(0, 0);
    cost += l2 * powered.sum() / 2d;
    gradient = gradient.add(regGrad);
  }
  return new CostGradientTuple(cost, gradient);
}
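/*
 * Worked example (values are made up): with l2 = 0.1 and weights
 * (bias, w1) = (0.5, 2.0), the bias is zeroed out, so the added cost is
 * 0.1 * (2.0 * 2.0) / 2 = 0.2 and the gradient of w1 grows by
 * 0.1 * 2.0 = 0.2, the usual "weight decay" pull towards zero.
 */
DoubleVector weights = new DenseDoubleVector(new double[] { 0.5, 2.0 });
DoubleVector powered = weights.pow(2d); // (0.25, 4.0)
powered.set(0, 0); // exclude the bias
double addedCost = 0.1 * powered.sum() / 2d; // 0.2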
// randomly initialize the prior probability of the hidden states
hiddenPriorProbability = new DenseDoubleVector(numHiddenStates);
for (int i = 0; i < numHiddenStates; i++) {
  hiddenPriorProbability.set(i, random.nextDouble());
}

// inside the training loop: keep a copy of the current parameters so the
// change between iterations can be measured for the convergence check
DoubleVector hiddenPriorProbability = this.hiddenPriorProbability.deepCopy();
// re-estimate the prior from the forward (alpha) and backward (beta)
// variables of the first time step
hiddenPriorProbability = alpha.getRowVector(0).multiply(beta.getRowVector(0));
final double modelLikelihood = estimateLikelihood(alpha);
// accumulate the expected transition statistics over consecutive time steps
for (int t = 0; t < features.length - 1; t++) {
  Iterator<DoubleVectorElement> iterateNonZero = features[t + 1]
      .iterateNonZero();
  while (iterateNonZero.hasNext()) {
    // ... (elided: temp += alpha.get(t, i) * ...)
  }
}
// accumulate the expected emission statistics
for (int t = 0; t < features.length; t++) {
  Iterator<DoubleVectorElement> iterateNonZero = features[t].iterateNonZero();
  while (iterateNonZero.hasNext()) {
    DoubleVectorElement next = iterateNonZero.next();
    // ... (elided: emission statistic update)
  }
}
// convergence check: summed squared change of all model parameters
double difference = this.transitionProbabilityMatrix
    .subtract(transitionProbabilityMatrix).pow(2).sum()
    + this.emissionProbabilityMatrix.subtract(emissionProbabilityMatrix)
        .pow(2).sum()
    + this.getHiddenPriorProbability().subtract(hiddenPriorProbability)
        .pow(2).sum();
if (verbose) {
  // ...
}
@Override
public DoubleVector gradient(DoubleVector vector) {
  DoubleVector newInstance = newInstance(vector);
  if (vector.isSparse()) {
    // only visit the non-zero entries; this shortcut is only correct for
    // activation functions whose gradient at zero is zero
    Iterator<DoubleVectorElement> iterateNonZero = vector.iterateNonZero();
    while (iterateNonZero.hasNext()) {
      DoubleVectorElement next = iterateNonZero.next();
      newInstance.set(next.getIndex(), gradient(next.getValue()));
    }
  } else {
    for (int i = 0; i < vector.getDimension(); i++) {
      newInstance.set(i, gradient(vector.get(i)));
    }
  }
  return newInstance;
}
@Override
public DoubleVector predict(DoubleVector features) {
  // clamp the features to the visible units, calculate the joint
  // probability for each hidden state and put it into the vector
  DoubleVector probabilities = emissionProbabilityMatrix
      .multiplyVectorRow(features);
  // subtract the max before exponentiating to avoid overflow
  double max = probabilities.max();
  for (int state = 0; state < probabilities.getDimension(); state++) {
    probabilities.set(state, FastMath.exp(probabilities.get(state) - max)
        * hiddenPriorProbability.get(state));
  }
  // normalize again
  return probabilities.divide(probabilities.sum());
}
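/*
 * The max subtraction above is the standard softmax stability trick: it
 * leaves the normalized result unchanged but keeps exp() in range.
 * Illustration with plain doubles (made-up activations):
 */
double[] activations = { 1000d, 1001d };
// naive: Math.exp(1000d) overflows to Infinity and the ratio becomes NaN
double max = Math.max(activations[0], activations[1]);
double e0 = Math.exp(activations[0] - max); // ~0.368
double e1 = Math.exp(activations[1] - max); // 1.0
double p1 = e1 / (e0 + e1); // ~0.731, finite and correct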
@Override
public double measureDistance(DoubleVector vec1, DoubleVector vec2) {
  if (vec1.isSparse() || vec2.isSparse()) {
    return FastMath.sqrt(vec2.subtract(vec1).pow(2).sum());
  } else {
    // dense vectors usually don't do a defensive copy in toArray(), so the
    // array-based implementation is faster than the one above.
    return measureDistance(vec1.toArray(), vec2.toArray());
  }
}
/**
 * Creates a dense copy of the given vector.
 *
 * @param vec a double vector.
 */
public DenseDoubleVector(DoubleVector vec) {
  this.vector = new double[vec.getDimension()];
  if (vec.isSparse()) {
    // materialize only the non-zero entries, the rest stays zero
    Iterator<DoubleVectorElement> iterateNonZero = vec.iterateNonZero();
    while (iterateNonZero.hasNext()) {
      DoubleVectorElement next = iterateNonZero.next();
      this.vector[next.getIndex()] = next.getValue();
    }
  } else {
    System.arraycopy(vec.toArray(), 0, this.vector, 0, this.vector.length);
  }
}
@Override
public CostGradientTuple updateGradient(DoubleVector theta,
    DoubleVector gradient, double learningRate, long iteration, double cost) {
  if (movingAvg == null) {
    // initialize the same vector types with zeros
    movingAvg = gradient.deepCopy().multiply(0);
    squaredGradient = gradient.deepCopy().multiply(0);
  }
  // first moment: exponential moving average of the gradient
  DoubleVector oneMinusBeta1Grad = gradient.multiply(1d - movingAvgDecay);
  movingAvg = movingAvg.multiply(movingAvgDecay).add(oneMinusBeta1Grad);
  // second moment: exponential moving average of the squared gradient
  DoubleVector oneMinusBeta2GradSquared = gradient.pow(2d).multiply(
      1 - squaredDecay);
  squaredGradient = squaredGradient.multiply(squaredDecay).add(
      oneMinusBeta2GradSquared);
  // fold both bias corrections into the step size
  double beta1t = FastMath.pow(movingAvgDecay, iteration);
  double beta2t = FastMath.pow(squaredDecay, iteration);
  double alphat = alpha * FastMath.sqrt(1 - beta2t) / (1 - beta1t);
  if (Double.isNaN(alphat) || alphat == 0.0) {
    // guards the 0/0 case on the very first iteration
    alphat = EPS;
  }
  DoubleVector sqrtV = squaredGradient.sqrt().add(eps);
  gradient = movingAvg.multiply(alphat).divide(sqrtV);
  return new CostGradientTuple(cost, gradient);
}
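/*
 * A scalar sketch of the same Adam step, minimizing f(w) = w^2
 * (illustration only; beta1 = 0.9, beta2 = 0.999 are the common defaults,
 * the other constants are arbitrary):
 */
double m = 0d, v = 0d, w = 5d;
double beta1 = 0.9d, beta2 = 0.999d, stepAlpha = 0.1d, adamEps = 1e-8d;
for (long t = 1; t <= 100; t++) {
  double g = 2d * w; // gradient of w^2
  m = beta1 * m + (1d - beta1) * g;
  v = beta2 * v + (1d - beta2) * g * g;
  // both bias corrections folded into the step size, as in the code above
  double alphat = stepAlpha * Math.sqrt(1d - Math.pow(beta2, t))
      / (1d - Math.pow(beta1, t));
  w -= alphat * m / (Math.sqrt(v) + adamEps);
}
// w now sits close to the minimum at 0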
@Override
public DoubleVector prePredictionWeightUpdate(
    FeatureOutcomePair featureOutcome, DoubleVector theta,
    double learningRate, long iteration) {
  if (squaredPreviousGradient == null) {
    // initialize zeroed vectors of the same type as the weights
    squaredPreviousGradient = theta.deepCopy().multiply(0);
    perCoordinateWeights = theta.deepCopy().multiply(0);
  }
  // lazily recompute the weights of the features that are actually active
  Iterator<DoubleVectorElement> iterateNonZero = featureOutcome.getFeature()
      .iterateNonZero();
  while (iterateNonZero.hasNext()) {
    DoubleVectorElement next = iterateNonZero.next();
    int index = next.getIndex();
    double zi = perCoordinateWeights.get(index);
    double ni = squaredPreviousGradient.get(index);
    if (FastMath.abs(zi) <= l1) {
      // below the l1 threshold the weight stays at exactly zero
      theta.set(index, 0);
    } else {
      double value = -1d / (((beta + FastMath.sqrt(ni)) / learningRate) + l2);
      value = value * (zi - FastMath.signum(zi) * l1);
      theta.set(index, value);
    }
  }
  return theta;
}
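/*
 * For reference, the update above is the FTRL-Proximal closed form from
 * McMahan et al., "Ad Click Prediction: a View from the Trenches" (2013):
 *
 *   w_i = 0                                                 if |z_i| <= l1
 *   w_i = -(z_i - sign(z_i) * l1)
 *          / ((beta + sqrt(n_i)) / learningRate + l2)       otherwise
 *
 * where z_i accumulates the adjusted gradients and n_i the squared
 * gradients per coordinate; the l1 branch produces exact zeros, which
 * keeps the model sparse.
 */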
@Override
public DoubleVector divide(double scalar) {
  // dividing a zero entry by a scalar keeps it zero, so only the non-zero
  // entries need to be touched
  DoubleVector result = new SequentialSparseDoubleVector(this);
  Iterator<DoubleVectorElement> iter = result.iterateNonZero();
  while (iter.hasNext()) {
    DoubleVectorElement e = iter.next();
    int index = e.getIndex();
    result.set(index, e.getValue() / scalar);
  }
  return result;
}
/**
 * Calculates the numerical gradient of a cost function using the central
 * difference quotient: f'(x) = (f(x + h) - f(x - h)) / (2 * h).
 *
 * @param vector the parameters to derive.
 * @param f the cost function returning the cost at a given parameter set.
 * @return a numerical gradient.
 */
public static DoubleVector numericalGradient(DoubleVector vector,
    CostFunction f) {
  DoubleVector gradient = new DenseDoubleVector(vector.getLength());
  DoubleVector tmp = vector.deepCopy();
  for (int i = 0; i < vector.getLength(); i++) {
    // scale the step size with the magnitude of the parameter
    double stepSize = EPS * (Math.abs(vector.get(i)) + 1d);
    tmp.set(i, vector.get(i) + stepSize);
    double add = f.evaluateCost(tmp).getCost();
    tmp.set(i, vector.get(i) - stepSize);
    double diff = f.evaluateCost(tmp).getCost();
    // restore the original value before perturbing the next dimension
    tmp.set(i, vector.get(i));
    gradient.set(i, (add - diff) / (2d * stepSize));
  }
  return gradient;
}
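/*
 * Typical use: gradient checking. The lambda assumes CostFunction is the
 * single-method interface (evaluateCost(DoubleVector) returning a
 * CostGradientTuple) used above; the quadratic is made up for
 * illustration.
 */
DoubleVector x = new DenseDoubleVector(new double[] { 1d, -2d });
CostFunction f = (DoubleVector v) -> new CostGradientTuple(v.pow(2d).sum(),
    v.multiply(2d)); // f(v) = sum(v^2), analytic gradient 2 * v
DoubleVector numerical = numericalGradient(x, f);
DoubleVector analytical = x.multiply(2d);
// the two vectors should agree to within roughly sqrt(EPS)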
DoubleVector sumVector = null;
for (int i = 0; i < numSamples; i++) {
  if (sumVector == null) {
    sumVector = dataset.getFeatures()[i];
  } else {
    sumVector = sumVector.add(dataset.getFeatures()[i]);
  }
}
final DoubleVector mean = sumVector.divide(numSamples);
DoubleVector stdVector = null;
for (int i = 0; i < numSamples; i++) {
  // ... (elided: condition involving dataset.getOutcomes()[i])
  if (stdVector == null) {
    stdVector = dataset.getFeatures()[i].subtract(mean).pow(2d);
  } else {
    stdVector = stdVector.add(dataset.getFeatures()[i].subtract(mean)
        .pow(2d));
  }
}
// take the root of the averaged squared differences; clamp to at least 1
// to avoid dividing by (near-)zero for constant features
stdVector = stdVector.divide(numSamples).sqrt()
    .apply((int i, double val) -> Math.max(1, val));
// standardize every sample in place
for (int i = 0; i < numSamples; i++) {
  dataset.getFeatures()[i] = dataset.getFeatures()[i].subtract(mean)
      .divide(stdVector);
}