/** * Returns a measure of how good the classification for a particular example * actually is. * * @param actual The correct category for the example. * @param data The vector to be classified. * @return The log likelihood of the correct answer as estimated by the current model. This will always be <= 0 * and larger (closer to 0) indicates better accuracy. In order to simplify code that maintains eunning averages, * we bound this value at -100. */ public double logLikelihood(int actual, Vector data) { if (numCategories() == 2) { double p = classifyScalar(data); if (actual > 0) { return Math.max(MIN_LOG_LIKELIHOOD, Math.log(p)); } else { return Math.max(MIN_LOG_LIKELIHOOD, Math.log1p(-p)); } } else { Vector p = classify(data); if (actual > 0) { return Math.max(MIN_LOG_LIKELIHOOD, Math.log(p.get(actual - 1))); } else { return Math.max(MIN_LOG_LIKELIHOOD, Math.log1p(-p.zSum())); } } } }
/** * Returns a measure of how good the classification for a particular example * actually is. * * @param actual The correct category for the example. * @param data The vector to be classified. * @return The log likelihood of the correct answer as estimated by the current model. This will always be <= 0 * and larger (closer to 0) indicates better accuracy. In order to simplify code that maintains eunning averages, * we bound this value at -100. */ public double logLikelihood(int actual, Vector data) { if (numCategories() == 2) { double p = classifyScalar(data); if (actual > 0) { return Math.max(MIN_LOG_LIKELIHOOD, Math.log(p)); } else { return Math.max(MIN_LOG_LIKELIHOOD, Math.log1p(-p)); } } else { Vector p = classify(data); if (actual > 0) { return Math.max(MIN_LOG_LIKELIHOOD, Math.log(p.get(actual - 1))); } else { return Math.max(MIN_LOG_LIKELIHOOD, Math.log1p(-p.zSum())); } } } }
/** * Returns a measure of how good the classification for a particular example * actually is. * * @param actual The correct category for the example. * @param data The vector to be classified. * @return The log likelihood of the correct answer as estimated by the current model. This will always be <= 0 * and larger (closer to 0) indicates better accuracy. In order to simplify code that maintains eunning averages, * we bound this value at -100. */ public double logLikelihood(int actual, Vector data) { if (numCategories() == 2) { double p = classifyScalar(data); if (actual > 0) { return Math.max(MIN_LOG_LIKELIHOOD, Math.log(p)); } else { return Math.max(MIN_LOG_LIKELIHOOD, Math.log1p(-p)); } } else { Vector p = classify(data); if (actual > 0) { return Math.max(MIN_LOG_LIKELIHOOD, Math.log(p.get(actual - 1))); } else { return Math.max(MIN_LOG_LIKELIHOOD, Math.log1p(-p.zSum())); } } } }
/**
 * Returns a vector of probabilities of category 1, one for each row of a matrix.
 * This only makes sense if there are exactly two categories, but calling this
 * method in that case can save a number of vector allocations.
 *
 * @param data The matrix whose rows are vectors to classify.
 * @return A vector of scores, with one value per row of the input matrix.
 */
public Vector classifyScalar(Matrix data) {
  // The scalar form is only meaningful for binary classification.
  Preconditions.checkArgument(numCategories() == 2, "Can only call classifyScalar with two categories");
  Vector r = new DenseVector(data.numRows());
  for (int row = 0; row < data.numRows(); row++) {
    r.set(row, classifyScalar(data.viewRow(row)));
  }
  return r;
}
static void test(Matrix input, Vector target, AbstractVectorClassifier lr, double expected_mean_error, double expected_absolute_error) { // now test the accuracy Matrix tmp = lr.classify(input); // mean(abs(tmp - target)) double meanAbsoluteError = tmp.viewColumn(0).minus(target).aggregate(Functions.PLUS, Functions.ABS) / 60; // max(abs(tmp - target) double maxAbsoluteError = tmp.viewColumn(0).minus(target).aggregate(Functions.MAX, Functions.ABS); System.out.printf("mAE = %.4f, maxAE = %.4f\n", meanAbsoluteError, maxAbsoluteError); assertEquals(0, meanAbsoluteError , expected_mean_error); assertEquals(0, maxAbsoluteError, expected_absolute_error); // convenience methods should give the same results Vector v = lr.classifyScalar(input); assertEquals(0, v.minus(tmp.viewColumn(0)).norm(1), 1.0e-5); v = lr.classifyFull(input).viewColumn(1); assertEquals(0, v.minus(tmp.viewColumn(0)).norm(1), 1.0e-4); }