/**
 * Returns a measure of how good the classification for a particular example
 * actually is.
 *
 * @param actual The correct category for the example.
 * @param data The vector to be classified.
 * @return The log likelihood of the correct answer as estimated by the current model. This will always be &lt;= 0
 * and larger (closer to 0) indicates better accuracy. In order to simplify code that maintains running averages,
 * we bound this value at -100 (MIN_LOG_LIKELIHOOD).
 */
public double logLikelihood(int actual, Vector data) {
  if (numCategories() == 2) {
    // Binary case: classifyScalar gives the probability of category 1.
    double p = classifyScalar(data);
    if (actual > 0) {
      return Math.max(MIN_LOG_LIKELIHOOD, Math.log(p));
    } else {
      // log(1 - p) via log1p for accuracy when p is small.
      return Math.max(MIN_LOG_LIKELIHOOD, Math.log1p(-p));
    }
  } else {
    // Multi-class: classify returns probabilities for categories 1..n-1;
    // category 0's probability is the residual 1 - zSum().
    Vector p = classify(data);
    if (actual > 0) {
      return Math.max(MIN_LOG_LIKELIHOOD, Math.log(p.get(actual - 1)));
    } else {
      return Math.max(MIN_LOG_LIKELIHOOD, Math.log1p(-p.zSum()));
    }
  }
}
// closing brace of the enclosing class (chunk boundary)
}
/**
 * Provides a default gradient computation useful for logistic regression.
 *
 * @param groupKey A grouping key to allow per-something AUC loss to be used for training.
 * @param actual The target variable value.
 * @param instance The current feature vector to use for gradient computation
 * @param classifier The classifier that can compute scores
 * @return The gradient to be applied to beta
 */
@Override
public final Vector apply(String groupKey, int actual, Vector instance, AbstractVectorClassifier classifier) {
  // what does the current model say?
  Vector v = classifier.classify(instance);
  // One-hot encode the target over categories 1..n-1; category 0 is the
  // all-zero vector, so nothing is set in that case.
  Vector r = v.like();
  if (actual != 0) {
    r.setQuick(actual - 1, 1);
  }
  // Gradient is (target - model output), computed in place on r.
  r.assign(v, Functions.MINUS);
  return r;
}
// closing brace of the enclosing class (chunk boundary)
}
static void test(Matrix input, Vector target, AbstractVectorClassifier lr, double expected_mean_error, double expected_absolute_error) { // now test the accuracy Matrix tmp = lr.classify(input); // mean(abs(tmp - target)) double meanAbsoluteError = tmp.viewColumn(0).minus(target).aggregate(Functions.PLUS, Functions.ABS) / 60; // max(abs(tmp - target) double maxAbsoluteError = tmp.viewColumn(0).minus(target).aggregate(Functions.MAX, Functions.ABS); System.out.printf("mAE = %.4f, maxAE = %.4f\n", meanAbsoluteError, maxAbsoluteError); assertEquals(0, meanAbsoluteError , expected_mean_error); assertEquals(0, maxAbsoluteError, expected_absolute_error); // convenience methods should give the same results Vector v = lr.classifyScalar(input); assertEquals(0, v.minus(tmp.viewColumn(0)).norm(1), 1.0e-5); v = lr.classifyFull(input).viewColumn(1); assertEquals(0, v.minus(tmp.viewColumn(0)).norm(1), 1.0e-4); }
/**
 * Returns a measure of how good the classification for a particular example
 * actually is.
 *
 * @param actual The correct category for the example.
 * @param data The vector to be classified.
 * @return The log likelihood of the correct answer as estimated by the current model. This will always be &lt;= 0
 * and larger (closer to 0) indicates better accuracy. In order to simplify code that maintains running averages,
 * we bound this value at -100.
 */
public double logLikelihood(int actual, Vector data) {
  // Compute the raw log likelihood first, then clamp it once on exit.
  double rawLogLikelihood;
  if (numCategories() == 2) {
    // Binary classification: a single score is the probability of category 1.
    double p1 = classifyScalar(data);
    rawLogLikelihood = actual > 0 ? Math.log(p1) : Math.log1p(-p1);
  } else {
    // Multi-class: scores cover categories 1..n-1; category 0 holds the
    // leftover probability mass 1 - zSum().
    Vector scores = classify(data);
    rawLogLikelihood = actual > 0
        ? Math.log(scores.get(actual - 1))
        : Math.log1p(-scores.zSum());
  }
  return Math.max(MIN_LOG_LIKELIHOOD, rawLogLikelihood);
}
// closing brace of the enclosing class (chunk boundary)
}
/** * Provides a default gradient computation useful for logistic regression. * * @param groupKey A grouping key to allow per-something AUC loss to be used for training. * @param actual The target variable value. * @param instance The current feature vector to use for gradient computation * @param classifier The classifier that can compute scores * @return The gradient to be applied to beta */ @Override public final Vector apply(String groupKey, int actual, Vector instance, AbstractVectorClassifier classifier) { // what does the current model say? Vector v = classifier.classify(instance); Vector r = v.like(); if (actual != 0) { r.setQuick(actual - 1, 1); } r.assign(v, Functions.MINUS); return r; } }
/**
 * Returns a measure of how good the classification for a particular example
 * actually is.
 *
 * @param actual The correct category for the example.
 * @param data The vector to be classified.
 * @return The log likelihood of the correct answer as estimated by the current model. This will always be &lt;= 0
 * and larger (closer to 0) indicates better accuracy. In order to simplify code that maintains running averages,
 * we bound this value at -100.
 */
public double logLikelihood(int actual, Vector data) {
  if (numCategories() == 2) {
    // Two categories: one scalar score suffices, interpreted as p(category 1).
    double p = classifyScalar(data);
    double binaryLogLikelihood = (actual > 0) ? Math.log(p) : Math.log1p(-p);
    return Math.max(MIN_LOG_LIKELIHOOD, binaryLogLikelihood);
  }
  // More than two categories: probabilities for categories 1..n-1 come back
  // directly; category 0 is implied as 1 minus the sum of the rest.
  Vector probabilities = classify(data);
  double multiClassLogLikelihood = (actual > 0)
      ? Math.log(probabilities.get(actual - 1))
      : Math.log1p(-probabilities.zSum());
  return Math.max(MIN_LOG_LIKELIHOOD, multiClassLogLikelihood);
}
// closing brace of the enclosing class (chunk boundary)
}
/** * Provides a default gradient computation useful for logistic regression. * * @param groupKey A grouping key to allow per-something AUC loss to be used for training. * @param actual The target variable value. * @param instance The current feature vector to use for gradient computation * @param classifier The classifier that can compute scores * @return The gradient to be applied to beta */ @Override public final Vector apply(String groupKey, int actual, Vector instance, AbstractVectorClassifier classifier) { // what does the current model say? Vector v = classifier.classify(instance); Vector r = v.like(); if (actual != 0) { r.setQuick(actual - 1, 1); } r.assign(v, Functions.MINUS); return r; } }
/**
 * Returns n-1 probabilities, one for each categories 1 through
 * {@code n-1}, for each row of a matrix, where {@code n} is equal
 * to {@code numCategories()}. The probability of the missing 0-th
 * category is 1 - rowSum(this result).
 *
 * @param data The matrix whose rows are the input vectors to classify
 * @return A matrix of scores, one row per row of the input matrix, one column for each but the last category.
 */
public Matrix classify(Matrix data) {
  // One output row per input row; one column per category except category 0.
  Matrix r = new DenseMatrix(data.numRows(), numCategories() - 1);
  for (int row = 0; row < data.numRows(); row++) {
    // Delegate each row to the single-vector classify overload.
    r.assignRow(row, classify(data.viewRow(row)));
  }
  return r;
}
/**
 * Returns n-1 probabilities, one for each categories 1 through
 * {@code n-1}, for each row of a matrix, where {@code n} is equal
 * to {@code numCategories()}. The probability of the missing 0-th
 * category is 1 - rowSum(this result).
 *
 * @param data The matrix whose rows are the input vectors to classify
 * @return A matrix of scores, one row per row of the input matrix, one column for each but the last category.
 */
public Matrix classify(Matrix data) {
  int rowCount = data.numRows();
  // One score row per example; category 0 has no column of its own.
  Matrix scores = new DenseMatrix(rowCount, numCategories() - 1);
  for (int i = 0; i < rowCount; i++) {
    // Classify each example individually via the vector overload.
    scores.assignRow(i, classify(data.viewRow(i)));
  }
  return scores;
}
/**
 * Returns n-1 probabilities, one for each categories 1 through
 * {@code n-1}, for each row of a matrix, where {@code n} is equal
 * to {@code numCategories()}. The probability of the missing 0-th
 * category is 1 - rowSum(this result).
 *
 * @param data The matrix whose rows are the input vectors to classify
 * @return A matrix of scores, one row per row of the input matrix, one column for each but the last category.
 */
public Matrix classify(Matrix data) {
  // Result has one row per example and omits the implicit 0-th category.
  Matrix result = new DenseMatrix(data.numRows(), numCategories() - 1);
  int row = 0;
  while (row < data.numRows()) {
    // Each example row is scored by the single-vector classify method.
    result.assignRow(row, classify(data.viewRow(row)));
    row++;
  }
  return result;
}
// NOTE(review): the three identical lines below are fragments — the enclosing
// method signature (presumably a classifyFull-style method taking a result
// vector r and an input instance; confirm against the full file) lies outside
// this chunk, so the code is kept verbatim.
// Each fragment fills r with a complete n-category distribution: slots 1..n-1
// are copied from classify(instance), and slot 0 is set to the residual
// probability 1 - r.zSum() so the whole vector sums to 1.
r.viewPart(1, numCategories() - 1).assign(classify(instance)); r.setQuick(0, 1.0 - r.zSum()); return r;
r.viewPart(1, numCategories() - 1).assign(classify(instance)); r.setQuick(0, 1.0 - r.zSum()); return r;
r.viewPart(1, numCategories() - 1).assign(classify(instance)); r.setQuick(0, 1.0 - r.zSum()); return r;