/**
 * Returns a measure of how good the classification for a particular example
 * actually is.
 *
 * @param actual The correct category for the example.
 * @param data The vector to be classified.
 * @return The log likelihood of the correct answer as estimated by the current model. This will always be <= 0
 * and larger (closer to 0) indicates better accuracy. In order to simplify code that maintains running averages,
 * we bound this value at -100 (MIN_LOG_LIKELIHOOD).
 */
public double logLikelihood(int actual, Vector data) {
  if (numCategories() == 2) {
    // Binary case: classifyScalar(data) yields p(category 1); p(category 0) is its complement.
    double p = classifyScalar(data);
    if (actual > 0) {
      return Math.max(MIN_LOG_LIKELIHOOD, Math.log(p));
    } else {
      // log1p(-p) == log(1 - p) but is numerically more accurate for p near 0.
      return Math.max(MIN_LOG_LIKELIHOOD, Math.log1p(-p));
    }
  } else {
    // Multi-category case: classify(data) returns n-1 probabilities for categories 1..n-1,
    // so category `actual` lives at index actual - 1; category 0 gets the remaining mass.
    Vector p = classify(data);
    if (actual > 0) {
      return Math.max(MIN_LOG_LIKELIHOOD, Math.log(p.get(actual - 1)));
    } else {
      return Math.max(MIN_LOG_LIKELIHOOD, Math.log1p(-p.zSum()));
    }
  }
}
// NOTE(review): stray closing brace from the collapsed source — presumably closes the
// enclosing class, which is not visible in this chunk; confirm against the full file.
}
// Fragment (enclosing method signature not visible in this chunk): delegates to
// classifyFull(Vector, Vector) with a freshly allocated, zero-initialized result
// vector holding one slot per category.
// NOTE(review): presumably the body of classifyFull(Vector instance) — confirm against the full file.
return classifyFull(new DenseVector(numCategories()), instance);
// Fragment (enclosing method signature not visible in this chunk): delegates to
// classifyFull(Vector, Vector) with a freshly allocated, zero-initialized result
// vector holding one slot per category.
// NOTE(review): presumably the body of classifyFull(Vector instance) — confirm against the full file.
return classifyFull(new DenseVector(numCategories()), instance);
/**
 * Returns a measure of how good the classification for a particular example
 * actually is.
 *
 * @param actual The correct category for the example.
 * @param data The vector to be classified.
 * @return The log likelihood of the correct answer as estimated by the current model. This will always be <= 0
 * and larger (closer to 0) indicates better accuracy. In order to simplify code that maintains running averages,
 * we bound this value at -100 (MIN_LOG_LIKELIHOOD).
 */
public double logLikelihood(int actual, Vector data) {
  if (numCategories() == 2) {
    // Binary case: classifyScalar(data) yields p(category 1); p(category 0) is its complement.
    double p = classifyScalar(data);
    if (actual > 0) {
      return Math.max(MIN_LOG_LIKELIHOOD, Math.log(p));
    } else {
      // log1p(-p) == log(1 - p) but is numerically more accurate for p near 0.
      return Math.max(MIN_LOG_LIKELIHOOD, Math.log1p(-p));
    }
  } else {
    // Multi-category case: classify(data) returns n-1 probabilities for categories 1..n-1,
    // so category `actual` lives at index actual - 1; category 0 gets the remaining mass.
    Vector p = classify(data);
    if (actual > 0) {
      return Math.max(MIN_LOG_LIKELIHOOD, Math.log(p.get(actual - 1)));
    } else {
      return Math.max(MIN_LOG_LIKELIHOOD, Math.log1p(-p.zSum()));
    }
  }
}
// NOTE(review): stray closing brace from the collapsed source — presumably closes the
// enclosing class, which is not visible in this chunk; confirm against the full file.
}
// Fragment (enclosing method signature not visible in this chunk): delegates to
// classifyFull(Vector, Vector) with a freshly allocated, zero-initialized result
// vector holding one slot per category.
// NOTE(review): presumably the body of classifyFull(Vector instance) — confirm against the full file.
return classifyFull(new DenseVector(numCategories()), instance);
/**
 * Returns a measure of how good the classification for a particular example
 * actually is.
 *
 * @param actual The correct category for the example.
 * @param data The vector to be classified.
 * @return The log likelihood of the correct answer as estimated by the current model. This will always be <= 0
 * and larger (closer to 0) indicates better accuracy. In order to simplify code that maintains running averages,
 * we bound this value at -100 (MIN_LOG_LIKELIHOOD).
 */
public double logLikelihood(int actual, Vector data) {
  if (numCategories() == 2) {
    // Binary case: classifyScalar(data) yields p(category 1); p(category 0) is its complement.
    double p = classifyScalar(data);
    if (actual > 0) {
      return Math.max(MIN_LOG_LIKELIHOOD, Math.log(p));
    } else {
      // log1p(-p) == log(1 - p) but is numerically more accurate for p near 0.
      return Math.max(MIN_LOG_LIKELIHOOD, Math.log1p(-p));
    }
  } else {
    // Multi-category case: classify(data) returns n-1 probabilities for categories 1..n-1,
    // so category `actual` lives at index actual - 1; category 0 gets the remaining mass.
    Vector p = classify(data);
    if (actual > 0) {
      return Math.max(MIN_LOG_LIKELIHOOD, Math.log(p.get(actual - 1)));
    } else {
      return Math.max(MIN_LOG_LIKELIHOOD, Math.log1p(-p.zSum()));
    }
  }
}
// NOTE(review): stray closing brace from the collapsed source — presumably closes the
// enclosing class, which is not visible in this chunk; confirm against the full file.
}
/**
 * Returns a vector of probabilities of category 1, one for each row
 * of a matrix. This only makes sense if there are exactly two categories, but
 * calling this method in that case can save a number of vector allocations.
 *
 * @param data The matrix whose rows are vectors to classify
 * @return A vector of scores, with one value per row of the input matrix.
 * @throws IllegalArgumentException if this classifier does not have exactly two categories.
 */
public Vector classifyScalar(Matrix data) {
  Preconditions.checkArgument(numCategories() == 2, "Can only call classifyScalar with two categories");
  // One scalar score per input row.
  Vector r = new DenseVector(data.numRows());
  for (int row = 0; row < data.numRows(); row++) {
    r.set(row, classifyScalar(data.viewRow(row)));
  }
  return r;
}
/**
 * Returns a matrix where the rows of the matrix each contain {@code n} probabilities,
 * one for each category, where {@code n} is equal to {@code numCategories()}.
 *
 * @param data The matrix whose rows are the input vectors to classify
 * @return A matrix of scores, one row per row of the input matrix, one column per category.
 */
public Matrix classifyFull(Matrix data) {
  Matrix r = new DenseMatrix(data.numRows(), numCategories());
  for (int row = 0; row < data.numRows(); row++) {
    // classifyFull(Vector, Vector) writes the distribution directly into the row view.
    classifyFull(r.viewRow(row), data.viewRow(row));
  }
  return r;
}
/**
 * Returns n-1 probabilities, one for each of categories 1 through {@code n-1}, for each row
 * of a matrix, where {@code n} is equal to {@code numCategories()}. The probability of the
 * missing 0-th category is 1 - rowSum(this result).
 *
 * @param data The matrix whose rows are the input vectors to classify
 * @return A matrix of scores, one row per row of the input matrix, one column for each but the 0-th category.
 */
public Matrix classify(Matrix data) {
  // n-1 columns: the 0-th category is implied by the others.
  Matrix r = new DenseMatrix(data.numRows(), numCategories() - 1);
  for (int row = 0; row < data.numRows(); row++) {
    r.assignRow(row, classify(data.viewRow(row)));
  }
  return r;
}
// Fragment (enclosing method signature not visible in this chunk): fills slots 1..n-1 of r
// with the probabilities from classify(instance), then derives slot 0 as the remainder so
// the whole vector sums to 1.
// NOTE(review): correctness of the zSum() remainder assumes r arrives with r.get(0) == 0
// (e.g. a fresh DenseVector) — confirm against the callers in the full file.
r.viewPart(1, numCategories() - 1).assign(classify(instance));
r.setQuick(0, 1.0 - r.zSum());
return r;
// Fragment (enclosing method signature not visible in this chunk): fills slots 1..n-1 of r
// with the probabilities from classify(instance), then derives slot 0 as the remainder so
// the whole vector sums to 1.
// NOTE(review): correctness of the zSum() remainder assumes r arrives with r.get(0) == 0
// (e.g. a fresh DenseVector) — confirm against the callers in the full file.
r.viewPart(1, numCategories() - 1).assign(classify(instance));
r.setQuick(0, 1.0 - r.zSum());
return r;
// Fragment (enclosing method signature not visible in this chunk): fills slots 1..n-1 of r
// with the probabilities from classify(instance), then derives slot 0 as the remainder so
// the whole vector sums to 1.
// NOTE(review): correctness of the zSum() remainder assumes r arrives with r.get(0) == 0
// (e.g. a fresh DenseVector) — confirm against the callers in the full file.
r.viewPart(1, numCategories() - 1).assign(classify(instance));
r.setQuick(0, 1.0 - r.zSum());
return r;
// End-to-end check: trains a standard naive Bayes model on the toy data set and verifies
// the trained classifier's prediction on one instance.
@Test
public void toyData() throws Exception {
  TrainNaiveBayesJob trainNaiveBayes = new TrainNaiveBayesJob();
  trainNaiveBayes.setConf(conf);
  // "-el" extracts labels from the input; intermediate job output goes to tempDir.
  trainNaiveBayes.run(new String[] { "--input", inputFile.getAbsolutePath(),
      "--output", outputDir.getAbsolutePath(),
      "-el", "--tempDir", tempDir.getAbsolutePath() });
  // Load the trained model back from the job's output directory.
  NaiveBayesModel naiveBayesModel = NaiveBayesModel.materialize(new Path(outputDir.getAbsolutePath()), conf);
  AbstractVectorClassifier classifier = new StandardNaiveBayesClassifier(naiveBayesModel);
  // The toy problem is binary (stolen vs. not stolen).
  assertEquals(2, classifier.numCategories());
  Vector prediction = classifier.classifyFull(trainingInstance(COLOR_RED, TYPE_SUV, ORIGIN_DOMESTIC).get());
  // should be classified as not stolen
  assertTrue(prediction.get(0) < prediction.get(1));
}
// Same end-to-end check as toyData(), but trains with the complementary naive Bayes
// variant ("--trainComplementary") and classifies with the matching classifier.
@Test
public void toyDataComplementary() throws Exception {
  TrainNaiveBayesJob trainNaiveBayes = new TrainNaiveBayesJob();
  trainNaiveBayes.setConf(conf);
  // "-el" extracts labels from the input; intermediate job output goes to tempDir.
  trainNaiveBayes.run(new String[] { "--input", inputFile.getAbsolutePath(),
      "--output", outputDir.getAbsolutePath(),
      "-el", "--trainComplementary",
      "--tempDir", tempDir.getAbsolutePath() });
  // Load the trained model back from the job's output directory.
  NaiveBayesModel naiveBayesModel = NaiveBayesModel.materialize(new Path(outputDir.getAbsolutePath()), conf);
  AbstractVectorClassifier classifier = new ComplementaryNaiveBayesClassifier(naiveBayesModel);
  // The toy problem is binary (stolen vs. not stolen).
  assertEquals(2, classifier.numCategories());
  Vector prediction = classifier.classifyFull(trainingInstance(COLOR_RED, TYPE_SUV, ORIGIN_DOMESTIC).get());
  // should be classified as not stolen
  assertTrue(prediction.get(0) < prediction.get(1));
}