edu.stanford.nlp.ling.RVFDatum.label java code examples

/**
 * Renders this RVFDatum as a single-line String made of its id, the value
 * of {@code asFeaturesCounter()} and the value of {@code label()}.
 *
 * @return the textual form {@code RVFDatum[id=..., features=...,label=...]}
 */
@Override
public String toString() {
 StringBuilder text = new StringBuilder("RVFDatum[id=");
 text.append(id);
 text.append(", features=").append(asFeaturesCounter());
 text.append(",label=").append(label());
 return text.append("]").toString();
}

/**
 * Registers every datum of the given collection: its feature counter is
 * added to {@code instances} under the datum's label, and the reverse
 * mapping (feature counter to label) is stored in {@code classLookup}.
 *
 * @param datums the labelled datums to record
 */
protected void addInstances(Collection<RVFDatum<K, V>> datums) {
 for (RVFDatum<K, V> example : datums) {
  final Counter<V> featureVector = example.asFeaturesCounter();
  final K goldLabel = example.label();
  instances.add(goldLabel, featureVector);
  classLookup.put(featureVector, goldLabel);
 }
}

/**
 * Classifies every example produced by the iterator with {@code classOf}
 * and reports the fraction whose guess equals the example's own label.
 * The raw counts are also written to the logger.
 *
 * @param exampleIterator source of labelled examples; it is fully consumed
 * @return number of correct guesses divided by the number of examples seen
 *         (float division, so an exhausted iterator yields NaN)
 */
public float accuracy(Iterator<RVFDatum<L, F>> exampleIterator) {
 int numCorrect = 0;
 int numSeen = 0;
 while (exampleIterator.hasNext()) {
  final RVFDatum<L, F> example = exampleIterator.next();
  final L predicted = classOf(example);
  numSeen++;
  if (predicted.equals(example.label())) {
   numCorrect++;
  }
 }
 logger.info("correct " + numCorrect + " out of " + numSeen);
 return (float) numCorrect / numSeen;
}

/**
 * Evaluate the accuracy of this classifier on the given dataset.
 *
 * Every datum is classified with {@code classOf} and the guess is compared
 * against the datum's gold label.
 *
 * @param testData The dataset to evaluate the classifier on.
 * @return The fraction of datums whose guessed label equals the gold label.
 *         The result is a plain double division by {@code testData.size()},
 *         so a dataset of size zero yields NaN.
 * @throws IllegalArgumentException if any datum in the dataset has a null label.
 */
public default double evaluateAccuracy(GeneralDataset<L, F> testData) {
 int numCorrect = 0;
 for (RVFDatum<L, F> datum : testData) {
  // Get the gold label; accuracy is undefined without one
  L label = datum.label();
  if (label == null) {
   throw new IllegalArgumentException("Cannot compute accuracy on unlabelled dataset. Offending datum: " + datum);
  }
  // Compare against the guess (label is known non-null, so it is the receiver)
  if (label.equals(classOf(datum))) {
   numCorrect += 1;
  }
 }
 // Use the public accessor rather than reaching into the dataset's size field
 return ((double) numCorrect) / ((double) testData.size());
}

/**
 * Delegates to {@code probabilityOf}, passing the example's feature counter
 * together with the example's own label.
 *
 * @param example the datum whose features and label are forwarded
 * @return whatever {@code probabilityOf} returns for that pair
 */
private double probabilityOfRVFDatum(RVFDatum<L, F> example) {
 final L goldLabel = example.label();
 return probabilityOf(example.asFeaturesCounter(), goldLabel);
}

/**
 * Writes one line per datum of this dataset to the given writer: the index
 * of the datum's label in {@code labelIndex}, followed by one
 * {@code " featureIndex:value"} pair for each key of the datum's feature
 * counter. Values are formatted with {@code Locale.ENGLISH}.
 *
 * @param writer destination of the output; it is neither flushed nor closed here
 */
public void writeSVMLightFormat(PrintWriter writer) {
 for (RVFDatum<L, F> example : this) {
  final int labelId = this.labelIndex.indexOf(example.label());
  writer.print(labelId);
  final Counter<F> counts = example.asFeaturesCounter();
  for (F key : counts.keySet()) {
   final int featureId = this.featureIndex.indexOf(key);
   writer.format(Locale.ENGLISH, " %s:%f", featureId, counts.getCount(key));
  }
  writer.println();
 }
}

L label = datum.label();
if (label == null) {
 throw new IllegalArgumentException("Cannot compute precision and recall on unlabelled dataset. Offending datum: " + datum);

/**
 * A helper function for dumping the accuracy of the trained classifier.
 *
 * Logs the dataset size, the number of datums labelled CLAUSE_SPLIT and
 * CLAUSE_INTERM, and precision / recall / F1 (formatted as percentages) for
 * each of those two labels.
 *
 * NOTE(review): F1 is computed as 2pr / (p + r) with no guard, so it is NaN
 * when precision and recall are both zero.
 *
 * @param classifier The classifier to evaluate.
 * @param dataset The dataset to evaluate the classifier on.
 */
public static void dumpAccuracy(Classifier<ClauseSplitter.ClauseClassifierLabel, String> classifier, GeneralDataset<ClauseSplitter.ClauseClassifierLabel, String> dataset) {
 DecimalFormat df = new DecimalFormat("0.00%");
 log("size:         " + dataset.size());
 // count() tallies the matches directly instead of collecting them into a throwaway List
 log("split count:  " + StreamSupport.stream(dataset.spliterator(), false).filter(x -> x.label() == ClauseSplitter.ClauseClassifierLabel.CLAUSE_SPLIT).count());
 log("interm count: " + StreamSupport.stream(dataset.spliterator(), false).filter(x -> x.label() == ClauseSplitter.ClauseClassifierLabel.CLAUSE_INTERM).count());
 Pair<Double, Double> pr = classifier.evaluatePrecisionAndRecall(dataset, ClauseSplitter.ClauseClassifierLabel.CLAUSE_SPLIT);
 log("p  (split):   " + df.format(pr.first));
 log("r  (split):   " + df.format(pr.second));
 log("f1 (split):   " + df.format(2 * pr.first * pr.second / (pr.first + pr.second)));
 pr = classifier.evaluatePrecisionAndRecall(dataset, ClauseSplitter.ClauseClassifierLabel.CLAUSE_INTERM);
 log("p  (interm):  " + df.format(pr.first));
 log("r  (interm):  " + df.format(pr.second));
 log("f1 (interm):  " + df.format(2 * pr.first * pr.second / (pr.first + pr.second)));
}

/**
 * Trains a classifier from a dataset of RVFDatum examples.
 *
 * The examples are converted to a dense int matrix with one row per example
 * and one column per feature of {@code featureSet}. Features a datum does
 * not mention keep the array default of zero; mentioned features get their
 * count truncated to an int. This method also replaces the
 * {@code labelIndex} and {@code featureIndex} fields with fresh indices.
 *
 * NOTE(review): a datum feature that is missing from {@code featureSet}
 * presumably gets a negative index from {@code featureIndex.indexOf} and
 * would then fail the array store -- confirm callers always pass a covering
 * feature set.
 *
 * @param examples the labelled training examples
 * @param featureSet the features that define the columns of the data matrix
 * @return the result of the array-based {@code trainClassifier} overload
 */
public NaiveBayesClassifier<L, F> trainClassifier(GeneralDataset<L, F> examples, Set<F> featureSet) {
 final int numFeatures = featureSet.size();
 final int numExamples = examples.size();
 final int[][] data = new int[numExamples][numFeatures];
 final int[] labels = new int[numExamples];
 labelIndex = new HashIndex<>();
 featureIndex = new HashIndex<>();
 for (F feat : featureSet) {
  featureIndex.add(feat);
 }
 for (int row = 0; row < numExamples; row++) {
  final RVFDatum<L, F> example = examples.getRVFDatum(row);
  final Counter<F> counts = example.asFeaturesCounter();
  for (F feature : counts.keySet()) {
   data[row][featureIndex.indexOf(feature)] = (int) counts.getCount(feature);
  }
  final L gold = example.label();
  labelIndex.add(gold);
  labels[row] = labelIndex.indexOf(gold);
 }
 return trainClassifier(data, labels, numFeatures, labelIndex.size(), labelIndex, featureIndex);
}

/**
 * Builds a new datum whose feature values are standardised with the
 * per-feature {@code means} and {@code stdevs} of this dataset:
 * {@code (value - mean) / stdev}. A feature whose stdev is exactly zero
 * keeps its original value, and features unknown to {@code featureIndex}
 * are left out of the result. The label is carried over unchanged.
 *
 * @param datum the datum to scale; it is not modified
 * @return a new datum holding the scaled features and the original label
 */
public RVFDatum<L, F> scaleDatumGaussian(RVFDatum<L, F> datum) {
 // The dataset itself has to be scaled first if that has not happened yet
 if (means == null || stdevs == null) {
  scaleFeaturesGaussian();
 }
 final Counter<F> rescaled = new ClassicCounter<>();
 for (F feature : datum.asFeatures()) {
  final int index = this.featureIndex.indexOf(feature);
  if (index < 0) {
   continue; // feature not in this dataset's index
  }
  final double raw = datum.asFeaturesCounter().getCount(feature);
  final double scaled = (stdevs[index] != 0) ? (raw - means[index]) / stdevs[index] : raw;
  rescaled.incrementCount(feature, scaled);
 }
 return new RVFDatum<>(rescaled, datum.label());
}

while(iter.hasNext()){
 RVFDatum<String, ScorePhraseMeasures> inst = iter.next();
 newdataset.add(new BasicDatum<>(inst.asFeatures(), inst.label()));

/**
 * Linearly rescales each feature value of the datum with the per-feature
 * {@code minValues} and {@code maxValues} of this dataset:
 * {@code (value - min) / (max - min)}. A feature whose min equals its max
 * keeps its original value.
 * NOTE1: the result is not guaranteed to lie between 0 and 1 for a test
 * datum. NOTE2: features that are not in this dataset's
 * {@code featureIndex} are dropped from the returned datum.
 *
 * @param datum the datum to scale; it is not modified
 * @return a new datum holding the scaled features and the original label
 */
public RVFDatum<L, F> scaleDatum(RVFDatum<L, F> datum) {
 // The dataset itself has to be scaled first if that has not happened yet
 if (minValues == null || maxValues == null) {
  scaleFeatures();
 }
 final Counter<F> rescaled = new ClassicCounter<>();
 for (F feature : datum.asFeatures()) {
  final int index = this.featureIndex.indexOf(feature);
  if (index < 0) {
   continue; // feature not in this dataset's index
  }
  final double raw = datum.asFeaturesCounter().getCount(feature);
  final double scaled = (minValues[index] != maxValues[index])
    ? (raw - minValues[index]) / (maxValues[index] - minValues[index])
    : raw;
  rescaled.incrementCount(feature, scaled);
 }
 return new RVFDatum<>(rescaled, datum.label());
}

/**
 * Builds a sigmoid model to turn the classifier outputs into probabilities.
 *
 * For every datum of the dataset, the classifier's per-label scores become
 * the features of a new datum carrying the original label; a linear
 * classifier is then trained on those datums with a NULL-type log prior.
 *
 * @param classifier the classifier whose scores are used as features
 * @param dataset the labelled data the sigmoid model is fitted on
 * @return the linear classifier trained on the score-based dataset
 */
private LinearClassifier<L, L> fitSigmoid(SVMLightClassifier<L, F> classifier, GeneralDataset<L, F> dataset) {
 final RVFDataset<L, L> plattDataset = new RVFDataset<>();
 final int numExamples = dataset.size();
 for (int idx = 0; idx < numExamples; idx++) {
  final RVFDatum<L, F> example = dataset.getRVFDatum(idx);
  // NOTE(review): the explicit cast presumably selects the Datum overload of scoresOf -- confirm
  final Counter<L> classScores = classifier.scoresOf((Datum<L, F>) example);
  // Adds a count under the null key; presumably acts as a bias feature -- confirm
  classScores.incrementCount(null);
  plattDataset.add(new RVFDatum<>(classScores, example.label()));
 }
 final LinearClassifierFactory<L, L> sigmoidFactory = new LinearClassifierFactory<>();
 sigmoidFactory.setPrior(new LogPrior(LogPrior.LogPriorType.NULL));
 return sigmoidFactory.trainClassifier(plattDataset);
}

.forEach(x -> {
  synchronized (dataset) {
   distribution.incrementCount(x.label());
   dataset.add(x);

/**
 * Returns a String representation of this RVFDatum listing its id, the
 * value of {@code asFeaturesCounter()} and the value of {@code label()}.
 *
 * @return the textual form {@code RVFDatum[id=..., features=...,label=...]}
 */
@Override
public String toString() {
 final String featuresPart = ", features=" + asFeaturesCounter();
 final String labelPart = ",label=" + label();
 return "RVFDatum[id=" + id + featuresPart + labelPart + "]";
}

/**
 * Renders this RVFDatum as a single-line String made of the value of
 * {@code asFeaturesCounter()} and the value of {@code label()}.
 *
 * @return the textual form {@code RVFDatum[features=...,label=...]}
 */
@Override
public String toString() {
 return new StringBuilder("RVFDatum[features=")
   .append(asFeaturesCounter())
   .append(",label=")
   .append(label())
   .append("]")
   .toString();
}

/**
 * Returns a String representation of this RVFDatum listing the value of
 * {@code asFeatures()} (the features without their values) and the label.
 *
 * @return the textual form {@code RVFDatum[features=...,label=...]}
 */
@Override
public String toString() {
 final String featuresPart = "features=" + asFeatures();
 final String labelPart = ",label=" + label();
 return "RVFDatum[" + featuresPart + labelPart + "]";
}

/**
 * Forwards the example's feature counter and its label to
 * {@code probabilityOf} and returns that result unchanged.
 *
 * @param example the datum whose features and label are forwarded
 * @return the value computed by {@code probabilityOf}
 */
private double probabilityOfRVFDatum(RVFDatum<L, F> example) {
 final L exampleLabel = example.label();
 final double probability = probabilityOf(example.asFeaturesCounter(), exampleLabel);
 return probability;
}

/**
 * Dumps this dataset to the writer, one datum per line. Each line starts
 * with the index of the datum's label in {@code labelIndex} and continues
 * with a {@code " featureIndex:value"} pair for every key of the datum's
 * feature counter, formatted with {@code Locale.ENGLISH}.
 *
 * @param writer destination of the output; it is neither flushed nor closed here
 */
public void writeSVMLightFormat(PrintWriter writer) {
 for (RVFDatum<L, F> row : this) {
  writer.print(this.labelIndex.indexOf(row.label()));
  final Counter<F> values = row.asFeaturesCounter();
  for (F f : values.keySet()) {
   final double value = values.getCount(f);
   final int column = this.featureIndex.indexOf(f);
   // printf is documented to behave exactly like format
   writer.printf(Locale.ENGLISH, " %s:%f", column, value);
  }
  writer.println();
 }
}

/**
 * Writes one line per datum of this dataset to the given writer: the index
 * of the datum's label in {@code labelIndex}, followed by one
 * {@code " featureIndex:value"} pair for each key of the datum's feature
 * counter.
 *
 * Values are formatted with a fixed English locale so the decimal separator
 * is always a dot, whatever the JVM's default locale is.
 *
 * @param writer destination of the output; it is neither flushed nor closed here
 */
public void writeSVMLightFormat(PrintWriter writer) {
 for (RVFDatum<L, F> datum : this) {
  writer.print(this.labelIndex.indexOf(datum.label()));
  Counter<F> features = datum.asFeaturesCounter();
  for (F feature : features.keySet()) {
   double count = features.getCount(feature);
   // Explicit locale: the default-locale overload would print "1,5" in comma-decimal locales
   writer.format(java.util.Locale.ENGLISH, " %s:%f", this.featureIndex.indexOf(feature), count);
  }
  writer.println();
 }
}

Javadoc

labels for this Datum. Invariant: always non-null

Popular methods of RVFDatum

<init>
Constructs a new RVFDatum with the given features and label.
asFeatures
Returns the list of features without values
asFeaturesCounter
Returns the Counter of features and values
setLabel
Removes all currently assigned Labels for this Datum then adds the given Label. Calling setLabel(null) effectively clears all labels.
setID
Sets id for this instance

Popular in Java

Running tasks concurrently on multiple threads
putExtra (Intent)
getExternalFilesDir (Context)
findViewById (Activity)
MalformedURLException (java.net)
This exception is thrown when a program attempts to create a URL from an incorrect specification.
ServletException (javax.servlet)
Defines a general exception a servlet can throw when it encounters difficulty.
Options (org.apache.commons.cli)
Main entry-point into the library. Options represents a collection of Option objects, which describ
LogFactory (org.apache.commons.logging)
Factory for creating Log instances, with discovery and configuration features similar to that employ
JOptionPane (javax.swing)
Loader (org.hibernate.loader)
Abstract superclass of object loading (and querying) strategies. This class implements useful common
CodeWhisperer alternatives

How to use the label method in edu.stanford.nlp.ling.RVFDatum

Best Java code snippets using edu.stanford.nlp.ling.RVFDatum.label (Showing top 20 results out of 315)

How to use
label
method
in
edu.stanford.nlp.ling.RVFDatum