/** Scores a plain Datum by first lifting it into real-valued (RVF) form. */
public ClassicCounter<L> scoresOf(Datum<L, F> example) {
  return scoresOf(new RVFDatum<>(example));
}
/** Classifies a plain Datum by first lifting it into real-valued (RVF) form. */
public L classOf(Datum<L, F> example) {
  return classOf(new RVFDatum<>(example));
}
public static RVFDatum<String, String> svmLightLineToRVFDatum(String l) { l = l.replaceFirst("#.*$", ""); // remove any trailing comments String[] line = l.split("\\s+"); ClassicCounter<String> features = new ClassicCounter<>(); for (int i = 1; i < line.length; i++) { String[] f = line[i].split(":"); if (f.length != 2) { throw new IllegalArgumentException("Bad data format: " + l); } double val = Double.parseDouble(f[1]); features.incrementCount(f[0], val); } return new RVFDatum<>(features, line[0]); }
/**
 * Scores the probability that the given mention is a "true" instance,
 * as judged by the underlying model {@code rf} over the extracted features.
 * Any exception raised during feature extraction or scoring is rethrown
 * as a RuntimeException.
 */
public double probabilityOf(Mention p, Set<Mention> shares, Set<String> neStrings, Dictionaries dict, Properties props) {
  try {
    // The datum's label is never consulted when scoring; false is a placeholder.
    RVFDatum<Boolean, String> datum =
        new RVFDatum<>(extractFeatures(p, shares, neStrings, dict, props), Boolean.FALSE);
    return rf.probabilityOfTrue(datum);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }
}
/**
 * Get the sentiment of a sentence.
 *
 * @param sentence The sentence as a core map.
 *                 POS tags and Lemmas are a prerequisite.
 *                 See {@link edu.stanford.nlp.ling.CoreAnnotations.PartOfSpeechAnnotation} and
 *                 {@link edu.stanford.nlp.ling.CoreAnnotations.LemmaAnnotation}.
 *
 * @return The sentiment class of this sentence.
 */
public SentimentClass classify(CoreMap sentence) {
  // Featurize the sentence and hand the (unlabeled) datum to the classifier.
  RVFDatum<SentimentClass, String> datum = new RVFDatum<>(featurize(sentence));
  return impl.classOf(datum);
}
/**
 * Builds a training datum for a binary relation mention, collapsing any
 * label other than {@code positiveLabel} to {@link RelationMention#UNRELATED}.
 *
 * @return the datum, or null when the relation does not have exactly two arguments
 */
public Datum<String,String> createDatum(RelationMention rel, String positiveLabel) {
  if (rel.getArgs().size() != 2) {
    return null;
  }
  Counter<String> features = new ClassicCounter<>();
  addFeatures(features, rel, featureList);
  String type = rel.getType();
  String label = type.equals(positiveLabel) ? type : RelationMention.UNRELATED;
  return new RVFDatum<>(features, label);
}
/**
 * Given a set of vectors, and a mapping from each vector to its class label,
 * generates the sets of instances used to perform classifications and returns
 * the corresponding K-NN classifier.
 *
 * NOTE: if l2NormalizeVectors is T, creates a copy and applies L2Normalize to it;
 * the caller's vectors are never mutated.
 *
 * NOTE(review): labelMap is declared {@code Map<V, K>} but is queried with a
 * {@code Counter<V>} key below; this compiles only because Map.get takes Object.
 * Confirm with callers whether the intended key type is {@code Counter<V>}.
 */
public KNNClassifier<K,V> train(Collection<Counter<V>> vectors, Map<V, K> labelMap) {
  KNNClassifier<K, V> classifier = new KNNClassifier<>(k, weightedVotes, l2NormalizeVectors);
  Collection<RVFDatum<K, V>> instances = new ArrayList<>();
  for (Counter<V> vector : vectors) {
    K label = labelMap.get(vector);
    Counter<V> feats = l2NormalizeVectors
        ? Counters.L2Normalize(new ClassicCounter<>(vector))
        : vector;
    instances.add(new RVFDatum<>(feats, label));
  }
  classifier.addInstances(instances);
  return classifier;
}
/** * Returns a counter for the log probability of each of the classes * looking at the the sum of e^v for each count v, should be 1 * Note: Uses SloppyMath.logSum which isn't exact but isn't as * offensively slow as doing a series of exponentials */ @Override public Counter<L> logProbabilityOf(RVFDatum<L, F> example) { if (platt == null) { throw new UnsupportedOperationException("If you want to ask for the probability, you must train a Platt model!"); } Counter<L> scores = scoresOf(example); scores.incrementCount(null); Counter<L> probs = platt.logProbabilityOf(new RVFDatum<>(scores)); //System.out.println(scores+" "+probs); return probs; } }
/** * Returns a counter for the log probability of each of the classes * looking at the the sum of e^v for each count v, should be 1 * Note: Uses SloppyMath.logSum which isn't exact but isn't as * offensively slow as doing a series of exponentials */ @Override public Counter<L> logProbabilityOf(Datum<L, F> example) { if (platt == null) { throw new UnsupportedOperationException("If you want to ask for the probability, you must train a Platt model!"); } Counter<L> scores = scoresOf(example); scores.incrementCount(null); Counter<L> probs = platt.logProbabilityOf(new RVFDatum<>(scores)); //System.out.println(scores+" "+probs); return probs; }
public static <L,L2,F> Datum<L2,F> mapDatum(Datum<L,F> d, Map<L,L2> labelMapping, L2 defaultLabel) { // TODO: How to copy datum? L2 newLabel = labelMapping.get(d.label()); if (newLabel == null) { newLabel = defaultLabel; } if (d instanceof RVFDatum) { return new RVFDatum<>(((RVFDatum<L, F>) d).asFeaturesCounter(), newLabel); } else { return new BasicDatum<>(d.asFeatures(), newLabel); } }
/**
 * Builds a datum for a binary relation mention, labeled with the relation's
 * own type (no label collapsing).
 *
 * @return the datum, or null when the relation does not have exactly two arguments
 */
public Datum<String,String> createDatum(RelationMention rel, Logger logger) {
  if (rel.getArgs().size() != 2) {
    return null;
  }
  Counter<String> features = new ClassicCounter<>();
  addFeatures(features, rel, featureList, logger);
  return new RVFDatum<>(features, rel.getType());
}
/** * Read SVM-light formatted data into this dataset. * * A strict SVM-light format is expected, where labels and features are both * encoded as integers. These integers are converted into the dataset label * and feature types using the indexes stored in this dataset. * * @param file The file from which the data should be read. */ public void readSVMLightFormat(File file) { for (String line : IOUtils.readLines(file)) { line = line.replaceAll("#.*", ""); // remove any trailing comments String[] items = line.split("\\s+"); Integer label = Integer.parseInt(items[0]); Counter<F> features = new ClassicCounter<>(); for (int i = 1; i < items.length; i++) { String[] featureItems = items[i].split(":"); int feature = Integer.parseInt(featureItems[0]); double value = Double.parseDouble(featureItems[1]); features.incrementCount(this.featureIndex.get(feature), value); } this.add(new RVFDatum<>(features, this.labelIndex.get(label))); } }
/**
 * Given a CollectionValued Map of vectors, treats outer key as label for each
 * set of inner vectors.
 * NOTE: if l2NormalizeVectors is T, creates a copy of each vector and applies
 * l2Normalize to it; the caller's vectors are never mutated.
 */
public KNNClassifier<K,V> train(CollectionValuedMap<K, Counter<V>> vecBag) {
  KNNClassifier<K, V> classifier = new KNNClassifier<>(k, weightedVotes, l2NormalizeVectors);
  Collection<RVFDatum<K, V>> instances = new ArrayList<>();
  for (K label : vecBag.keySet()) {
    for (Counter<V> vector : vecBag.get(label)) {
      Counter<V> feats = l2NormalizeVectors
          ? Counters.L2Normalize(new ClassicCounter<>(vector))
          : vector;
      instances.add(new RVFDatum<>(feats, label));
    }
  }
  classifier.addInstances(instances);
  return classifier;
}
/**
 * @return the index-ed datum, rebuilt as an RVF datum where each of the
 *         example's (binary) features carries a count of 1.0
 */
@Override
public RVFDatum<L, F> getRVFDatum(int index) {
  ClassicCounter<F> counts = new ClassicCounter<>();
  for (F feature : featureIndex.objects(data[index])) {
    counts.incrementCount(feature);
  }
  return new RVFDatum<>(counts, labelIndex.get(labels[index]));
}
/**
 * @return the index-ed datum
 *
 * Note, this returns a new RVFDatum object, not the original RVFDatum
 * that was added to the dataset.
 */
@Override
public RVFDatum<L, F> getRVFDatum(int index) {
  ClassicCounter<F> counts = new ClassicCounter<>();
  int[] feats = data[index];
  double[] vals = values[index];
  for (int j = 0; j < feats.length; j++) {
    counts.incrementCount(featureIndex.get(feats[j]), vals[j]);
  }
  return new RVFDatum<>(counts, labelIndex.get(labels[index]));
}
/**
 * Classifies the sentiment of raw text by annotating it and scoring the
 * first sentence.
 *
 * @see SimpleSentiment#classify(CoreMap)
 */
public SentimentClass classify(String text) {
  Annotation ann = new Annotation(text);
  pipeline.get().annotate(ann);
  // Only the first sentence of the annotation is classified.
  CoreMap sentence = ann.get(CoreAnnotations.SentencesAnnotation.class).get(0);
  RVFDatum<SentimentClass, String> datum = new RVFDatum<>(featurize(sentence));
  return impl.classOf(datum);
}
/**
 * Mostly just an alias, but make sure our featurizer is serializable!
 *
 * Maps a (fromState, action, toState) clause-splitting transition to a
 * counter of string features.
 */
public interface Featurizer extends Function<Triple<ClauseSplitterSearchProblem.State, ClauseSplitterSearchProblem.Action, ClauseSplitterSearchProblem.State>, Counter<String>>, Serializable {
  /** @return whether the given featurized transition represents a simple split. */
  boolean isSimpleSplit(Counter<String> feats);
}
private RVFDatum<String,String> makeRVFDatumFromStrings(String[] strings) { if (globalFlags.featureFormat) { ClassicCounter<String> theFeatures = new ClassicCounter<>(); for (int i = 0; i < strings.length; i++) { if (i != globalFlags.goldAnswerColumn) { if (isRealValued(flags[i])) { addFeatureValue(strings[i], flags[i], theFeatures); } else { theFeatures.setCount(strings[i], 1.0); } } } return new RVFDatum<>(theFeatures, strings[globalFlags.goldAnswerColumn]); } else { //logger.info("Read in " + strings); return makeRVFDatum(strings); } }
public RVFDatum<L, F> scaleDatumGaussian(RVFDatum<L, F> datum) { // scale this dataset before scaling the datum if (means == null || stdevs == null) scaleFeaturesGaussian(); Counter<F> scaledFeatures = new ClassicCounter<>(); for (F feature : datum.asFeatures()) { int fID = this.featureIndex.indexOf(feature); if (fID >= 0) { double oldVal = datum.asFeaturesCounter().getCount(feature); double newVal; if (stdevs[fID] != 0) newVal = (oldVal - means[fID]) / stdevs[fID]; else newVal = oldVal; scaledFeatures.incrementCount(feature, newVal); } } return new RVFDatum<>(scaledFeatures, datum.label()); }
/**
 * Builds a sigmoid (Platt) model to turn the classifier outputs into
 * probabilities: each training example's raw scores (plus a bias feature
 * keyed by null) become one datum in a secondary dataset, over which a
 * linear classifier with a null prior is trained.
 */
private LinearClassifier<L, L> fitSigmoid(SVMLightClassifier<L, F> classifier, GeneralDataset<L, F> dataset) {
  RVFDataset<L, L> plattDataset = new RVFDataset<>();
  int size = dataset.size();
  for (int i = 0; i < size; i++) {
    RVFDatum<L, F> d = dataset.getRVFDatum(i);
    Counter<L> scores = classifier.scoresOf((Datum<L,F>) d);
    scores.incrementCount(null); // bias feature keyed by null
    plattDataset.add(new RVFDatum<>(scores, d.label()));
  }
  LinearClassifierFactory<L, L> factory = new LinearClassifierFactory<>();
  factory.setPrior(new LogPrior(LogPrior.LogPriorType.NULL));
  return factory.trainClassifier(plattDataset);
}