public RVFDatum<L, F> scaleDatumGaussian(RVFDatum<L, F> datum) { // scale this dataset before scaling the datum if (means == null || stdevs == null) scaleFeaturesGaussian(); Counter<F> scaledFeatures = new ClassicCounter<>(); for (F feature : datum.asFeatures()) { int fID = this.featureIndex.indexOf(feature); if (fID >= 0) { double oldVal = datum.asFeaturesCounter().getCount(feature); double newVal; if (stdevs[fID] != 0) newVal = (oldVal - means[fID]) / stdevs[fID]; else newVal = oldVal; scaledFeatures.incrementCount(feature, newVal); } } return new RVFDatum<>(scaledFeatures, datum.label()); }
static RVFDatum<String, Integer> readDatum(String[] values, int classColumn, Set<Integer> skip, Map<Integer, Index<String>> indices) { ClassicCounter<Integer> c = new ClassicCounter<>(); RVFDatum<String, Integer> d = new RVFDatum<>(c); int attrNo = 0; for (int index = 0; index < values.length; index++) { if (index == classColumn) { d.setLabel(values[index]); continue;
/**
 * Fetches the datum at {@code index} via {@code getRVFDatum} and stamps it with
 * the id that {@code getRVFDatumId} reports for the same index.
 *
 * @param index position of the datum in this dataset
 * @return the datum at {@code index}, with its id set
 */
public RVFDatum<L, F> getRVFDatumWithId(int index) {
  RVFDatum<L, F> withId = getRVFDatum(index);
  withId.setID(getRVFDatumId(index));
  return withId;
}
/**
 * Returns a String representation of this RVFDatum, listing its id, its
 * feature counter and its label.
 */
@Override
public String toString() {
  StringBuilder text = new StringBuilder("RVFDatum[id=");
  text.append(id);
  text.append(", features=").append(asFeaturesCounter());
  text.append(",label=").append(label());
  text.append("]");
  return text.toString();
}
public static <L,L2,F> Datum<L2,F> mapDatum(Datum<L,F> d, Map<L,L2> labelMapping, L2 defaultLabel) { // TODO: How to copy datum? L2 newLabel = labelMapping.get(d.label()); if (newLabel == null) { newLabel = defaultLabel; } if (d instanceof RVFDatum) { return new RVFDatum<>(((RVFDatum<L, F>) d).asFeaturesCounter(), newLabel); } else { return new BasicDatum<>(d.asFeatures(), newLabel); } }
/**
 * Scores a generic datum by first copying it into an {@link RVFDatum} (using
 * the {@code RVFDatum(Datum)} constructor) and then delegating to the
 * {@code scoresOf} overload chosen for an {@code RVFDatum} argument.
 *
 * @param example the datum to score
 * @return the per-label scores computed for the converted datum
 */
public ClassicCounter<L> scoresOf(Datum<L, F> example) {
  return scoresOf(new RVFDatum<L, F>(example));
}
/**
 * Tests whether {@code o} is an RVFDatum whose feature counter equals this
 * datum's feature counter. Objects of any other type are never equal to an
 * RVFDatum.
 *
 * <p><i>Implementation note:</i> labels are not compared; only the feature
 * counters take part in the comparison.
 *
 * @param o the object to compare against
 * @return {@code true} if {@code o} is this very object, or an RVFDatum with an
 *         equal feature counter
 */
@Override
@SuppressWarnings("unchecked")
public boolean equals(Object o) {
  if (this == o) {
    return true;
  }
  if (o instanceof RVFDatum) {
    RVFDatum<L, F> other = (RVFDatum<L, F>) o;
    return features.equals(other.asFeaturesCounter());
  }
  return false;
}
/**
 * Trains a linear classifier on top of the raw scores of {@code classifier},
 * so that those scores can be turned into probabilities.
 *
 * <p>For every datum in {@code dataset} the classifier's score counter is
 * taken, the count stored under the {@code null} key is incremented, and the
 * counter is added, together with the datum's label, to a fresh dataset on
 * which the returned model is trained.
 *
 * @param classifier the classifier whose outputs are to be calibrated
 * @param dataset the data used to fit the calibration model
 * @return the linear classifier trained on the score features
 */
private LinearClassifier<L, L> fitSigmoid(SVMLightClassifier<L, F> classifier, GeneralDataset<L, F> dataset) {
  RVFDataset<L, L> calibrationData = new RVFDataset<>();
  int position = 0;
  while (position < dataset.size()) {
    RVFDatum<L, F> example = dataset.getRVFDatum(position);
    Counter<L> labelScores = classifier.scoresOf((Datum<L,F>) example);
    // NOTE(review): the null key presumably serves as a constant bias feature - confirm.
    labelScores.incrementCount(null);
    calibrationData.add(new RVFDatum<>(labelScores, example.label()));
    position++;
  }

  LinearClassifierFactory<L, L> trainer = new LinearClassifierFactory<>();
  trainer.setPrior(new LogPrior(LogPrior.LogPriorType.NULL));
  return trainer.trainClassifier(calibrationData);
}
/**
 * Creates an RVFDatum from a ready-made feature counter and a label.
 *
 * <p>The counter is stored by reference, not copied, so later changes to it
 * are visible through this datum.
 *
 * @param features the real-valued features of the new datum
 * @param label the label to assign via {@code setLabel}
 */
public RVFDatum(Counter<F> features, L label) {
  this.features = features;
  setLabel(label);
}
while(iter.hasNext()){ RVFDatum<String, ScorePhraseMeasures> inst = iter.next(); newdataset.add(new BasicDatum<>(inst.asFeatures(), inst.label()));
/**
 * Classifies every example supplied by the iterator and reports the fraction
 * whose predicted class equals the example's own label. The raw counts are
 * also written to the logger at info level.
 *
 * <p>The iterator is consumed completely. With no examples at all the result
 * is {@code NaN}, because zero is divided by a float zero.
 *
 * @param exampleIterator source of labelled examples to evaluate on
 * @return number of correct predictions divided by the number of examples
 */
public float accuracy(Iterator<RVFDatum<L, F>> exampleIterator) {
  int hits = 0;
  int seen = 0;
  while (exampleIterator.hasNext()) {
    RVFDatum<L, F> example = exampleIterator.next();
    L predicted = classOf(example);
    if (predicted.equals(example.label())) {
      hits++;
    }
    seen++;
  }
  logger.info("correct " + hits + " out of " + seen);
  return hits / (float) seen;
}
RVFDatum<String, String> datum = new RVFDatum<>(features, "isMention"); datum.setID(Integer.toString(dataset.size())); mapDatumToMention.put(dataset.size(), mention); dataset.add(datum); } else { RVFDatum<String, String> datum = new RVFDatum<>(features, "isNotMention"); datum.setID(Integer.toString(dataset.size())); dataset.add(datum); mapDatumToMention.put(dataset.size(), mention); RVFDatum<String, String> datum = new RVFDatum<>(features, "none"); datum.setID(Integer.toString(dataset.size())); mapDatumToMention.put(dataset.size(), mention); dataset.add(datum);
/**
 * Registers every datum of the given collection with this object: its feature
 * counter is added to {@code instances} under the datum's label, and
 * {@code classLookup} is updated to map that counter to the label.
 *
 * @param datums the labelled examples to add
 */
protected void addInstances(Collection<RVFDatum<K, V>> datums) {
  for (RVFDatum<K, V> example : datums) {
    K exampleLabel = example.label();
    Counter<V> featureVector = example.asFeaturesCounter();

    instances.add(exampleLabel, featureVector);
    classLookup.put(featureVector, exampleLabel);
  }
}
/**
 * Classifies a generic datum by first copying it into an {@link RVFDatum}
 * (using the {@code RVFDatum(Datum)} constructor) and then delegating to the
 * {@code classOf} overload chosen for an {@code RVFDatum} argument.
 *
 * @param example the datum to classify
 * @return the class predicted for the converted datum
 */
public L classOf(Datum<L, F> example) {
  return classOf(new RVFDatum<L, F>(example));
}
@Override @Deprecated //use classOf(Datum) instead. public L classOf(RVFDatum<L, F> example) { return classOf(example.asFeaturesCounter()); }
ClassicCounter<V> featVec = new ClassicCounter<>(vec.asFeaturesCounter()); Counters.normalize(featVec); vec = new RVFDatum<>(featVec); scores.setCount(instance, Counters.cosine(vec.asFeaturesCounter(), instance)); // set entry, for given instance and score
/**
 * Creates an RVFDatum that mirrors an arbitrary {@link Datum}: the count of
 * each feature reported by {@code m.asFeatures()} is incremented by 1.0 per
 * occurrence, and the label is taken over from {@code m}.
 *
 * <p><i>Implementation note:</i> a fresh counter is allocated for the
 * features, but the feature and label objects themselves are shared with
 * {@code m}, so the copy is only guaranteed correct if they are immutable.
 *
 * @param m The Datum to copy.
 */
public RVFDatum(Datum<L, F> m) {
  ClassicCounter<F> unitCounts = new ClassicCounter<>();
  for (F feature : m.asFeatures()) {
    unitCounts.incrementCount(feature, 1.0);
  }
  this.features = unitCounts;
  setLabel(m.label());
}
/**
 * Returns a String representation of this RVFDatum, listing the result of
 * {@code asFeatures()} and the label.
 */
@Override
public String toString() {
  StringBuilder text = new StringBuilder("RVFDatum[features=");
  text.append(asFeatures());
  text.append(",label=").append(label());
  text.append("]");
  return text.toString();
}
/** * Evaluate the accuracy of this classifier on the given dataset. * * @param testData The dataset to evaluate the classifier on. * @return The accuracy of the classifier on the given dataset. */ public default double evaluateAccuracy(GeneralDataset<L, F> testData) { int numCorrect = 0; for (RVFDatum<L, F> datum : testData) { // Get the gold label L label = datum.label(); if (label == null) { throw new IllegalArgumentException("Cannot compute precision and recall on unlabelled dataset. Offending datum: " + datum); } // Get the guess L guess = classOf(datum); // Compute statistics if (label.equals(guess)) { numCorrect += 1; } } return ((double) numCorrect) / ((double) testData.size); }