/**
 * Compares this RVFDatum with another object by feature content only.
 * The result is true when the other object is this very instance, or is an
 * RVFDatum whose feature counter equals this datum's features. Anything that
 * is not an RVFDatum is never equal.
 * <i>Implementation note:</i> labels are not part of the comparison.
 *
 * @param o the object to compare against
 * @return true if {@code o} is an RVFDatum carrying equal features
 */
@Override
@SuppressWarnings("unchecked")
public boolean equals(Object o) {
  if (o == this) {
    return true;
  }
  if (o instanceof RVFDatum) {
    RVFDatum<L, F> other = (RVFDatum<L, F>) o;
    return features.equals(other.asFeaturesCounter());
  }
  return false;
}
@Override @Deprecated //use classOf(Datum) instead. public L classOf(RVFDatum<L, F> example) { return classOf(example.asFeaturesCounter()); }
/**
 * Returns a String representation of this RVFDatum, listing its id, its
 * features counter and its label.
 */
@Override
public String toString() {
  // Pieces are built in the same left-to-right order as a single concatenation.
  final String idPart = "RVFDatum[id=" + id;
  final String featuresPart = ", features=" + asFeaturesCounter();
  final String labelPart = ",label=" + label();
  return idPart + featuresPart + labelPart + "]";
}
/**
 * Looks up the value a datum assigns to a feature.
 *
 * @param feature the feature to look up
 * @param datum the datum being inspected
 * @return the feature's count in the datum's features counter when the datum
 *         is an RVFDatum; otherwise the constant 1.0
 */
private double valueOfFeature(F feature, Datum<L,F> datum) {
  if (!(datum instanceof RVFDatum)) {
    // Non-RVF datums carry no per-feature value here, so every feature gets 1.0.
    return 1.0;
  }
  RVFDatum<L, F> realValued = (RVFDatum<L, F>) datum;
  return realValued.asFeaturesCounter().getCount(feature);
}
/**
 * Convenience overload: extracts the datum's features counter and delegates
 * to {@code probabilityOfTrue(Counter)}.
 *
 * @param datum the datum whose features counter is scored
 * @return the value returned by the counter-based overload
 */
public double probabilityOfTrue(RVFDatum<Boolean,String> datum) { return probabilityOfTrue(datum.asFeaturesCounter()); } public double probabilityOfTrue(Counter<String> features) {
/**
 * Convenience overload: extracts the datum's features counter and delegates
 * to {@code probabilityOfTrue(Counter)}.
 *
 * @param datum the datum whose features counter is scored
 * @return the value returned by the counter-based overload
 */
public double probabilityOfTrue(RVFDatum<Boolean, String> datum) { return probabilityOfTrue(datum.asFeaturesCounter()); } public double probabilityOfTrue(Counter<String> features) {
/**
 * Classifies an RVFDatum by passing its features counter to the
 * counter-based {@code classOf}.
 *
 * @param example the datum to classify
 * @return the label returned by {@code classOf} for the datum's features
 */
private L classOfRVFDatum(RVFDatum<L, F> example) {
  Counter<F> featureCounts = example.asFeaturesCounter();
  return classOf(featureCounts);
}
/**
 * Registers every datum in the collection: its features counter is added to
 * {@code instances} under the datum's label, and the counter-to-label pair
 * is recorded in {@code classLookup}.
 *
 * @param datums the labelled datums to add
 */
protected void addInstances(Collection<RVFDatum<K, V>> datums) {
  for (RVFDatum<K, V> instance : datums) {
    final K instanceLabel = instance.label();
    final Counter<V> featureVector = instance.asFeaturesCounter();

    instances.add(instanceLabel, featureVector);
    classLookup.put(featureVector, instanceLabel);
  }
}
/**
 * Computes the probability of an RVFDatum's own label by delegating to
 * {@code probabilityOf(features, label)}.
 *
 * @param example the datum supplying both the features counter and the label
 * @return the value returned by {@code probabilityOf} for that pair
 */
private double probabilityOfRVFDatum(RVFDatum<L, F> example) {
  Counter<F> featureCounts = example.asFeaturesCounter();
  L ownLabel = example.label();
  return probabilityOf(featureCounts, ownLabel);
}
/**
 * Scores an RVFDatum against a given label. The datum's own label plays no
 * role; only its feature values are read.
 *
 * @param example supplies the observed feature values; its label is ignored
 * @param label the label to score the observation with
 * @return the sum over features of weight times value, plus the label's threshold
 */
private double scoreOfRVFDatum(RVFDatum<L, F> example, L label) {
  final int labelId = labelIndex.indexOf(label);
  final Counter<F> featureCounts = example.asFeaturesCounter();

  double total = 0.0;
  for (Map.Entry<F, Double> featureAndValue : featureCounts.entrySet()) {
    final F feature = featureAndValue.getKey();
    total += weight(feature, labelId) * featureAndValue.getValue();
  }
  // The threshold is added last, after all weighted feature values.
  return total + thresholds[labelId];
}
/**
 * Writes every datum of this dataset to the writer, one datum per line.
 * Each line starts with the index of the datum's label, followed by one
 * {@code " featureIndex:value"} pair per feature, formatted with
 * {@link Locale#ENGLISH}.
 *
 * @param writer destination for the formatted lines
 */
public void writeSVMLightFormat(PrintWriter writer) {
  for (RVFDatum<L, F> row : this) {
    final Counter<F> featureCounts = row.asFeaturesCounter();
    writer.print(this.labelIndex.indexOf(row.label()));
    for (F feature : featureCounts.keySet()) {
      final double value = featureCounts.getCount(feature);
      writer.printf(Locale.ENGLISH, " %s:%f", this.featureIndex.indexOf(feature), value);
    }
    writer.println();
  }
}
public static <L,L2,F> Datum<L2,F> mapDatum(Datum<L,F> d, Map<L,L2> labelMapping, L2 defaultLabel) { // TODO: How to copy datum? L2 newLabel = labelMapping.get(d.label()); if (newLabel == null) { newLabel = defaultLabel; } if (d instanceof RVFDatum) { return new RVFDatum<>(((RVFDatum<L, F>) d).asFeaturesCounter(), newLabel); } else { return new BasicDatum<>(d.asFeatures(), newLabel); } }
/**
 * Builds a two-entry score counter for an RVFDatum: {@code classes[0]}
 * receives the negated score and {@code classes[1]} the score itself, where
 * the score is {@code scoreOf} applied to the datum's features counter.
 *
 * @param example the datum to score
 * @return a counter holding one score per class
 */
private Counter<L> scoresOfRVFDatum(RVFDatum<L, F> example) {
  Counter<F> featureCounts = example.asFeaturesCounter();
  final double rawScore = scoreOf(featureCounts);

  Counter<L> scores = new ClassicCounter<>();
  scores.setCount(classes[0], -rawScore);
  scores.setCount(classes[1], rawScore);
  return scores;
}
/** Construct a counter with keys the labels of the classifier and * values the score (unnormalized log probability) of each class * for an RVFDatum. */ private Counter<L> scoresOfRVFDatum(RVFDatum<L, F> example) { Counter<L> scores = new ClassicCounter<>(); // Index the features in the datum Counter<F> asCounter = example.asFeaturesCounter(); Counter<Integer> asIndexedCounter = new ClassicCounter<>(asCounter.size()); for (Map.Entry<F, Double> entry : asCounter.entrySet()) { asIndexedCounter.setCount(featureIndex.indexOf(entry.getKey()), entry.getValue()); } // Set the scores appropriately for (L l : labels()) { scores.setCount(l, scoreOfRVFDatum(asIndexedCounter, l)); } //System.out.println("Scores are: " + scores + " (gold: " + example.label() + ")"); return scores; }
/**
 * Computes a score for every label given an RVFDatum.
 * The result starts from {@code priors} (plus {@code priorZero} when
 * {@code addZeroValued} is set). For each label and each feature of the
 * datum, the weight of the feature's count truncated to an int is added,
 * and, when {@code addZeroValued} is set, the weight for the {@code zero}
 * value is subtracted.
 *
 * @param example the datum to score
 * @return a counter holding one score per label
 */
public ClassicCounter<L> scoresOf(RVFDatum<L, F> example) {
  ClassicCounter<L> result = new ClassicCounter<>();
  Counters.addInPlace(result, priors);
  if (addZeroValued) {
    Counters.addInPlace(result, priorZero);
  }

  for (L candidate : labels) {
    Counter<F> featureCounts = example.asFeaturesCounter();
    double labelScore = 0.0;
    for (F feature : featureCounts.keySet()) {
      // Counts are truncated toward zero before the weight lookup.
      final int truncated = (int) featureCounts.getCount(feature);
      labelScore += weight(candidate, feature, Integer.valueOf(truncated));
      if (addZeroValued) {
        labelScore -= weight(candidate, feature, zero);
      }
    }
    result.incrementCount(candidate, labelScore);
  }
  return result;
}
@Override public Counter<L> probabilityOf(Datum<L, F> example) { // calculate the feature indices and feature values int[] featureIndices = LogisticUtils.indicesOf(example.asFeatures(), featureIndex); double[] featureValues; if (example instanceof RVFDatum<?, ?>) { Collection<Double> featureValuesCollection = ((RVFDatum<?, ?>) example).asFeaturesCounter().values(); featureValues = LogisticUtils.convertToArray(featureValuesCollection); } else { featureValues = new double[example.asFeatures().size()]; Arrays.fill(featureValues, 1.0); } // calculate probability of each class Counter<L> result = new ClassicCounter<>(); int numClasses = labelIndex.size(); double[] sigmoids = LogisticUtils.calculateSigmoids(weights, featureIndices, featureValues); for (int c = 0; c < numClasses; c++) { L label = labelIndex.get(c); result.incrementCount(label, sigmoids[c]); } return result; }
@Override // If you edit me, also take care of WeightedRVFDataset public void add(Datum<L, F> d) { if (d instanceof RVFDatum<?, ?>) { addLabel(d.label()); addFeatures(((RVFDatum<L, F>) d).asFeaturesCounter()); size++; } else { addLabel(d.label()); addFeatures(Counters.asCounter(d.asFeatures())); size++; } }
public RVFDatum<L, F> scaleDatumGaussian(RVFDatum<L, F> datum) { // scale this dataset before scaling the datum if (means == null || stdevs == null) scaleFeaturesGaussian(); Counter<F> scaledFeatures = new ClassicCounter<>(); for (F feature : datum.asFeatures()) { int fID = this.featureIndex.indexOf(feature); if (fID >= 0) { double oldVal = datum.asFeaturesCounter().getCount(feature); double newVal; if (stdevs[fID] != 0) newVal = (oldVal - means[fID]) / stdevs[fID]; else newVal = oldVal; scaledFeatures.incrementCount(feature, newVal); } } return new RVFDatum<>(scaledFeatures, datum.label()); }
/**
 * Adds one datum together with its source and id: records the label, the
 * features and the source/id pair, then increments {@code size}. An RVFDatum
 * contributes its own features counter; any other datum has its feature
 * collection converted with {@code Counters.asCounter}.
 *
 * @param d the datum to add
 * @param src the source passed to {@code addSourceAndId}
 * @param id the id passed to {@code addSourceAndId}
 */
public void add(Datum<L, F> d, String src, String id) {
  addLabel(d.label());

  final Counter<F> featureCounts;
  if (d instanceof RVFDatum<?, ?>) {
    featureCounts = ((RVFDatum<L, F>) d).asFeaturesCounter();
  } else {
    featureCounts = Counters.asCounter(d.asFeatures());
  }
  addFeatures(featureCounts);
  addSourceAndId(src, id);
  size++;
}
/** * Scales the values of each feature linearly using the min and max values * found in the training set. NOTE1: Not guaranteed to be between 0 and 1 for * a test datum. NOTE2: Also filters out features from the datum that are not * seen at training time. * * @param datum * @return a new datum */ public RVFDatum<L, F> scaleDatum(RVFDatum<L, F> datum) { // scale this dataset before scaling the datum if (minValues == null || maxValues == null) scaleFeatures(); Counter<F> scaledFeatures = new ClassicCounter<>(); for (F feature : datum.asFeatures()) { int fID = this.featureIndex.indexOf(feature); if (fID >= 0) { double oldVal = datum.asFeaturesCounter().getCount(feature); double newVal; if (minValues[fID] != maxValues[fID]) newVal = (oldVal - minValues[fID]) / (maxValues[fID] - minValues[fID]); else newVal = oldVal; scaledFeatures.incrementCount(feature, newVal); } } return new RVFDatum<>(scaledFeatures, datum.label()); }