public RVFDatum<L, F> scaleDatumGaussian(RVFDatum<L, F> datum) { // scale this dataset before scaling the datum if (means == null || stdevs == null) scaleFeaturesGaussian(); Counter<F> scaledFeatures = new ClassicCounter<>(); for (F feature : datum.asFeatures()) { int fID = this.featureIndex.indexOf(feature); if (fID >= 0) { double oldVal = datum.asFeaturesCounter().getCount(feature); double newVal; if (stdevs[fID] != 0) newVal = (oldVal - means[fID]) / stdevs[fID]; else newVal = oldVal; scaledFeatures.incrementCount(feature, newVal); } } return new RVFDatum<>(scaledFeatures, datum.label()); }
while(iter.hasNext()){ RVFDatum<String, ScorePhraseMeasures> inst = iter.next(); newdataset.add(new BasicDatum<>(inst.asFeatures(), inst.label()));
/** * Scales the values of each feature linearly using the min and max values * found in the training set. NOTE1: Not guaranteed to be between 0 and 1 for * a test datum. NOTE2: Also filters out features from the datum that are not * seen at training time. * * @param datum * @return a new datum */ public RVFDatum<L, F> scaleDatum(RVFDatum<L, F> datum) { // scale this dataset before scaling the datum if (minValues == null || maxValues == null) scaleFeatures(); Counter<F> scaledFeatures = new ClassicCounter<>(); for (F feature : datum.asFeatures()) { int fID = this.featureIndex.indexOf(feature); if (fID >= 0) { double oldVal = datum.asFeaturesCounter().getCount(feature); double newVal; if (minValues[fID] != maxValues[fID]) newVal = (oldVal - minValues[fID]) / (maxValues[fID] - minValues[fID]); else newVal = oldVal; scaledFeatures.incrementCount(feature, newVal); } } return new RVFDatum<>(scaledFeatures, datum.label()); }
/**
 * Returns a String representation of this RVFDatum (lists its features and label).
 */
@Override
public String toString() {
  return "RVFDatum[features=" + asFeatures() + ",label=" + label() + "]";
}
public RVFDatum<L, F> scaleDatumGaussian(RVFDatum<L, F> datum) { // scale this dataset before scaling the datum if (means == null || stdevs == null) scaleFeaturesGaussian(); Counter<F> scaledFeatures = new ClassicCounter<>(); for (F feature : datum.asFeatures()) { int fID = this.featureIndex.indexOf(feature); if (fID >= 0) { double oldVal = datum.asFeaturesCounter().getCount(feature); double newVal; if (stdevs[fID] != 0) newVal = (oldVal - means[fID]) / stdevs[fID]; else newVal = oldVal; scaledFeatures.incrementCount(feature, newVal); } } return new RVFDatum<>(scaledFeatures, datum.label()); }
public RVFDatum<L,F> scaleDatumGaussian(RVFDatum<L,F> datum){ //scale this dataset before scaling the datum if(means == null || stdevs == null) scaleFeaturesGaussian(); Counter<F> scaledFeatures = new ClassicCounter<F>(); for(F feature : datum.asFeatures()){ int fID = this.featureIndex.indexOf(feature); if(fID >= 0){ double oldVal = datum.asFeaturesCounter().getCount(feature); double newVal; if(stdevs[fID] != 0) newVal = (oldVal - means[fID])/stdevs[fID]; else newVal = oldVal; scaledFeatures.incrementCount(feature, newVal); } } return new RVFDatum<L,F>(scaledFeatures, datum.label()); }
public RVFDatum<L, F> scaleDatumGaussian(RVFDatum<L, F> datum) { // scale this dataset before scaling the datum if (means == null || stdevs == null) scaleFeaturesGaussian(); Counter<F> scaledFeatures = new ClassicCounter<F>(); for (F feature : datum.asFeatures()) { int fID = this.featureIndex.indexOf(feature); if (fID >= 0) { double oldVal = datum.asFeaturesCounter().getCount(feature); double newVal; if (stdevs[fID] != 0) newVal = (oldVal - means[fID]) / stdevs[fID]; else newVal = oldVal; scaledFeatures.incrementCount(feature, newVal); } } return new RVFDatum<L, F>(scaledFeatures, datum.label()); }
/** * Scales the values of each feature linearly using the min and max values found in the training set. * NOTE1: Not guaranteed to be between 0 and 1 for a test datum. * NOTE2: Also filters out features from the datum that are not seen at training time. * @param datum * @return a new datum */ public RVFDatum<L,F> scaleDatum(RVFDatum<L,F> datum){ //scale this dataset before scaling the datum if(minValues == null || maxValues == null) scaleFeatures(); Counter<F> scaledFeatures = new ClassicCounter<F>(); for(F feature : datum.asFeatures()){ int fID = this.featureIndex.indexOf(feature); if(fID >= 0){ double oldVal = datum.asFeaturesCounter().getCount(feature); double newVal; if(minValues[fID] != maxValues[fID]) newVal = (oldVal - minValues[fID])/(maxValues[fID] - minValues[fID]); else newVal = oldVal; scaledFeatures.incrementCount(feature, newVal); } } return new RVFDatum<L,F>(scaledFeatures, datum.label()); }
/** * Scales the values of each feature linearly using the min and max values * found in the training set. NOTE1: Not guaranteed to be between 0 and 1 for * a test datum. NOTE2: Also filters out features from the datum that are not * seen at training time. * * @param datum * @return a new datum */ public RVFDatum<L, F> scaleDatum(RVFDatum<L, F> datum) { // scale this dataset before scaling the datum if (minValues == null || maxValues == null) scaleFeatures(); Counter<F> scaledFeatures = new ClassicCounter<>(); for (F feature : datum.asFeatures()) { int fID = this.featureIndex.indexOf(feature); if (fID >= 0) { double oldVal = datum.asFeaturesCounter().getCount(feature); double newVal; if (minValues[fID] != maxValues[fID]) newVal = (oldVal - minValues[fID]) / (maxValues[fID] - minValues[fID]); else newVal = oldVal; scaledFeatures.incrementCount(feature, newVal); } } return new RVFDatum<>(scaledFeatures, datum.label()); }
/** * Scales the values of each feature linearly using the min and max values * found in the training set. NOTE1: Not guaranteed to be between 0 and 1 for * a test datum. NOTE2: Also filters out features from the datum that are not * seen at training time. * * @param datum * @return a new datum */ public RVFDatum<L, F> scaleDatum(RVFDatum<L, F> datum) { // scale this dataset before scaling the datum if (minValues == null || maxValues == null) scaleFeatures(); Counter<F> scaledFeatures = new ClassicCounter<F>(); for (F feature : datum.asFeatures()) { int fID = this.featureIndex.indexOf(feature); if (fID >= 0) { double oldVal = datum.asFeaturesCounter().getCount(feature); double newVal; if (minValues[fID] != maxValues[fID]) newVal = (oldVal - minValues[fID]) / (maxValues[fID] - minValues[fID]); else newVal = oldVal; scaledFeatures.incrementCount(feature, newVal); } } return new RVFDatum<L, F>(scaledFeatures, datum.label()); }