/**
 * Constructs an InfoGain ranking over the features of {@code ilist}.
 * The feature scores are computed by {@code calcInfoGains} inside the
 * {@code super(...)} call.
 */
public InfoGain (InstanceList ilist) {
    super (ilist.getDataAlphabet(), calcInfoGains (ilist));
    // NOTE(review): baseEntropy/baseLabelDistribution are copied from static
    // fields that are presumably written by calcInfoGains during the super()
    // call above (a constructor cannot pass extra results any other way).
    // This static hand-off is not thread-safe — confirm single-threaded use.
    baseEntropy = staticBaseEntropy;
    baseLabelDistribution = staticBaseLabelDistribution;
}
/**
 * Installs one feature selection per label.  Each non-null entry must share
 * this classifier's data alphabet (compared by identity, per MALLET
 * convention); otherwise an IllegalArgumentException is thrown.
 *
 * @param selectedFeatures per-label selections, or null to clear them
 */
public void setPerLabelFeatureSelection (FeatureSelection[] selectedFeatures) {
    if (selectedFeatures != null) {
        // Validate every entry before accepting the array.
        for (FeatureSelection selection : selectedFeatures) {
            if (selection.getAlphabet() != getDataAlphabet()) {
                throw new IllegalArgumentException ("Vocabularies do not match");
            }
        }
    }
    perLabelFeatureSelection = selectedFeatures;
}
public FeatureCountTool (InstanceList instances) { this.instances = instances; numFeatures = instances.getDataAlphabet().size(); featureCounts = new double[numFeatures]; documentFrequencies = new int[numFeatures]; }
/**
 * Constructs an InfoGain ranking over the features of {@code ilist}.
 * The feature scores are computed by {@code calcInfoGains} inside the
 * {@code super(...)} call.
 */
public InfoGain (InstanceList ilist) {
    super (ilist.getDataAlphabet(), calcInfoGains (ilist));
    // NOTE(review): baseEntropy/baseLabelDistribution are copied from static
    // fields that are presumably written by calcInfoGains during the super()
    // call above (a constructor cannot pass extra results any other way).
    // This static hand-off is not thread-safe — confirm single-threaded use.
    baseEntropy = staticBaseEntropy;
    baseLabelDistribution = staticBaseLabelDistribution;
}
/**
 * Writes one "feature&lt;TAB&gt;coefficient" line per feature of the data
 * alphabet to {@code filename}, overwriting any existing file.
 *
 * @param filename path of the output file
 * @throws IOException if the file cannot be created
 */
public void printParameters(String filename) throws IOException {
    Alphabet alphabet = data.getDataAlphabet();
    // try-with-resources fixes a leak in the original: the writer was not
    // closed (nor its buffer flushed) if lookupObject/printf threw midway.
    try (PrintWriter out = new PrintWriter(filename)) {
        for (int feature = 0; feature < alphabet.size(); feature++) {
            out.printf("%s\t%.8f\n", alphabet.lookupObject(feature), coefficients[feature]);
        }
    }
}
/**
 * Writes one "feature&lt;TAB&gt;coefficient" line per feature of the data
 * alphabet to {@code filename}, overwriting any existing file.
 *
 * @param filename path of the output file
 * @throws IOException if the file cannot be created
 */
public void printParameters(String filename) throws IOException {
    Alphabet alphabet = data.getDataAlphabet();
    // try-with-resources fixes a leak in the original: the writer was not
    // closed (nor its buffer flushed) if lookupObject/printf threw midway.
    try (PrintWriter out = new PrintWriter(filename)) {
        for (int feature = 0; feature < alphabet.size(); feature++) {
            out.printf("%s\t%.8f\n", alphabet.lookupObject(feature), coefficients[feature]);
        }
    }
}
/**
 * Writes one "feature&lt;TAB&gt;coefficient" line per feature of the data
 * alphabet to {@code filename}, overwriting any existing file.
 *
 * @param filename path of the output file
 * @throws IOException if the file cannot be created
 */
public void printParameters(String filename) throws IOException {
    Alphabet alphabet = data.getDataAlphabet();
    // try-with-resources fixes a leak in the original: the writer was not
    // closed (nor its buffer flushed) if lookupObject/printf threw midway.
    try (PrintWriter out = new PrintWriter(filename)) {
        for (int feature = 0; feature < alphabet.size(); feature++) {
            out.printf("%s\t%.8f\n", alphabet.lookupObject(feature), coefficients[feature]);
        }
    }
}
/**
 * Returns a ranked feature vector of the gradient gain for the given
 * training data on the given trained CRF.  The instance list must have
 * the target labels as LabelSequence.
 *
 * @param ilist training instances
 * @param crf   a trained CRF
 * @return per-feature gradient gains ranked over the data alphabet
 */
public static RankedFeatureVector gradientGainFrom(InstanceList ilist, CRF crf) {
    double[] gains = new double[ilist.getDataAlphabet().size()];
    // Only the gradient-gain buffer is requested; the other two result
    // slots of fillResults are deliberately left null.
    fillResults(ilist, crf, gains, null, null);
    return new RankedFeatureVector(ilist.getDataAlphabet(), gains);
}
public void setFeatureSelection (FeatureSelection selectedFeatures) { if (selectedFeatures != null && selectedFeatures.getAlphabet() != null // xxx We allow a null vocabulary here? See CRF3.java && selectedFeatures.getAlphabet() != getDataAlphabet()) throw new IllegalArgumentException ("Vocabularies do not match"); featureSelection = selectedFeatures; }
/**
 * Returns the instances that contain the given feature object.
 *
 * @param feature the feature to look up in the index's data alphabet
 * @throws IllegalArgumentException if the feature is unknown to the index
 */
public ArrayList getInstancesWithFeature (Object feature) {
    // Pass false so the lookup never grows the alphabet.
    int featureIndex = ilist.getDataAlphabet().lookupIndex (feature, false);
    if (featureIndex == -1) {
        throw new IllegalArgumentException ("Feature "+feature+" not contained in InvertedIndex");
    }
    return getInstancesWithFeature (featureIndex);
}
public void setFeatureSelection (FeatureSelection selectedFeatures) { if (selectedFeatures != null && selectedFeatures.getAlphabet() != null // xxx We allow a null vocabulary here? See CRF3.java && selectedFeatures.getAlphabet() != getDataAlphabet()) throw new IllegalArgumentException ("Vocabularies do not match"); featureSelection = selectedFeatures; }
/**
 * Prepares co-occurrence counting structures sized to the data alphabet:
 * one int-int map of co-occurring features per feature, plus a
 * document-frequency slot per feature.
 *
 * @param instances the corpus whose feature co-occurrences will be counted
 */
public FeatureCooccurrenceCounter (InstanceList instances) {
    this.instances = instances;
    this.numFeatures = instances.getDataAlphabet().size();
    this.documentFrequencies = new int[numFeatures];
    this.featureFeatureCounts = new TIntIntHashMap[numFeatures];
    // Every row gets its own empty map up front so counting code never
    // has to null-check.
    for (int i = 0; i < numFeatures; i++) {
        featureFeatureCounts[i] = new TIntIntHashMap();
    }
}
/**
 * Prepares co-occurrence counting structures sized to the data alphabet:
 * one int-int map of co-occurring features per feature, plus a
 * document-frequency slot per feature.
 *
 * @param instances the corpus whose feature co-occurrences will be counted
 */
public FeatureCooccurrenceCounter (InstanceList instances) {
    this.instances = instances;
    this.numFeatures = instances.getDataAlphabet().size();
    this.documentFrequencies = new int[numFeatures];
    this.featureFeatureCounts = new TIntIntHashMap[numFeatures];
    // Every row gets its own empty map up front so counting code never
    // has to null-check.
    for (int i = 0; i < numFeatures; i++) {
        featureFeatureCounts[i] = new TIntIntHashMap();
    }
}
/**
 * Constructs a KLGain ranking over the features of {@code ilist}, scored
 * against the label vectors extracted from the given classifications.
 *
 * @param ilist           the instances whose features are ranked
 * @param classifications classifier outputs, one per instance
 */
public KLGain (InstanceList ilist, Classification[] classifications) {
    super (ilist.getDataAlphabet(),
           calcKLGains (ilist, getLabelVectorsFromClassifications(classifications)));
}
/**
 * Constructs a GradientGain ranking over the features of {@code ilist},
 * scored against the label vectors extracted from the given
 * classifications.
 *
 * @param ilist           the instances whose features are ranked
 * @param classifications classifier outputs, one per instance
 */
public GradientGain (InstanceList ilist, Classification[] classifications) {
    super (ilist.getDataAlphabet(),
           calcGradientGains (ilist, getLabelVectorsFromClassifications(classifications)));
}
/**
 * Constructs a KLGain ranking over the features of {@code ilist}, scored
 * against the label vectors extracted from the given classifications.
 *
 * @param ilist           the instances whose features are ranked
 * @param classifications classifier outputs, one per instance
 */
public KLGain (InstanceList ilist, Classification[] classifications) {
    super (ilist.getDataAlphabet(),
           calcKLGains (ilist, getLabelVectorsFromClassifications(classifications)));
}
/**
 * Replaces the <code>Instance</code> at position <code>index</code> with a
 * new one.  When assertions are enabled, the replacement must share this
 * list's data and target alphabets.
 *
 * @param index    position to overwrite
 * @param instance the replacement instance
 */
public void setInstance (int index, Instance instance) {
    assert getDataAlphabet().equals(instance.getDataAlphabet());
    assert getTargetAlphabet().equals(instance.getTargetAlphabet());
    set(index, instance);
}
/**
 * Builds a new alphabet containing only the features whose total count and
 * document frequency both fall inside the given inclusive bounds.
 *
 * @param minDocs  minimum document frequency (inclusive)
 * @param maxDocs  maximum document frequency (inclusive)
 * @param minCount minimum total count (inclusive)
 * @param maxCount maximum total count (inclusive)
 * @return a freshly-built alphabet of the surviving features
 */
public Alphabet getPrunedAlphabet(int minDocs, int maxDocs, int minCount, int maxCount) {
    Alphabet inputAlphabet = instances.getDataAlphabet();
    Alphabet outputAlphabet = new Alphabet();
    for (int inputType = 0; inputType < numFeatures; inputType++) {
        boolean countInRange = featureCounts[inputType] >= minCount
                && featureCounts[inputType] <= maxCount;
        boolean docsInRange = documentFrequencies[inputType] >= minDocs
                && documentFrequencies[inputType] <= maxDocs;
        if (countInRange && docsInRange) {
            // lookupIndex with a fresh alphabet appends the surviving feature.
            outputAlphabet.lookupIndex(inputAlphabet.lookupObject(inputType));
        }
    }
    return outputAlphabet;
}
/**
 * Computes one InfoGain ranking per target label: each class gets its own
 * ranked view of the shared data alphabet.
 *
 * @param ilist the instances to compute per-label information gain over
 */
public PerLabelInfoGain (InstanceList ilist) {
    double[][] perClassGains = calcPerLabelInfoGains (ilist);
    Alphabet dictionary = ilist.getDataAlphabet();
    int numClasses = ilist.getTargetAlphabet().size();
    ig = new InfoGain[numClasses];
    for (int c = 0; c < numClasses; c++) {
        ig[c] = new InfoGain (dictionary, perClassGains[c]);
    }
}
/**
 * Builds a leaf node over the given instances: picks the highest
 * information-gain feature (restricted to the given selection) as this
 * node's split feature, and records the base label distribution and
 * entropy of the instances.  Children start out null.
 *
 * @param ilist  the instances covered by this node
 * @param parent the parent node, or null for the root
 * @param fs     the feature selection to restrict the split feature to
 */
public Node (InstanceList ilist, Node parent, FeatureSelection fs) {
    InfoGain gain = new InfoGain (ilist);
    this.ilist = ilist;
    this.parent = parent;
    this.dictionary = ilist.getDataAlphabet();
    this.featureIndex = gain.getMaxValuedIndexIn (fs);
    this.infoGain = gain.value(featureIndex);
    this.labeling = gain.getBaseLabelDistribution();
    this.labelEntropy = gain.getBaseEntropy();
    this.child0 = null;
    this.child1 = null;
}