/**
 * Returns the size of the list stored in {@code ii} for the given feature.
 *
 * @param feature the feature object, looked up (without adding it) in the data alphabet of {@code ilist}
 * @return the size of the list stored at the feature's index, or 0 when that slot is {@code null}
 * @throws IllegalArgumentException if the alphabet lookup returns -1 for {@code feature}
 */
public int getCountWithFeature(Object feature) {
    final int featureIndex = ilist.getDataAlphabet().lookupIndex(feature, false);
    if (featureIndex == -1) {
        throw new IllegalArgumentException("Feature " + feature + " not contained in InvertedIndex");
    }
    final ArrayList entries = ii[featureIndex];
    if (entries == null) {
        // No list was ever stored for this feature.
        return 0;
    }
    return entries.size();
}
/**
 * Creates an InfoGain over the data alphabet of {@code ilist}, with values produced by
 * {@code calcInfoGains(ilist)}.
 *
 * @param ilist the instances passed to {@code calcInfoGains} and whose data alphabet is used
 */
public InfoGain(InstanceList ilist) {
    super(ilist.getDataAlphabet(), calcInfoGains(ilist));
    // NOTE(review): the static fields are presumably filled in by calcInfoGains as a side
    // channel, because super(...) has to be the first statement. If so, concurrent
    // construction would race on them -- confirm against calcInfoGains.
    this.baseEntropy = staticBaseEntropy;
    this.baseLabelDistribution = staticBaseLabelDistribution;
}
/**
 * Installs the per-label feature selections, or clears them when {@code null} is given.
 *
 * @param selectedFeatures one selection per array slot, or {@code null}; every element's
 *        alphabet must be the very same object as this list's data alphabet
 * @throws IllegalArgumentException if any element's alphabet is not this list's data alphabet
 */
public void setPerLabelFeatureSelection(FeatureSelection[] selectedFeatures) {
    // A null array has nothing to validate, so treat it as length zero.
    final int selectionCount = (selectedFeatures == null) ? 0 : selectedFeatures.length;
    for (int label = 0; label < selectionCount; label++) {
        // Identity comparison: the alphabets must be the same object, not merely equal.
        if (selectedFeatures[label].getAlphabet() != getDataAlphabet()) {
            throw new IllegalArgumentException("Vocabularies do not match");
        }
    }
    perLabelFeatureSelection = selectedFeatures;
}
/**
 * Creates a KLGain over the data alphabet of {@code ilist}, with values produced by
 * {@code calcKLGains(ilist, classifications)}.
 *
 * @param ilist the instances whose data alphabet is used and which are passed to {@code calcKLGains}
 * @param classifications label vectors passed to {@code calcKLGains}
 *        (presumably one per instance, in list order -- confirm against calcKLGains)
 */
public KLGain (InstanceList ilist, LabelVector[] classifications) { super (ilist.getDataAlphabet(), calcKLGains (ilist, classifications)); }
/**
 * Creates a GradientGain over the data alphabet of {@code ilist}, with values produced by
 * {@code calcGradientGains(ilist, classifications)}.
 *
 * @param ilist the instances whose data alphabet is used and which are passed to {@code calcGradientGains}
 * @param classifications label vectors passed to {@code calcGradientGains}
 *        (presumably one per instance, in list order -- confirm against calcGradientGains)
 */
public GradientGain (InstanceList ilist, LabelVector[] classifications) { super (ilist.getDataAlphabet(), calcGradientGains (ilist, classifications)); }
/**
 * Creates an ExpGain over the data alphabet of {@code ilist}, with values produced by
 * {@code calcExpGains(ilist, classifications, gaussianPriorVariance)}.
 *
 * @param ilist the instances whose data alphabet is used and which are passed to {@code calcExpGains}
 * @param classifications label vectors passed to {@code calcExpGains}
 *        (presumably one per instance, in list order -- confirm against calcExpGains)
 * @param gaussianPriorVariance forwarded unchanged to {@code calcExpGains}
 */
public ExpGain (InstanceList ilist, LabelVector[] classifications, double gaussianPriorVariance) { super (ilist.getDataAlphabet(), calcExpGains (ilist, classifications, gaussianPriorVariance)); }
/**
 * Creates a FeatureCounts over the data alphabet of {@code ilist}, with values produced by
 * {@code calcFeatureCounts(ilist)}.
 *
 * @param ilist the instances whose data alphabet is used and which are passed to {@code calcFeatureCounts}
 */
public FeatureCounts (InstanceList ilist) { super (ilist.getDataAlphabet(), calcFeatureCounts (ilist)); }
public void setFeatureSelection (FeatureSelection selectedFeatures) { if (selectedFeatures != null && selectedFeatures.getAlphabet() != null // xxx We allow a null vocabulary here? See CRF3.java && selectedFeatures.getAlphabet() != getDataAlphabet()) throw new IllegalArgumentException ("Vocabularies do not match"); featureSelection = selectedFeatures; }
/**
 * Resolves {@code feature} to its index in the data alphabet of {@code ilist} and delegates
 * to the index-based overload.
 *
 * @param feature the feature object, looked up without adding it to the alphabet
 * @return whatever {@code getInstancesWithFeature(int)} returns for the feature's index
 * @throws IllegalArgumentException if the alphabet lookup returns -1 for {@code feature}
 */
public ArrayList getInstancesWithFeature(Object feature) {
    final int featureIndex = ilist.getDataAlphabet().lookupIndex(feature, false);
    if (featureIndex != -1) {
        return getInstancesWithFeature(featureIndex);
    }
    throw new IllegalArgumentException("Feature " + feature + " not contained in InvertedIndex");
}
/**
 * Creates a GradientGain over the data alphabet of {@code ilist}. The classifications are
 * first converted with {@code getLabelVectorsFromClassifications} and the result is passed,
 * together with {@code ilist}, to {@code calcGradientGains}.
 *
 * @param ilist the instances whose data alphabet is used and which are passed to {@code calcGradientGains}
 * @param classifications classifications converted to label vectors before the gains are computed
 */
public GradientGain (InstanceList ilist, Classification[] classifications) { super (ilist.getDataAlphabet(), calcGradientGains (ilist, getLabelVectorsFromClassifications(classifications))); }
/**
 * Creates a KLGain over the data alphabet of {@code ilist}. The classifications are first
 * converted with {@code getLabelVectorsFromClassifications} and the result is passed,
 * together with {@code ilist}, to {@code calcKLGains}.
 *
 * @param ilist the instances whose data alphabet is used and which are passed to {@code calcKLGains}
 * @param classifications classifications converted to label vectors before the gains are computed
 */
public KLGain (InstanceList ilist, Classification[] classifications) { super (ilist.getDataAlphabet(), calcKLGains (ilist, getLabelVectorsFromClassifications(classifications))); }
/**
 * Creates an ExpGain over the data alphabet of {@code ilist}. The classifications are first
 * converted with {@code getLabelVectorsFromClassifications} and the result is passed,
 * together with {@code ilist} and {@code gaussianPriorVariance}, to {@code calcExpGains}.
 *
 * @param ilist the instances whose data alphabet is used and which are passed to {@code calcExpGains}
 * @param classifications classifications converted to label vectors before the gains are computed
 * @param gaussianPriorVariance forwarded unchanged to {@code calcExpGains}
 */
public ExpGain (InstanceList ilist, Classification[] classifications, double gaussianPriorVariance) { super (ilist.getDataAlphabet(), calcExpGains (ilist, getLabelVectorsFromClassifications(classifications), gaussianPriorVariance)); }
public MaxEnt trainClassifier (InstanceList ilist) { // just to plain MaxEnt training for now System.out.println("Training NOW: "); MaxEnt me = (MaxEnt)(new MaxEntTrainer().train (ilist, null, null, null, null)); Alphabet alpha = ilist.getDataAlphabet(); alpha.stopGrowth(); // hack to prevent alphabet from growing Trial t = new Trial(me, ilist); System.out.println("CorefClusterAdv -> Training F1 on \"yes\" is: " + t.labelF1("yes")); //me.write(new File("/tmp/MaxEnt_Output")); return me; }
/**
 * Ranks the features of {@code ilist} with {@code ranker} and installs a single feature
 * selection holding the top-ranked ones on the list, clearing any per-label selection.
 *
 * <p>At most {@code numFeatures} features are selected, and never more than
 * {@code ranking.singleSize()}. Each added feature is logged at info level.
 *
 * @param ilist the instances to rank and on which the selection is installed
 * @param validationList not used by this method
 */
public void selectFeaturesForAllLabels(InstanceList ilist, InstanceList validationList) {
    RankedFeatureVector ranking = ranker.newRankedFeatureVector(ilist);
    FeatureSelection selection = new FeatureSelection(ilist.getDataAlphabet());
    final int limit = Math.min(numFeatures, ranking.singleSize());
    for (int rank = 0; rank < limit; rank++) {
        final int featureIndex = ranking.getIndexAtRank(rank);
        logger.info("adding feature " + rank + " word=" + ilist.getDataAlphabet().lookupObject(featureIndex));
        selection.add(featureIndex);
    }
    // One shared selection replaces any per-label selections.
    ilist.setPerLabelFeatureSelection(null);
    ilist.setFeatureSelection(selection);
}
/**
 * Builds pairwise training instances from every entry of {@code nodes}, trains a MaxEnt
 * classifier on them and prints its training F1 for the labels {@code "yes"} and
 * {@code "no"}.
 *
 * <p>The training-set size and feature count go to standard error; the F1 lines go to
 * standard output. After training, growth of the data alphabet is stopped.
 *
 * @param nodes array of lists, each handed to {@code CitationUtils.makePairs}
 * @param p the pipe used for the instance list and for pair construction
 * @return the trained classifier
 */
public Classifier trainPairwiseClassifier(ArrayList[] nodes, Pipe p) {
    InstanceList trainingPairs = new InstanceList(p);
    for (int n = 0; n < nodes.length; n++) {
        trainingPairs.add(CitationUtils.makePairs(p, nodes[n]));
    }
    System.err.println("Training size: " + trainingPairs.size()
        + "\tNum features: " + trainingPairs.getDataAlphabet().size());

    MaxEntTrainer trainer = new MaxEntTrainer();
    MaxEnt classifier = (MaxEnt) trainer.train(trainingPairs, null, null, null, null);
    trainingPairs.getDataAlphabet().stopGrowth();

    Trial trainingTrial = new Trial(classifier, trainingPairs);
    System.out.println("Pairwise classifier: -> Training F1 on \"yes\" is: " + trainingTrial.labelF1("yes"));
    System.out.println("Pairwise classifier: -> Training F1 on \"no\" is: " + trainingTrial.labelF1("no"));
    return classifier;
}
/**
 * Creates one {@code FeatureCounts} per entry of the target alphabet of {@code ilist},
 * each built from the matching row of {@code calcFeatureCounts(ilist)}.
 *
 * @param ilist the instances supplying both alphabets and the counts
 */
public PerLabelFeatureCounts(InstanceList ilist) {
    this.dataAlphabet = ilist.getDataAlphabet();
    this.targetAlphabet = ilist.getTargetAlphabet();

    final double[][] countsByLabel = calcFeatureCounts(ilist);
    final int labelCount = this.targetAlphabet.size();
    this.fc = new FeatureCounts[labelCount];
    for (int label = 0; label < labelCount; label++) {
        this.fc[label] = new FeatureCounts(this.dataAlphabet, countsByLabel[label]);
    }
}
/**
 * Creates one {@code InfoGain} per entry of the target alphabet of {@code ilist}, each
 * built from the matching row of {@code calcPerLabelInfoGains(ilist)}.
 *
 * @param ilist the instances supplying both alphabets and the per-label gains
 */
public PerLabelInfoGain(InstanceList ilist) {
    final double[][] gainsByLabel = calcPerLabelInfoGains(ilist);
    final Alphabet dataAlphabet = ilist.getDataAlphabet();
    final int labelCount = ilist.getTargetAlphabet().size();

    this.ig = new InfoGain[labelCount];
    for (int label = 0; label < labelCount; label++) {
        this.ig[label] = new InfoGain(dataAlphabet, gainsByLabel[label]);
    }
}
public Node (InstanceList ilist, Node parent, FeatureSelection fs) { InfoGain ig = new InfoGain (ilist); this.featureIndex = ig.getMaxValuedIndexIn (fs); this.infoGain = ig.value(featureIndex); this.ilist = ilist; this.dictionary = ilist.getDataAlphabet(); this.parent = parent; this.labeling = ig.getBaseLabelDistribution(); this.labelEntropy = ig.getBaseEntropy(); this.child0 = this.child1 = null; }
/**
 * Builds pairwise training instances from every entry of {@code nodes}, trains a MaxEnt
 * classifier on them and prints its training F1 for the label {@code "yes"} to standard
 * output.
 *
 * <p>After training, growth of the data alphabet is stopped.
 *
 * @param nodes array of lists, each handed to {@code CitationUtils.makePairs}
 * @param p the pipe used for the instance list and for pair construction
 * @return the trained classifier
 */
private static Classifier trainPairwiseClassifier(ArrayList[] nodes, Pipe p) {
    InstanceList trainingPairs = new InstanceList(p);
    for (int n = 0; n < nodes.length; n++) {
        trainingPairs.add(CitationUtils.makePairs(p, nodes[n]));
    }

    MaxEntTrainer trainer = new MaxEntTrainer();
    MaxEnt classifier = (MaxEnt) trainer.train(trainingPairs, null, null, null, null);
    trainingPairs.getDataAlphabet().stopGrowth();

    Trial trainingTrial = new Trial(classifier, trainingPairs);
    System.out.println("Pairwise classifier: -> Training F1 on \"yes\" is: " + trainingTrial.labelF1("yes"));
    return classifier;
}
/**
 * Trains a MaxEnt classifier on pairwise instances generated from {@code nodes} and
 * prints its training F1 for the label {@code "yes"} to standard output.
 *
 * <p>Every element of {@code nodes} is turned into instances by
 * {@code CitationUtils.makePairs} and added to one instance list built on {@code p}.
 * Growth of that list's data alphabet is stopped once training has finished.
 *
 * @param nodes array of lists from which the pairs are generated
 * @param p the pipe used for the instance list and for pair construction
 * @return the trained classifier
 */
private static Classifier trainPairwiseClassifier(ArrayList[] nodes, Pipe p) {
    InstanceList pairInstances = new InstanceList(p);
    int next = 0;
    while (next < nodes.length) {
        pairInstances.add(CitationUtils.makePairs(p, nodes[next]));
        next++;
    }

    MaxEnt model = (MaxEnt) (new MaxEntTrainer().train(pairInstances, null, null, null, null));
    pairInstances.getDataAlphabet().stopGrowth();

    Trial evaluation = new Trial(model, pairInstances);
    String f1Line = "Pairwise classifier: -> Training F1 on \"yes\" is: " + evaluation.labelF1("yes");
    System.out.println(f1Line);
    return model;
}