/**
 * Builds an ExpGain ranking of the features in {@code ilist}, using this
 * object's stored classifications and Gaussian prior variance.
 */
public RankedFeatureVector newRankedFeatureVector (InstanceList ilist)
{
	// The stored classifications must share the list's target alphabet.
	assert (ilist.getTargetAlphabet() == classifications[0].getAlphabet());
	ExpGain gain = new ExpGain (ilist, classifications, gaussianPriorVariance);
	return gain;
}
/**
 * Returns the weight-accumulated distribution of target labels over all
 * instances in this list, or null when the list is empty.
 *
 * @throws IllegalStateException if the first instance's target is not a Labeling
 */
public LabelVector targetLabelDistribution ()
{
	if (instances.size() == 0)
		return null;
	if (!(getInstance(0).getTarget() instanceof Labeling))
		throw new IllegalStateException ("Target is not a labeling.");
	double[] labelCounts = new double[getTargetAlphabet().size()];
	// Each instance's labeling contributes proportionally to its weight.
	for (int idx = 0; idx < instances.size(); idx++) {
		Labeling labeling = (Labeling) getInstance(idx).getTarget();
		labeling.addTo (labelCounts, getInstanceWeight(idx));
	}
	return new LabelVector ((LabelAlphabet) getTargetAlphabet(), labelCounts);
}
/**
 * Calculates the minimum description length of this node, i.e.,
 * the length of the binary encoding that describes the feature
 * and the split value used at this node.
 */
public double getMDL()
{
	int numClasses = m_ilist.getTargetAlphabet().size();
	// Data-coding cost: node size times the base entropy of its label distribution.
	double mdl = getSize() * getGainRatio().getBaseEntropy();
	// Model-coding cost for the class-frequency parameters.
	mdl += ((numClasses-1) * Math.log(getSize() / 2.0)) / (2 * GainRatio.log2);
	// Stirling-style correction term, converted to bits via log2.
	double piTerm = Math.pow(Math.PI, numClasses/2.0);
	double gammaTerm = Maths.gamma(numClasses/2.0);
	mdl += Math.log(piTerm/gammaTerm) / GainRatio.log2;
	return mdl;
}
/**
 * Builds one FeatureCounts ranking per target label from the counts
 * accumulated over {@code ilist}.
 */
public PerLabelFeatureCounts (InstanceList ilist)
{
	dataAlphabet = ilist.getDataAlphabet();
	targetAlphabet = ilist.getTargetAlphabet();
	double[][] counts = calcFeatureCounts (ilist);
	fc = new FeatureCounts[targetAlphabet.size()];
	for (int labelIndex = 0; labelIndex < fc.length; labelIndex++)
		fc[labelIndex] = new FeatureCounts (dataAlphabet, counts[labelIndex]);
}
/**
 * Builds one InfoGain ranking per target label from the per-label
 * information gains computed over {@code ilist}.
 */
public PerLabelInfoGain (InstanceList ilist)
{
	double[][] perLabelGains = calcPerLabelInfoGains (ilist);
	Alphabet dataAlphabet = ilist.getDataAlphabet();
	int numClasses = ilist.getTargetAlphabet().size();
	ig = new InfoGain[numClasses];
	for (int labelIndex = 0; labelIndex < numClasses; labelIndex++)
		ig[labelIndex] = new InfoGain (dataAlphabet, perLabelGains[labelIndex]);
}
// Print every entry of the target alphabet, one label per line.
Alphabet targetAlphabet = ilist.getTargetAlphabet ();
for (int idx = 0; idx < targetAlphabet.size(); idx++)
	System.out.println (targetAlphabet.lookupObject (idx));
// NOTE(review): this line is garbled extraction residue -- statements from what
// appears to be an InfoGain-style computation have been fused together, including
// an unreachable "return infogains;" followed by further assignments. The original
// method boundaries are not visible here; restore from the original source file
// before attempting any edit. Left byte-identical.
int numClasses = ilist.getTargetAlphabet().size(); int numFeatures = ilist.getDataAlphabet().size(); double[] infogains = new double[numFeatures]; staticBaseLabelDistribution = new LabelVector ((LabelAlphabet)ilist.getTargetAlphabet(), targetCount); return infogains; staticBaseEntropy -= p * Math.log(p) / log2; staticBaseLabelDistribution = new LabelVector ((LabelAlphabet)ilist.getTargetAlphabet(), classDistribution);
// NOTE(review): garbled extraction residue -- the argument of instList2.add(...)
// was truncated and fused with the following println, so this is not compilable
// as written. Presumably an iterator/instance argument was lost; recover it from
// the original source before editing. Left byte-identical.
System.out.println("target alphabet size " + instList.getTargetAlphabet().size()); InstanceList instList2 = new InstanceList(instPipe); instList2.add(new System.out.println("target alphabet size " + instList2.getTargetAlphabet().size());
public void induceFeatures (InstanceList ilist, boolean withFeatureShrinkage, boolean inducePerClassFeatures) { if (inducePerClassFeatures) { int numClasses = ilist.getTargetAlphabet().size(); // int numFeatures = ilist.getDataAlphabet().size(); FeatureSelection[] pcfs = new FeatureSelection[numClasses]; for (int j = 0; j < numClasses; j++) pcfs[j] = (FeatureSelection) ilist.getPerLabelFeatureSelection()[j].clone(); for (int i = 0; i < ilist.size(); i++) { Object data = ilist.getInstance(i).getData(); AugmentableFeatureVector afv = (AugmentableFeatureVector) data; root.induceFeatures (afv, null, pcfs, ilist.getFeatureSelection(), ilist.getPerLabelFeatureSelection(), withFeatureShrinkage, inducePerClassFeatures, addFeaturesClassEntropyThreshold); } } else { throw new UnsupportedOperationException ("Not yet implemented"); } }
// NOTE(review): garbled extraction residue (duplicate of a fragment seen earlier
// in this file) -- the instList2.add(...) argument was truncated and fused with
// the following println, so this is not compilable. Recover the lost argument
// from the original source before editing. Left byte-identical.
System.out.println("target alphabet size " + instList.getTargetAlphabet().size()); InstanceList instList2 = new InstanceList(instPipe); instList2.add(new System.out.println("target alphabet size " + instList2.getTargetAlphabet().size());
// NOTE(review): garbled extraction residue -- the calcGradientGains signature has
// been fused with loop-body statements (note the free "i" index with no enclosing
// for-loop and the missing braces). The full method body is not visible here;
// restore from the original source before editing. Left byte-identical.
private static double[] calcGradientGains (InstanceList ilist, LabelVector[] classifications) int numClasses = ilist.getTargetAlphabet().size(); int numFeatures = ilist.getDataAlphabet().size(); double[] gradientgains = new double[numFeatures]; assert (classifications[i].getLabelAlphabet() == ilist.getTargetAlphabet()); Instance inst = ilist.getInstance(i); Labeling labeling = inst.getLabeling ();
// NOTE(review): garbled extraction residue (same broken fragment repeated) --
// the instList2.add(...) argument was truncated and fused with the following
// println; not compilable as written. Recover from the original source before
// editing. Left byte-identical.
System.out.println("target alphabet size " + instList.getTargetAlphabet().size()); InstanceList instList2 = new InstanceList(instPipe); instList2.add(new System.out.println("target alphabet size " + instList2.getTargetAlphabet().size());
// Fragment of a larger method (enclosing definition not visible in this view):
// caches the class count and two views of the data alphabet / feature count.
// numClasses is unused within this visible span -- presumably used later in the
// original method; verify before removing.
int numClasses = ilist.getTargetAlphabet().size(); int numFeatures = ilist.getDataAlphabet().size(); Alphabet dataAlphabet = ilist.getDataAlphabet();
// Assumes the target alphabet is a LabelAlphabet -- TODO confirm; the cast
// throws ClassCastException otherwise.
LabelAlphabet ld = (LabelAlphabet) ilist.getTargetAlphabet();
// Assumes the target alphabet is a LabelAlphabet -- TODO confirm; the cast
// throws ClassCastException otherwise.
LabelAlphabet ld = (LabelAlphabet) ilist.getTargetAlphabet();
/**
 * Sums per-feature counts over all instances in {@code ilist}.
 * When {@code countInstances} is true each feature present in an instance
 * contributes 1; otherwise it contributes its feature-vector value.
 * Instances with zero weight are skipped.
 *
 * Fix: removed the dead locals {@code numInstances}, {@code numClasses}
 * and {@code count}, which were declared but never used.
 *
 * @param ilist the instance list to tally
 * @return an array of length dataAlphabet.size() with the accumulated counts
 * @throws IllegalArgumentException if any instance's data is not a FeatureVector
 */
private static double[] calcFeatureCounts (InstanceList ilist)
{
	int numFeatures = ilist.getDataAlphabet().size();
	double[] counts = new double[numFeatures];
	for (int i = 0; i < ilist.size(); i++) {
		Instance inst = ilist.getInstance(i);
		if (!(inst.getData() instanceof FeatureVector))
			throw new IllegalArgumentException ("Currently only handles FeatureVector data");
		FeatureVector fv = (FeatureVector) inst.getData ();
		// Zero-weight instances contribute nothing; note the type check above
		// still runs for them, matching the original behavior.
		if (ilist.getInstanceWeight(i) == 0)
			continue;
		for (int j = 0; j < fv.numLocations(); j++) {
			if (countInstances)
				counts[fv.indexAtLocation(j)] += 1;
			else
				counts[fv.indexAtLocation(j)] += fv.valueAtLocation(j);
		}
	}
	return counts;
}
private static double[][] calcFeatureCounts (InstanceList ilist) { int numClasses = ilist.getTargetAlphabet().size(); int numFeatures = ilist.getDataAlphabet().size(); double[][] featureCounts = new double[numClasses][numFeatures]; // Count features across all classes for (int i = 0; i < ilist.size(); i++) { Instance inst = ilist.getInstance(i); if (!(inst.getData() instanceof FeatureVector)) throw new IllegalArgumentException ("Currently only handles FeatureVector data"); FeatureVector fv = (FeatureVector) inst.getData (); // xxx Note that this ignores uncertain-labels. int labelIndex = inst.getLabeling ().getBestIndex(); int fli; for (int fl = 0; fl < fv.numLocations(); fl++) { fli = fv.indexAtLocation(fl); if (countInstances) featureCounts[labelIndex][fli]++; else featureCounts[labelIndex][fli] += fv.valueAtLocation(fl); } } return featureCounts; }
// Fragment of a larger method (enclosing definition not visible here):
// allocates the weight matrix with one row per label. The +1 column is
// presumably a per-label bias/default-feature slot -- TODO confirm against
// how m_weights is indexed elsewhere.
int numLabels = trainingList.getTargetAlphabet().size(); int numFeats = dict.size(); m_weights = new double [numLabels][numFeats+1];
// Fragment of a larger method (enclosing definition not visible here):
// freezes the label set so training cannot add new labels, caches the pipe
// and data alphabet, and scales theta by the feature count. dataPipe and
// numLabels are unused within this visible span -- presumably used later in
// the original method; verify before removing.
trainingList.getTargetAlphabet().stopGrowth(); Pipe dataPipe = trainingList.getPipe (); Alphabet dict = (Alphabet) trainingList.getDataAlphabet (); int numLabels = trainingList.getTargetAlphabet().size(); int numFeats = dict.size(); this.theta = numFeats * this.nfactor;
/**
 * Trains a NaiveBayes classifier on two tiny string corpora ("africa" and
 * "asia") and checks that a held-out africa-themed sentence is labeled
 * "africa". Labels from the same alphabet are interned, so identity
 * comparison on the best label is the intended check here.
 */
public void testStringTrained ()
{
	String[] africaTraining = new String[] {
		"on the plains of africa the lions roar",
		"in swahili ngoma means to dance",
		"nelson mandela became president of south africa",
		"the saraha dessert is expanding"};
	String[] asiaTraining = new String[] {
		"panda bears eat bamboo",
		"china's one child policy has resulted in a surplus of boys",
		"tigers live in the jungle"};
	Pipe pipe = new SerialPipes (new Pipe[] {
		new Target2Label (),
		new CharSequence2TokenSequence (),
		new TokenSequence2FeatureSequence (),
		new FeatureSequence2FeatureVector ()});
	InstanceList training = new InstanceList (pipe);
	training.add (new ArrayIterator (africaTraining, "africa"));
	training.add (new ArrayIterator (asiaTraining, "asia"));
	Classifier classifier = new NaiveBayesTrainer ().train (training);
	Classification result = classifier.classify ("nelson mandela never eats lions");
	assertTrue (result.getLabeling().getBestLabel()
			== ((LabelAlphabet) training.getTargetAlphabet()).lookupLabel ("africa"));
}