/**
 * Constructs token classifiers by immediately training on <code>trainList</code>.
 *
 * @param trainer   trainer used to build the underlying classifier(s)
 * @param trainList instances to train on; its pipe becomes this object's pipe
 * @param randSeed  random seed (presumably controls cross-validation splits in
 *                  doTraining — confirm against that method)
 * @param numCV     number of cross-validation folds used by doTraining
 */
public TokenClassifiers(ClassifierTrainer trainer, InstanceList trainList, int randSeed, int numCV) {
  super(trainList.getPipe());
  // Record configuration before kicking off training.
  m_trainer = trainer;
  m_randSeed = randSeed;
  m_numCV = numCV;
  m_table = new HashMap();
  doTraining(trainList);
}
/**
 * Returns a new InstanceList (sharing this list's pipe) containing the
 * instances of m_ilist selected by the m_instIndices index array, in order.
 */
public InstanceList getInstances() {
  InstanceList selected = new InstanceList(m_ilist.getPipe());
  for (int i = 0; i < m_instIndices.length; i++) {
    selected.add(m_ilist.getInstance(m_instIndices[i]));
  }
  return selected;
}
/**
 * Trains the underlying MaxEnt classifier on <code>ilist</code>, records the
 * pipe and the correct/incorrect label names, and prints the top 30 (or fewer)
 * features ranked by information gain.
 *
 * @param ilist     training list whose targets are <code>correct</code> or <code>incorrect</code>
 * @param correct   name of the "correct" label
 * @param incorrect name of the "incorrect" label
 * @return the trained MaxEnt classifier
 */
public MaxEnt trainClassifier (InstanceList ilist, String correct, String incorrect) {
  this.meClassifier = (MaxEnt) meTrainer.train (ilist);
  this.pipe = ilist.getPipe ();
  this.correct = correct;
  this.incorrect = incorrect;
  // Report the most informative features for diagnostics.
  InfoGain ig = new InfoGain (ilist);
  int numToPrint = Math.min (30, ig.numLocations());
  for (int rank = 0; rank < numToPrint; rank++) {
    System.out.println ("InfoGain["+ig.getObjectAtRank(rank)+"]="+ig.getValueAtRank(rank));
  }
  return this.meClassifier;
}
/**
 * Returns a new InstanceList (sharing this list's pipe) containing the
 * instances in the half-open range [start, end).
 *
 * @param start index of the first instance to include
 * @param end   index one past the last instance to include
 */
public InstanceList subList (int start, int end) {
  InstanceList slice = new InstanceList (getPipe ());
  for (int i = start; i < end; i++)
    slice.add (getInstance (i));
  return slice;
}
/**
 * Trains an ensemble by bagging: draws numBags bootstrap samples (with
 * replacement, each the size of the training set) and trains one underlying
 * classifier per sample.
 *
 * @return a BaggingClassifier aggregating the trained classifiers
 */
public Classifier train (InstanceList trainingList, InstanceList validationList, InstanceList testSet, ClassifierEvaluating evaluator, Classifier initialClassifier) {
  java.util.Random rng = new java.util.Random ();  // NOTE(review): unseeded — runs are not reproducible; confirm this is intended
  Classifier[] baggedClassifiers = new Classifier[numBags];
  for (int bag = 0; bag < numBags; bag++) {
    InstanceList bootstrapSample = trainingList.sampleWithReplacement (rng, trainingList.size());
    baggedClassifiers[bag] = underlyingTrainer.train (bootstrapSample, validationList, testSet, evaluator, initialClassifier);
  }
  return new BaggingClassifier (trainingList.getPipe(), baggedClassifiers);
}
/** Train underlying classifier on <code>ilist</code>. Assumes ilist has targets <code>correct</code> or <code>incorrect</code>. Also prints the top 30 (or fewer) features ranked by information gain, for diagnostics. @param ilist training list to build correct/incorrect classifier @param correct "correct" label @param incorrect "incorrect" label @return the trained MaxEnt classifier */ public MaxEnt trainClassifier (InstanceList ilist, String correct, String incorrect) { this.meClassifier = (MaxEnt) meTrainer.train (ilist); this.pipe = ilist.getPipe (); this.correct = correct; this.incorrect = incorrect; InfoGain ig = new InfoGain (ilist); int igl = Math.min (30, ig.numLocations()); for (int i = 0; i < igl; i++) System.out.println ("InfoGain["+ig.getObjectAtRank(i)+"]="+ig.getValueAtRank(i)); return this.meClassifier; }
/** Returns a new InstanceList containing every instance whose cluster assignment in <code>labels</code> equals <code>label</code>. */
public InstanceList getCluster(int label) {
  InstanceList members = new InstanceList(instances.getPipe());
  for (int i = 0; i < instances.size(); i++) {
    if (labels[i] == label) {
      members.add(instances.getInstance(i));
    }
  }
  return members;
}
/**
 * Grows a decision tree from <code>trainingList</code>: builds the root node,
 * recursively splits it via splitTree, freezes growth, prints the learned
 * tree, and wraps the root in a DecisionTree classifier.
 */
public Classifier train (InstanceList trainingList, InstanceList validationList, InstanceList testSet, ClassifierEvaluating evaluator, Classifier initialClassifier) {
  FeatureSelection featureSelection = trainingList.getFeatureSelection();
  DecisionTree.Node rootNode = new DecisionTree.Node (trainingList, null, featureSelection);
  splitTree (rootNode, featureSelection, 0);
  rootNode.stopGrowth();
  System.out.println ("DecisionTree learned:");
  rootNode.print();
  return new DecisionTree (trainingList.getPipe(), rootNode);
}
/**
 * Fills <code>training</code> with randomly generated token sequences over a
 * 3-symbol alphabet and 3 classes, generates a matching test set, trains a
 * MaxEnt classifier, prints train/test accuracy, and returns test accuracy.
 *
 * @param training list to populate and train on (its pipe is reused for the test set)
 * @return accuracy of the trained classifier on the held-out random test set
 */
private double testRandomTrainedOn (InstanceList training) {
  ClassifierTrainer trainer = new MaxEntTrainer ();
  Alphabet featureDict = dictOfSize (3);
  String[] classNames = new String[] {"class0", "class1", "class2"};
  Random rand = new Random (1);  // fixed seed keeps this test deterministic
  PipeInputIterator trainIter = new RandomTokenSequenceIterator (rand, new Dirichlet(featureDict, 2.0), 30, 0, 10, 200, classNames);
  training.add (trainIter);
  InstanceList testing = new InstanceList (training.getPipe ());
  testing.add (new RandomTokenSequenceIterator (rand, new Dirichlet(featureDict, 2.0), 30, 0, 10, 200, classNames));
  System.out.println ("Training set size = "+training.size());
  System.out.println ("Testing set size = "+testing.size());
  Classifier classifier = trainer.train (training);
  System.out.println ("Accuracy on training set:");
  System.out.println (classifier.getClass().getName() + ": " + new Trial (classifier, training).accuracy());
  System.out.println ("Accuracy on testing set:");
  double testAcc = new Trial (classifier, testing).accuracy();
  System.out.println (classifier.getClass().getName() + ": " + testAcc);
  return testAcc;
}
public Classifier train (InstanceList trainingList, InstanceList validationList, InstanceList testSet, ClassifierEvaluating evaluator, Classifier initialClassifier) { FeatureSelection selectedFeatures = trainingList.getFeatureSelection(); if (selectedFeatures != null) // xxx Attend to FeatureSelection!!! throw new UnsupportedOperationException ("FeatureSelection not yet implemented."); C45.Node root = new C45.Node(trainingList, null, m_minNumInsts); splitTree(root, 0); C45 tree = new C45 (trainingList.getPipe(), root); logger.info("C45 learned: (size=" + tree.getSize() + ")\n"); tree.print(); if (m_doPruning) { tree.prune(); logger.info("\nPruned C45: (size=" + tree.getSize() + ")\n"); root.print(); } root.stopGrowth(); return tree; }
public void split (FeatureSelection fs) { if (ilist == null) throw new IllegalStateException ("Frozen. Cannot split."); InstanceList ilist0 = new InstanceList (ilist.getPipe()); InstanceList ilist1 = new InstanceList (ilist.getPipe()); for (int i = 0; i < ilist.size(); i++) { Instance instance = ilist.getInstance(i); FeatureVector fv = (FeatureVector) instance.getData (); // xxx What test should this be? What to do with negative values? // Whatever is decided here should also go in InfoGain.calcInfoGains() if (fv.value (featureIndex) != 0) { //System.out.println ("list1 add "+instance.getUri()+" weight="+ilist.getInstanceWeight(i)); ilist1.add (instance, ilist.getInstanceWeight(i)); } else { //System.out.println ("list0 add "+instance.getUri()+" weight="+ilist.getInstanceWeight(i)); ilist0.add (instance, ilist.getInstanceWeight(i)); } } logger.info("child0="+ilist0.size()+" child1="+ilist1.size()); child0 = new Node (ilist0, this, fs); child1 = new Node (ilist1, this, fs); }
+ ": " + new Trial (classifiers[i], training).accuracy()); InstanceList testing = new InstanceList (training.getPipe ()); PipeInputIterator iter = new RandomTokenSequenceIterator ( r, new Dirichlet (fd, 2.0),
Pattern allowedPat = Pattern.compile(allowed); if (crf == null) { crf = new CRF4(training.getPipe(), null); String startName = crf.addOrderNStates(training, orders, null,
this.perLabelFeatureSelection = theClassifier.perClassFeatureSelection; this.defaultFeatureIndex = theClassifier.defaultFeatureIndex; assert (initialClassifier.getInstancePipe() == ilist.getPipe()); this.theClassifier = new MCMaxEnt (ilist.getPipe(), parameters, featureSelection, perLabelFeatureSelection);
this.perLabelFeatureSelection = theClassifier.perClassFeatureSelection; this.defaultFeatureIndex = theClassifier.defaultFeatureIndex; assert (initialClassifier.getInstancePipe() == ilist.getPipe()); this.theClassifier = new MaxEnt (ilist.getPipe(), parameters, featureSelection, perLabelFeatureSelection);
public ConditionalClusterer train (AbstractPipeInputIterator instanceIterator, boolean useFeatureInduction) { InstanceList trainingList = new InstanceList (p); trainingList.add (instanceIterator); System.err.println ("Training on " + trainingList.size() + " instances with distribution " + trainingList.targetLabelDistribution() + " and " + trainingList.getPipe().getDataAlphabet().size() + " features");; InfoGain ig = new InfoGain (trainingList); for (int i=0; i < ig.numLocations(); i++) System.err.println ("InfoGain["+ig.getObjectAtRank(i)+"]="+ig.getValueAtRank(i)); if (useFeatureInduction) { System.err.println ("Beginning Feature Induction"); RankedFeatureVector.Factory gainFactory = new InfoGain.Factory(); FeatureInducer fi = new FeatureInducer (gainFactory, trainingList, 20); fi.induceFeaturesFor(trainingList, false, false); } classifier = classifierTrainer.train (trainingList); classifier.getInstancePipe().getDataAlphabet().stopGrowth(); return new ConditionalClusterer(p, classifier, threshold); }
return new BalancedWinnow (trainingList.getPipe(), m_weights);
Pipe dataPipe = trainingList.getPipe (); Alphabet dict = (Alphabet) trainingList.getDataAlphabet (); int numLabels = trainingList.getTargetAlphabet().size();