/**
 * Returns a new trainer seeded from an existing classifier's state.
 * The argument is cast to NaiveBayes, so callers must pass a NaiveBayes
 * instance (or null); any other Classifier type throws ClassCastException.
 */
public NaiveBayesTrainer newClassifierTrainer(Classifier initialClassifier) { return new NaiveBayesTrainer ((NaiveBayes)initialClassifier); }
// Factory-style setter (returns this Factory for chaining).
// NOTE(review): the body of this setter continues past the visible source.
public NaiveBayesTrainer.Factory setDocLengthNormalization (double docLengthNormalization) {
public NaiveBayes trainIncremental (InstanceList trainingInstancesToAdd) { // Initialize and check instance variables as necessary... setup(trainingInstancesToAdd, null); // Incrementally add the counts of this new training data for (Instance instance : trainingInstancesToAdd) incorporateOneInstance(instance, trainingInstancesToAdd.getInstanceWeight(instance)); // Estimate multinomials, and return a new naive Bayes classifier. // Note that, unlike MaxEnt, NaiveBayes is immutable, so we create a new one each time. classifier = new NaiveBayes (instancePipe, pe.estimate(), estimateFeatureMultinomials()); return classifier; }
/**
 * Create a NaiveBayes classifier from a set of training data.
 * The trainer uses counts of each feature in an instance's feature vector
 * to provide an estimate of p(Labeling| feature). The previously accumulated
 * sufficient statistics are discarded before training, so each call to
 * train() is completely independent of any other.
 * @param trainingList The InstanceList to be used to train the classifier.
 * Within each instance the data slot is an instance of FeatureVector and the
 * target slot is an instance of Labeling
 * @return The NaiveBayes classifier as trained on the trainingList
 */
public NaiveBayes train (InstanceList trainingList) {
    // Forget all the previous sufficient statistics counts;
    me = null;
    pe = null;
    // Train a new classifier based on this data
    this.classifier = trainIncremental (trainingList);
    return classifier;
}
/**
 * Builds a NaiveBayesTrainer from command-line style option/value pairs.
 * Recognized option: --docLengthNormalization &lt;double&gt;.
 *
 * @param args alternating option names and values; may be null
 * @return the configured trainer
 * @throws IllegalArgumentException on an odd argument count or an unknown option
 */
public ClassifierTrainer<NaiveBayes> createTrainer(String... args) {
    NaiveBayesTrainer trainer = new NaiveBayesTrainer();
    if (args == null) {
        return trainer;
    }
    if (args.length % 2 != 0) {
        throw new IllegalArgumentException(
            "each argument must be supplied with a value: " + getUsageMessage());
    }
    for (int i = 0; i + 1 < args.length; i += 2) {
        String name = args[i];
        String value = args[i + 1];
        if ("--docLengthNormalization".equals(name)) {
            trainer.setDocLengthNormalization(Double.parseDouble(value));
        } else {
            throw new IllegalArgumentException(
                String.format("the argument %1$s is invalid.", name) + getUsageMessage());
        }
    }
    return trainer;
}
// Fragment from a larger test method; instList and instList2 are defined earlier.
// The same trainer is reused for both calls, so the second classifier is built
// from the accumulated counts of both instance lists.
NaiveBayesTrainer trainer = new NaiveBayesTrainer();
NaiveBayes classifier = trainer.trainIncremental(instList);
System.out.println("target alphabet size " + instList2.getTargetAlphabet().size());
NaiveBayes classifier2 = (NaiveBayes) trainer.trainIncremental(instList2);
public void testRandomTrained () { InstanceList ilist = new InstanceList (new Randoms(1), 10, 2); Classifier c = new NaiveBayesTrainer ().train (ilist); // test on the training data int numCorrect = 0; for (int i = 0; i < ilist.size(); i++) { Instance inst = ilist.get(i); Classification cf = c.classify (inst); cf.print (); if (cf.getLabeling().getBestLabel() == inst.getLabeling().getBestLabel()) numCorrect++; } System.out.println ("Accuracy on training set = " + ((double)numCorrect)/ilist.size()); }
// Fragment from a larger test method; nbTrainer, trainingSet and trainingSet2
// are defined earlier. A fresh trainer is created for each train() call, and
// the data log-likelihood on the second set is reported.
NaiveBayes c = (NaiveBayes) nbTrainer.newClassifierTrainer().train (trainingSet);
// NOTE(review): prevLogLikelihood and converged appear to be used later,
// outside this view — confirm against the full method.
double prevLogLikelihood = 0, logLikelihood = 0;
boolean converged = false;
c = (NaiveBayes) nbTrainer.newClassifierTrainer().train (trainingSet2);
logLikelihood = c.dataLogLikelihood (trainingSet2);
System.err.println ("Loglikelihood = "+logLikelihood);
// Fragment from a larger test method; instList and instList2 are defined earlier.
// The same trainer is reused for both calls, so the second classifier is built
// from the accumulated counts of both instance lists.
NaiveBayesTrainer trainer = new NaiveBayesTrainer();
NaiveBayes classifier = trainer.trainIncremental(instList);
System.out.println("target alphabet size " + instList2.getTargetAlphabet().size());
NaiveBayes classifier2 = (NaiveBayes) trainer.trainIncremental(instList2);
public void testRandomTrained () { InstanceList ilist = new InstanceList (new Randoms(1), 10, 2); Classifier c = new NaiveBayesTrainer ().train (ilist); // test on the training data int numCorrect = 0; for (int i = 0; i < ilist.size(); i++) { Instance inst = ilist.get(i); Classification cf = c.classify (inst); cf.print (); if (cf.getLabeling().getBestLabel() == inst.getLabeling().getBestLabel()) numCorrect++; } System.out.println ("Accuracy on training set = " + ((double)numCorrect)/ilist.size()); }
/**
 * Builds a NaiveBayesTrainer from command-line style option/value pairs.
 * Recognized option: --docLengthNormalization &lt;double&gt;.
 *
 * @param args alternating option names and values; may be null
 * @return the configured trainer
 * @throws IllegalArgumentException on an odd argument count or an unknown option
 */
public ClassifierTrainer<NaiveBayes> createTrainer(String... args) {
    NaiveBayesTrainer trainer = new NaiveBayesTrainer();
    if (args == null) {
        return trainer;
    }
    if (args.length % 2 != 0) {
        throw new IllegalArgumentException(
            "each argument must be supplied with a value: " + getUsageMessage());
    }
    for (int i = 0; i + 1 < args.length; i += 2) {
        String name = args[i];
        String value = args[i + 1];
        if ("--docLengthNormalization".equals(name)) {
            trainer.setDocLengthNormalization(Double.parseDouble(value));
        } else {
            throw new IllegalArgumentException(
                String.format("the argument %1$s is invalid.", name) + getUsageMessage());
        }
    }
    return trainer;
}
// Fragment from a larger test method; nbTrainer, trainingSet and trainingSet2
// are defined earlier. A fresh trainer is created for each train() call, and
// the data log-likelihood on the second set is reported.
NaiveBayes c = (NaiveBayes) nbTrainer.newClassifierTrainer().train (trainingSet);
// NOTE(review): prevLogLikelihood and converged appear to be used later,
// outside this view — confirm against the full method.
double prevLogLikelihood = 0, logLikelihood = 0;
boolean converged = false;
c = (NaiveBayes) nbTrainer.newClassifierTrainer().train (trainingSet2);
logLikelihood = c.dataLogLikelihood (trainingSet2);
System.err.println ("Loglikelihood = "+logLikelihood);
public NaiveBayes trainIncremental (InstanceList trainingInstancesToAdd) { // Initialize and check instance variables as necessary... setup(trainingInstancesToAdd, null); // Incrementally add the counts of this new training data for (Instance instance : trainingInstancesToAdd) incorporateOneInstance(instance, trainingInstancesToAdd.getInstanceWeight(instance)); // Estimate multinomials, and return a new naive Bayes classifier. // Note that, unlike MaxEnt, NaiveBayes is immutable, so we create a new one each time. classifier = new NaiveBayes (instancePipe, pe.estimate(), estimateFeatureMultinomials()); return classifier; }
// Fragment from a larger test method; instList and instList2 are defined earlier.
// The same trainer is reused, so the second classifier reflects the accumulated
// counts of both lists. The casts are redundant since trainIncremental already
// returns NaiveBayes, but are kept as-is.
NaiveBayesTrainer trainer = new NaiveBayesTrainer();
NaiveBayes classifier = (NaiveBayes) trainer.trainIncremental(instList);
System.out.println("target alphabet size " + instList2.getTargetAlphabet().size());
NaiveBayes classifier2 = (NaiveBayes) trainer.trainIncremental(instList2);
/**
 * End-to-end check: pipes raw strings into feature vectors, trains a
 * classifier, and verifies that an unseen africa-themed sentence is
 * labeled "africa".
 */
public void testStringTrained () {
    String[] africaTraining = new String[] {
        "on the plains of africa the lions roar",
        "in swahili ngoma means to dance",
        "nelson mandela became president of south africa",
        "the saraha dessert is expanding"};
    String[] asiaTraining = new String[] {
        "panda bears eat bamboo",
        "china's one child policy has resulted in a surplus of boys",
        "tigers live in the jungle"};
    // Standard text pipeline: label targets, tokenize, index, vectorize.
    Pipe[] pipeline = new Pipe[] {
        new Target2Label (),
        new CharSequence2TokenSequence (),
        new TokenSequence2FeatureSequence (),
        new FeatureSequence2FeatureVector ()};
    InstanceList instances = new InstanceList (new SerialPipes (pipeline));
    instances.addThruPipe (new ArrayIterator (africaTraining, "africa"));
    instances.addThruPipe (new ArrayIterator (asiaTraining, "asia"));
    Classifier c = new NaiveBayesTrainer ().train (instances);
    Classification cf = c.classify ("nelson mandela never eats lions");
    assertTrue (cf.getLabeling().getBestLabel()
        == ((LabelAlphabet) instances.getTargetAlphabet()).lookupLabel("africa"));
}
/**
 * Returns a new trainer seeded from an existing classifier's state.
 * The argument is cast to NaiveBayes, so callers must pass a NaiveBayes
 * instance (or null); any other Classifier type throws ClassCastException.
 */
public NaiveBayesTrainer newClassifierTrainer(Classifier initialClassifier) { return new NaiveBayesTrainer ((NaiveBayes)initialClassifier); }
// Factory-style setter (returns this Factory for chaining).
// NOTE(review): the body of this setter continues past the visible source.
public NaiveBayesTrainer.Factory setDocLengthNormalization (double docLengthNormalization) {
/**
 * Create a NaiveBayes classifier from a set of training data.
 * The trainer uses counts of each feature in an instance's feature vector
 * to provide an estimate of p(Labeling| feature). The previously accumulated
 * sufficient statistics are discarded before training, so each call to
 * train() is completely independent of any other.
 * @param trainingList The InstanceList to be used to train the classifier.
 * Within each instance the data slot is an instance of FeatureVector and the
 * target slot is an instance of Labeling
 * @return The NaiveBayes classifier as trained on the trainingList
 */
public NaiveBayes train (InstanceList trainingList) {
    // Forget all the previous sufficient statistics counts;
    me = null;
    pe = null;
    // Train a new classifier based on this data
    this.classifier = trainIncremental (trainingList);
    return classifier;
}
// Fragment from a larger test method; nbTrainer, trainingSet and trainingSet2
// are defined earlier. A fresh trainer is created for each train() call, and
// the data log-likelihood on the second set is reported.
NaiveBayes c = (NaiveBayes) nbTrainer.newClassifierTrainer().train (trainingSet);
// NOTE(review): prevLogLikelihood and converged appear to be used later,
// outside this view — confirm against the full method.
double prevLogLikelihood = 0, logLikelihood = 0;
boolean converged = false;
c = (NaiveBayes) nbTrainer.newClassifierTrainer().train (trainingSet2);
logLikelihood = c.dataLogLikelihood (trainingSet2);
System.err.println ("Loglikelihood = "+logLikelihood);
public NaiveBayes trainIncremental (Instance instance) { setup (null, instance); // Incrementally add the counts of this new training instance incorporateOneInstance (instance, 1.0); if (instancePipe == null) instancePipe = new Noop (dataAlphabet, targetAlphabet); classifier = new NaiveBayes (instancePipe, pe.estimate(), estimateFeatureMultinomials()); return classifier; }
// Fragment from a larger test method; instList and instList2 are defined earlier.
// The same trainer is reused, so the second classifier reflects the accumulated
// counts of both lists. The casts are redundant since trainIncremental already
// returns NaiveBayes, but are kept as-is.
NaiveBayesTrainer trainer = new NaiveBayesTrainer();
NaiveBayes classifier = (NaiveBayes) trainer.trainIncremental(instList);
System.out.println("target alphabet size " + instList2.getTargetAlphabet().size());
NaiveBayes classifier2 = (NaiveBayes) trainer.trainIncremental(instList2);
/**
 * End-to-end check: pipes raw strings into feature vectors, trains a
 * classifier, and verifies that an unseen africa-themed sentence is
 * labeled "africa".
 */
public void testStringTrained () {
    String[] africaTraining = new String[] {
        "on the plains of africa the lions roar",
        "in swahili ngoma means to dance",
        "nelson mandela became president of south africa",
        "the saraha dessert is expanding"};
    String[] asiaTraining = new String[] {
        "panda bears eat bamboo",
        "china's one child policy has resulted in a surplus of boys",
        "tigers live in the jungle"};
    // Standard text pipeline: label targets, tokenize, index, vectorize.
    Pipe[] pipeline = new Pipe[] {
        new Target2Label (),
        new CharSequence2TokenSequence (),
        new TokenSequence2FeatureSequence (),
        new FeatureSequence2FeatureVector ()};
    InstanceList instances = new InstanceList (new SerialPipes (pipeline));
    instances.addThruPipe (new ArrayIterator (africaTraining, "africa"));
    instances.addThruPipe (new ArrayIterator (asiaTraining, "asia"));
    Classifier c = new NaiveBayesTrainer ().train (instances);
    Classification cf = c.classify ("nelson mandela never eats lions");
    assertTrue (cf.getLabeling().getBestLabel()
        == ((LabelAlphabet) instances.getTargetAlphabet()).lookupLabel("africa"));
}