/**
 * Draws a random two-way split of {@code data} and hands back its first partition.
 *
 * @param data instance list to split
 * @param pct  weight given to the returned partition; the other partition is
 *             weighted {@code 1 - pct}
 * @return the first of the two lists produced by {@code data.split}
 */
private InstanceList subsetData (InstanceList data, double pct) {
  double[] weights = { pct, 1 - pct };
  return data.split (r, weights)[0];
}
/**
 * Draws a random two-way split of {@code data} and hands back its first partition.
 *
 * @param data instance list to split
 * @param pct  weight given to the returned partition; the other partition is
 *             weighted {@code 1 - pct}
 * @return the first of the two lists produced by {@code data.split}
 */
private InstanceList subsetData (InstanceList data, double pct) {
  double[] weights = { pct, 1 - pct };
  return data.split (r, weights)[0];
}
/**
 * Draws a random two-way split of {@code data} and hands back its first partition.
 *
 * @param data instance list to split
 * @param pct  weight given to the returned partition; the other partition is
 *             weighted {@code 1 - pct}
 * @return the first of the two lists produced by {@code data.split}
 */
private InstanceList subsetData (InstanceList data, double pct) {
  double[] weights = { pct, 1 - pct };
  return data.split (r, weights)[0];
}
/**
 * Builds an n-fold cross-validation iterator by splitting the given list
 * into {@code nfolds} equally weighted folds.
 *
 * @param ilist instance list to split into folds and iterate over
 * @param nfolds number of folds to split InstanceList into
 * @param r The source of randomness to use in shuffling.
 */
public CrossValidationIterator (InstanceList ilist, int nfolds, java.util.Random r) {
  this.nfolds = nfolds;
  assert (nfolds > 0) : "nfolds: " + this.nfolds;
  this.index = 0;

  // Each fold gets the same weight, 1/nfolds.
  double[] equalShares = new double[nfolds];
  java.util.Arrays.fill (equalShares, 1.0 / nfolds);
  this.folds = ilist.split (r, equalShares);
}
/**
 * Constructs a new n-fold cross-validation iterator by splitting the given
 * list into {@code nfolds} equally weighted folds.
 *
 * @param ilist instance list to split into folds and iterate over
 * @param nfolds number of folds to split InstanceList into; asserted to be positive
 * @param r The source of randomness to use in shuffling.
 */
public CrossValidationIterator (InstanceList ilist, int nfolds, java.util.Random r) {
  // Report the argument itself: the field is only assigned on the next line,
  // so reading it here would not show the offending value.
  assert (nfolds > 0) : "nfolds: " + nfolds;
  this.nfolds = nfolds;
  this.index = 0;

  // Each fold gets the same weight, 1/nfolds.
  double[] proportions = new double[nfolds];
  java.util.Arrays.fill (proportions, 1.0 / nfolds);
  this.folds = ilist.split (r, proportions);
}
/**
 * Builds an n-fold cross-validation iterator by splitting the given list
 * into {@code nfolds} equally weighted folds.
 *
 * @param ilist instance list to split into folds and iterate over
 * @param nfolds number of folds to split InstanceList into
 * @param r The source of randomness to use in shuffling.
 */
public CrossValidationIterator (InstanceList ilist, int nfolds, java.util.Random r) {
  this.nfolds = nfolds;
  assert (nfolds > 0) : "nfolds: " + this.nfolds;
  this.index = 0;

  // Each fold gets the same weight, 1/nfolds.
  double[] equalShares = new double[nfolds];
  java.util.Arrays.fill (equalShares, 1.0 / nfolds);
  this.folds = ilist.split (r, equalShares);
}
/**
 * Constructs an n-fold cross-validation iterator: {@code split} is called with
 * {@code _nfolds} equal weights and a {@code java.util.Random} built from
 * {@code seed}, and the resulting lists become the folds.
 *
 * @param _nfolds number of folds to split InstanceList into; asserted to be positive
 * @param seed seed for random number used to split InstanceList
 */
public CrossValidationIterator (int _nfolds, int seed) {
  // Report the argument itself: the nfolds field has not been assigned yet here.
  assert (_nfolds > 0) : "nfolds: " + _nfolds;
  this.nfolds = _nfolds;
  this.index = 0;

  // Each fold gets the same weight, 1/_nfolds.
  double[] proportions = new double[_nfolds];
  java.util.Arrays.fill (proportions, 1.0 / _nfolds);

  // split() returns the fold array, so no placeholder array is allocated first.
  folds = split (new java.util.Random (seed), proportions);
}
/**
 * Constructs an n-fold cross-validation iterator: {@code split} is called with
 * {@code _nfolds} equal weights and a {@code java.util.Random} built from
 * {@code seed}, and the resulting lists become the folds.
 *
 * @param _nfolds number of folds to split InstanceList into; asserted to be positive
 * @param seed seed for random number used to split InstanceList
 */
public CrossValidationIterator (int _nfolds, int seed) {
  // Report the argument itself: the nfolds field has not been assigned yet here.
  assert (_nfolds > 0) : "nfolds: " + _nfolds;
  this.nfolds = _nfolds;
  this.index = 0;

  // Each fold gets the same weight, 1/_nfolds.
  double[] proportions = new double[_nfolds];
  java.util.Arrays.fill (proportions, 1.0 / _nfolds);

  // split() returns the fold array, so no placeholder array is allocated first.
  folds = split (new java.util.Random (seed), proportions);
}
/**
 * Constructs an n-fold cross-validation iterator: {@code split} is called with
 * {@code _nfolds} equal weights and a {@code java.util.Random} built from
 * {@code seed}, and the resulting lists become the folds.
 *
 * @param _nfolds number of folds to split InstanceList into; asserted to be positive
 * @param seed seed for random number used to split InstanceList
 */
public CrossValidationIterator (int _nfolds, int seed) {
  // Report the argument itself: the nfolds field has not been assigned yet here.
  assert (_nfolds > 0) : "nfolds: " + _nfolds;
  this.nfolds = _nfolds;
  this.index = 0;

  // Each fold gets the same weight, 1/_nfolds.
  double[] proportions = new double[_nfolds];
  java.util.Arrays.fill (proportions, 1.0 / _nfolds);

  // split() returns the fold array, so no placeholder array is allocated first.
  folds = split (new java.util.Random (seed), proportions);
}
/**
 * Splits using a freshly created random generator, delegating to
 * {@code split(java.util.Random, double[])}.
 *
 * @param proportions weights forwarded unchanged to the two-argument overload
 * @return whatever the two-argument {@code split} returns for these weights
 */
public InstanceList[] split (double[] proportions) {
  // The no-arg constructor chooses a seed that is very likely distinct on every
  // invocation; seeding with currentTimeMillis() would give identical shuffles to
  // calls that land in the same millisecond.
  return split (new java.util.Random (), proportions);
}
/**
 * Splits using a freshly created random generator, delegating to
 * {@code split(java.util.Random, double[])}.
 *
 * @param proportions weights forwarded unchanged to the two-argument overload
 * @return whatever the two-argument {@code split} returns for these weights
 */
public InstanceList[] split (double[] proportions) {
  // The no-arg constructor chooses a seed that is very likely distinct on every
  // invocation; seeding with currentTimeMillis() would give identical shuffles to
  // calls that land in the same millisecond.
  return split (new java.util.Random (), proportions);
}
/**
 * Splits using a freshly created random generator, delegating to
 * {@code split(java.util.Random, double[])}.
 *
 * @param proportions weights forwarded unchanged to the two-argument overload
 * @return whatever the two-argument {@code split} returns for these weights
 */
public InstanceList[] split (double[] proportions) {
  // The no-arg constructor chooses a seed that is very likely distinct on every
  // invocation; seeding with currentTimeMillis() would give identical shuffles to
  // calls that land in the same millisecond.
  return split (new java.util.Random (), proportions);
}
/**
 * Trains {@code acrf} on a sequence of random subsets of the training data and
 * then on the full training list.
 *
 * <p>For each entry of {@code proportions}, {@code training} is split with the
 * random source {@code r} and the first partition is passed to the six-argument
 * {@code train} overload together with {@code iterPerProportion}. Afterwards the
 * same overload is called once more with the complete {@code training} list and
 * 99999 as its last argument.
 *
 * @param acrf model handed to the six-argument {@code train}
 * @param training full training list; subsets are drawn from it
 * @param validation forwarded unchanged to the six-argument {@code train}
 * @param testing forwarded unchanged to the six-argument {@code train}
 * @param eval forwarded unchanged to the six-argument {@code train}
 * @param proportions one subset weight per round, in the order the rounds run;
 *                    values are expected to lie in [0, 1]
 * @param iterPerProportion last argument of the per-round {@code train} call
 */
public void train (ACRF acrf, InstanceList training, InstanceList validation, InstanceList testing,
                   ACRFEvaluator eval, double[] proportions, int iterPerProportion)
{
  for (int i = 0; i < proportions.length; i++) {
    double proportion = proportions[i];
    // Weights sum to one so the first partition matches the proportion that is
    // logged below, whether or not split() normalises its weights.
    InstanceList[] lists = training.split (r, new double[]{proportion, 1 - proportion});
    logger.info ("ACRF trainer: Round " + i + ", training proportion = " + proportion);
    train (acrf, lists[0], validation, testing, eval, iterPerProportion);
  }

  logger.info ("ACRF trainer: Training on full data");
  train (acrf, training, validation, testing, eval, 99999);
}
/**
 * Trains {@code acrf} on a sequence of random subsets of the training data and
 * then on the full training list.
 *
 * <p>For each entry of {@code proportions}, {@code training} is split with the
 * random source {@code r} and the first partition is passed to the six-argument
 * {@code train} overload together with {@code iterPerProportion}. Afterwards the
 * same overload is called once more with the complete {@code training} list and
 * 99999 as its last argument.
 *
 * @param acrf model handed to the six-argument {@code train}
 * @param training full training list; subsets are drawn from it
 * @param validation forwarded unchanged to the six-argument {@code train}
 * @param testing forwarded unchanged to the six-argument {@code train}
 * @param eval forwarded unchanged to the six-argument {@code train}
 * @param proportions one subset weight per round, in the order the rounds run;
 *                    values are expected to lie in [0, 1]
 * @param iterPerProportion last argument of the per-round {@code train} call
 */
public void train (ACRF acrf, InstanceList training, InstanceList validation, InstanceList testing,
                   ACRFEvaluator eval, double[] proportions, int iterPerProportion)
{
  for (int i = 0; i < proportions.length; i++) {
    double proportion = proportions[i];
    // Weights sum to one so the first partition matches the proportion that is
    // logged below, whether or not split() normalises its weights.
    InstanceList[] lists = training.split (r, new double[]{proportion, 1 - proportion});
    logger.info ("ACRF trainer: Round " + i + ", training proportion = " + proportion);
    train (acrf, lists[0], validation, testing, eval, iterPerProportion);
  }

  logger.info ("ACRF trainer: Training on full data");
  train (acrf, training, validation, testing, eval, 99999);
}
/**
 * Trains {@code acrf} on a sequence of random subsets of the training data and
 * then on the full training list.
 *
 * <p>For each entry of {@code proportions}, {@code training} is split with the
 * random source {@code r} and the first partition is passed to the six-argument
 * {@code train} overload together with {@code iterPerProportion}. Afterwards the
 * same overload is called once more with the complete {@code training} list and
 * 99999 as its last argument.
 *
 * @param acrf model handed to the six-argument {@code train}
 * @param training full training list; subsets are drawn from it
 * @param validation forwarded unchanged to the six-argument {@code train}
 * @param testing forwarded unchanged to the six-argument {@code train}
 * @param eval forwarded unchanged to the six-argument {@code train}
 * @param proportions one subset weight per round, in the order the rounds run;
 *                    values are expected to lie in [0, 1]
 * @param iterPerProportion last argument of the per-round {@code train} call
 */
public void train (ACRF acrf, InstanceList training, InstanceList validation, InstanceList testing,
                   ACRFEvaluator eval, double[] proportions, int iterPerProportion)
{
  for (int i = 0; i < proportions.length; i++) {
    double proportion = proportions[i];
    // Weights sum to one so the first partition matches the proportion that is
    // logged below, whether or not split() normalises its weights.
    InstanceList[] lists = training.split (r, new double[]{proportion, 1 - proportion});
    logger.info ("ACRF trainer: Round " + i + ", training proportion = " + proportion);
    train (acrf, lists[0], validation, testing, eval, iterPerProportion);
  }

  logger.info ("ACRF trainer: Training on full data");
  train (acrf, training, validation, testing, eval, 99999);
}
public static Trial testTrainSplit(InstanceList instances) { InstanceList[] instanceLists = instances.split(new Randoms(), new double[] { 0.9, 0.1, 0.0 }); // LOG.debug("{} training instance, {} testing instances", // instanceLists[0].size(), instanceLists[1].size()); @SuppressWarnings("rawtypes") ClassifierTrainer trainer = new MaxEntTrainer(); Classifier classifier = trainer.train(instanceLists[TRAINING]); return new Trial(classifier, instanceLists[TESTING]); }
/**
 * Pipes the documents from {@code getDocumentsFromMongo()} through a
 * {@code ClassifierPipe} and hands the result to
 * {@code processTrainerDefinitions}.
 *
 * <p>When {@code forTesting} is greater than zero the instances are split with
 * weights {@code {1 - forTesting, forTesting}} into a training and a testing
 * list; otherwise every instance is used for training and the testing list is
 * {@code null}.
 *
 * @param settings not read by this method
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
protected void execute(JobSettings settings) throws AnalysisEngineProcessException {
    Pipe classifierPipe = new ClassifierPipe(stopwords);
    InstanceList instances = new InstanceList(classifierPipe);
    instances.addThruPipe(getDocumentsFromMongo());

    // Default: train on everything, with no held-out list.
    InstanceList training = instances;
    InstanceList testing = null;

    if (forTesting > 0.0) {
        double[] weights = {1 - forTesting, forTesting};
        InstanceList[] parts = instances.split(weights);
        training = parts[0];
        testing = parts[1];
    }

    processTrainerDefinitions(training, testing);
}
/**
 * Pipes the documents from {@code getDocumentsFromMongo()} through a
 * {@code ClassifierPipe} and hands the result to
 * {@code processTrainerDefinitions}.
 *
 * <p>When {@code forTesting} is greater than zero the instances are split with
 * weights {@code {1 - forTesting, forTesting}} into a training and a testing
 * list; otherwise every instance is used for training and the testing list is
 * {@code null}.
 *
 * @param settings not read by this method
 * @throws AnalysisEngineProcessException declared by the overridden method
 */
@Override
protected void execute(JobSettings settings) throws AnalysisEngineProcessException {
    Pipe classifierPipe = new ClassifierPipe(stopwords);
    InstanceList instances = new InstanceList(classifierPipe);
    instances.addThruPipe(getDocumentsFromMongo());

    // Default: train on everything, with no held-out list.
    InstanceList training = instances;
    InstanceList testing = null;

    if (forTesting > 0.0) {
        double[] weights = {1 - forTesting, forTesting};
        InstanceList[] parts = instances.split(weights);
        training = parts[0];
        testing = parts[1];
    }

    processTrainerDefinitions(training, testing);
}
/**
 * Trains a fully connected CRF with sparse weights on one half of a seeded
 * 50/50 split of {@code data} and checks that the token accuracy reported for
 * the other half is 0.9409 within a tolerance of 0.001.
 */
public void testTokenAccuracy() {
    Pipe pipe = makeSpacePredictionPipe();

    InstanceList all = new InstanceList(pipe);
    all.addThruPipe(new ArrayIterator(data));

    // Fixed seed; the expected accuracy below is tied to this particular split.
    InstanceList[] halves = all.split(new Random(777), new double[] { .5, .5 });
    InstanceList trainHalf = halves[0];
    InstanceList testHalf = halves[1];

    CRF model = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
    model.addFullyConnectedStatesForLabels();

    CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(model);
    trainer.setUseSparseWeights(true);
    trainer.trainIncremental(trainHalf);

    String[] descriptions = { "Train", "Test" };
    TokenAccuracyEvaluator evaluator = new TokenAccuracyEvaluator(halves, descriptions);
    evaluator.evaluateInstanceList(trainer, testHalf, "Test");

    assertEquals(0.9409, evaluator.getAccuracy("Test"), 0.001);
}
/**
 * Trains a fully connected CRF with sparse weights on one half of a seeded
 * 50/50 split of {@code data} and checks that the token accuracy reported for
 * the other half is 0.9409 within a tolerance of 0.001.
 */
public void testTokenAccuracy() {
    Pipe pipe = makeSpacePredictionPipe();

    InstanceList all = new InstanceList(pipe);
    all.addThruPipe(new ArrayIterator(data));

    // Fixed seed; the expected accuracy below is tied to this particular split.
    InstanceList[] halves = all.split(new Random(777), new double[] { .5, .5 });
    InstanceList trainHalf = halves[0];
    InstanceList testHalf = halves[1];

    CRF model = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
    model.addFullyConnectedStatesForLabels();

    CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(model);
    trainer.setUseSparseWeights(true);
    trainer.trainIncremental(trainHalf);

    String[] descriptions = { "Train", "Test" };
    TokenAccuracyEvaluator evaluator = new TokenAccuracyEvaluator(halves, descriptions);
    evaluator.evaluateInstanceList(trainer, testHalf, "Test");

    assertEquals(0.9409, evaluator.getAccuracy("Test"), 0.001);
}