/**
 * Convenience wrapper: train on the full data with no iteration cap
 * (runs until the underlying optimizer converges).
 *
 * @param training the training instances
 * @return true if training converged
 */
public boolean trainIncremental (InstanceList training) { return train (training, Integer.MAX_VALUE); }
/**
 * Train a CRF on various-sized subsets of the data.  This method is typically used to
 * accelerate training by quickly getting to reasonable parameters on only a subset of the
 * parameters first, then on progressively more data.
 *
 * @param training The training Instances.
 * @param numIterationsPerProportion Maximum number of Maximizer iterations per training proportion.
 * @param trainingProportions If non-null, train on increasingly larger portions of the data,
 *        e.g. new double[] {0.2, 0.5, 1.0}.  This can sometimes speed up convergence.
 *        Be sure to end in 1.0 if you want to train on all the data in the end.
 * @return True if training has converged.
 */
public boolean train (InstanceList training, int numIterationsPerProportion, double[] trainingProportions) {
    // Delegates to the four-argument overload, using the same iteration budget for the
    // final full-data round as for each subset round.
    return train(training, numIterationsPerProportion, numIterationsPerProportion, trainingProportions);
}
/**
 * Convenience wrapper: train on the full data with no iteration cap
 * (runs until the underlying optimizer converges).
 *
 * @param training the training instances
 * @return true if training converged
 */
public boolean trainIncremental (InstanceList training) { return train (training, Integer.MAX_VALUE); }
/**
 * Convenience wrapper: train on the full data with no iteration cap
 * (runs until the underlying optimizer converges).
 *
 * @param training the training instances
 * @return true if training converged
 */
public boolean trainIncremental (InstanceList training) { return train (training, Integer.MAX_VALUE); }
/**
 * Train on increasingly larger portions of the data, then (optionally) on the full set.
 * Subset rounds each get {@code numIterationsPerProportion} optimizer iterations; the
 * round whose proportion is exactly 1.0 gets {@code maxIterationsOnFull}.
 *
 * @param training The training Instances.
 * @param numIterationsPerProportion Maximum optimizer iterations per subset round.
 * @param maxIterationsOnFull Maximum optimizer iterations for the full-data round.
 * @param trainingProportions Increasing fractions in (0, 1], e.g. {0.2, 0.5, 1.0};
 *        end with 1.0 to finish on all the data.  Must be non-null and non-empty.
 * @return True if the last round converged.
 */
public boolean train (InstanceList training, int numIterationsPerProportion, int maxIterationsOnFull, double[] trainingProportions) {
    assert (trainingProportions != null && trainingProportions.length > 0);
    boolean converged = false;
    for (int i = 0; i < trainingProportions.length; i++) {
        assert (trainingProportions[i] <= 1.0);
        // The proportions are fractions, so scale to a true percentage for the log
        // (previously logged e.g. "0.2%" when 20% of the data was meant).
        logger.info ("Training on " + (trainingProportions[i] * 100) + "% of the data this round.");
        if (trainingProportions[i] == 1.0) {
            // Full-data round uses its own iteration budget.
            converged = this.train (training, maxIterationsOnFull);
        } else {
            // Fixed seed: the subset drawn for each proportion is reproducible across runs.
            converged = this.train (training.split (new Random(1),
                new double[] {trainingProportions[i], 1 - trainingProportions[i]})[0],
                numIterationsPerProportion);
        }
    }
    return converged;
}
}
/**
 * Train a CRF on various-sized subsets of the data.  This method is typically used to
 * accelerate training by quickly getting to reasonable parameters on only a subset of
 * the parameters first, then on progressively more data.
 *
 * @param training The training Instances.
 * @param numIterationsPerProportion Maximum number of Maximizer iterations per training proportion.
 * @param trainingProportions If non-null, train on increasingly larger portions of the data,
 *        e.g. new double[] {0.2, 0.5, 1.0}.  Be sure to end in 1.0 if you want to train
 *        on all the data in the end.
 * @return True if training has converged.
 */
public boolean train (InstanceList training, int numIterationsPerProportion, double[] trainingProportions) {
    assert (trainingProportions != null && trainingProportions.length > 0);
    boolean converged = false;
    for (int i = 0; i < trainingProportions.length; i++) {
        assert (trainingProportions[i] <= 1.0);
        // The proportions are fractions, so scale to a true percentage for the log
        // (previously logged e.g. "0.2%" when 20% of the data was meant).
        logger.info ("Training on " + (trainingProportions[i] * 100) + "% of the data this round.");
        if (trainingProportions[i] == 1.0) {
            converged = this.train (training, numIterationsPerProportion);
        } else {
            // Fixed seed: the subset drawn for each proportion is reproducible across runs.
            converged = this.train (training.split (new Random(1),
                new double[] {trainingProportions[i], 1 - trainingProportions[i]})[0],
                numIterationsPerProportion);
        }
    }
    return converged;
}
}
/**
 * Train a CRF on various-sized subsets of the data.  This method is typically used to
 * accelerate training by quickly getting to reasonable parameters on only a subset of
 * the parameters first, then on progressively more data.
 *
 * @param training The training Instances.
 * @param numIterationsPerProportion Maximum number of Maximizer iterations per training proportion.
 * @param trainingProportions If non-null, train on increasingly larger portions of the data,
 *        e.g. new double[] {0.2, 0.5, 1.0}.  Be sure to end in 1.0 if you want to train
 *        on all the data in the end.
 * @return True if training has converged.
 */
public boolean train (InstanceList training, int numIterationsPerProportion, double[] trainingProportions) {
    assert (trainingProportions != null && trainingProportions.length > 0);
    boolean converged = false;
    for (int i = 0; i < trainingProportions.length; i++) {
        assert (trainingProportions[i] <= 1.0);
        // The proportions are fractions, so scale to a true percentage for the log
        // (previously logged e.g. "0.2%" when 20% of the data was meant).
        logger.info ("Training on " + (trainingProportions[i] * 100) + "% of the data this round.");
        if (trainingProportions[i] == 1.0) {
            converged = this.train (training, numIterationsPerProportion);
        } else {
            // Fixed seed: the subset drawn for each proportion is reproducible across runs.
            converged = this.train (training.split (new Random(1),
                new double[] {trainingProportions[i], 1 - trainingProportions[i]})[0],
                numIterationsPerProportion);
        }
    }
    return converged;
}
}
private TransducerTrainer trainOnce(Pipe pipe, InstanceList trainData) { Stopwatch watch = Stopwatch.createStarted(); CRF crf = new CRF(pipe, null); crf.addOrderNStates(trainData, new int[]{1}, null, null, null, null, false); crf.addStartState(); log.info("Starting alignTag training..."); CRFTrainerByThreadedLabelLikelihood trainer = new CRFTrainerByThreadedLabelLikelihood(crf, 8); trainer.setGaussianPriorVariance(2); // trainer.setUseSomeUnsupportedTrick(false); trainer.train(trainData); trainer.shutdown(); watch.stop(); log.info("Align Tag CRF Training took " + watch.toString()); crf.getInputAlphabet().stopGrowth(); crf.getOutputAlphabet().stopGrowth(); return trainer; }
private TransducerTrainer trainOnce(Pipe pipe, InstanceList examples) { Stopwatch watch = Stopwatch.createStarted(); CRF crf = new CRF(pipe, null); crf.addOrderNStates(examples, new int[]{1}, null, null, null, null, false); crf.addStartState(); // crf.setWeightsDimensionAsIn(examples, false); log.info("Starting syllchain training..."); CRFTrainerByThreadedLabelLikelihood trainer = new CRFTrainerByThreadedLabelLikelihood(crf, 8); trainer.setGaussianPriorVariance(2); // trainer.setUseSomeUnsupportedTrick(false); // trainer.setAddNoFactors(true); trainer.train(examples); trainer.shutdown(); watch.stop(); log.info("SyllChain CRF Training took " + watch.toString()); crf.getInputAlphabet().stopGrowth(); crf.getOutputAlphabet().stopGrowth(); return trainer; }
/**
 * Builds and trains a first-order CRF for the syllable align-tag task,
 * optionally warm-starting from a previously trained model.
 *
 * @param pipe feature-extraction pipe shared by training and later decoding
 * @param trainData training instances produced by {@code pipe}
 * @return the trainer wrapping the trained CRF
 */
private TransducerTrainer trainOnce(Pipe pipe, InstanceList trainData) {
    Stopwatch watch = Stopwatch.createStarted();
    CRF crf = new CRF(pipe, null);
    // First-order states only; no forbidden/allowed transition patterns.
    crf.addOrderNStates(trainData, new int[]{1}, null, null, null, null, false);
    crf.addStartState();
    crf.setWeightsDimensionAsIn(trainData, false);
    if (initFrom != null) {
        // Warm start: copy over whatever parameters from the previous model still apply.
        crf.initializeApplicableParametersFrom(initFrom);
    }
    log.info("Starting alignTag training...");
    CRFTrainerByThreadedLabelLikelihood trainer = new CRFTrainerByThreadedLabelLikelihood(crf, 8);
    trainer.setGaussianPriorVariance(2);
    trainer.setAddNoFactors(true);
    trainer.setUseSomeUnsupportedTrick(false);
    try {
        trainer.train(trainData);
    } finally {
        // Always release the trainer's worker threads, even if training throws.
        trainer.shutdown();
    }
    watch.stop();
    log.info("Syll align Tag CRF Training took " + watch.toString());
    // Freeze the alphabets so later decoding cannot silently grow the feature space.
    crf.getInputAlphabet().stopGrowth();
    crf.getOutputAlphabet().stopGrowth();
    return trainer;
}
private TransducerTrainer trainOnce(Pipe pipe, InstanceList examples) { Stopwatch watch = Stopwatch.createStarted(); CRF crf = new CRF(pipe, null); crf.addOrderNStates(examples, new int[]{1}, null, null, null, null, false); crf.addStartState(); crf.setWeightsDimensionAsIn(examples, true); if (initFrom != null) { crf.initializeApplicableParametersFrom(initFrom); } log.info("Starting syllchain training..."); CRFTrainerByThreadedLabelLikelihood trainer = new CRFTrainerByThreadedLabelLikelihood(crf, 8); trainer.setGaussianPriorVariance(2); trainer.setAddNoFactors(true); // trainer.setUseSomeUnsupportedTrick(true); trainer.train(examples); trainer.shutdown(); watch.stop(); log.info("SyllChain CRF Training took " + watch.toString()); crf.getInputAlphabet().stopGrowth(); crf.getOutputAlphabet().stopGrowth(); return trainer; }
private TransducerTrainer trainOnce(Pipe pipe, InstanceList trainData) { Stopwatch watch = Stopwatch.createStarted(); CRF crf = new CRF(pipe, null); // O,O O,N -O,C- // N,O N,N N,C // C,O ?C,N? C,C Pattern forbidden = null; if (USE_ONC_CODING) { forbidden = Pattern.compile("(O,C|<START>,C|O,<END>)", Pattern.CASE_INSENSITIVE); } crf.addOrderNStates(trainData, new int[]{1}, null, null, forbidden, null, false); crf.addStartState(); crf.setWeightsDimensionAsIn(trainData); if (this.pullFrom != null) { crf.initializeApplicableParametersFrom(pullFrom); } log.info("Starting syll phone training..."); CRFTrainerByThreadedLabelLikelihood trainer = new CRFTrainerByThreadedLabelLikelihood(crf, 8); trainer.setGaussianPriorVariance(2); trainer.setAddNoFactors(false); trainer.setUseSomeUnsupportedTrick(true); trainer.train(trainData); trainer.shutdown(); watch.stop(); pipe.getAlphabet().stopGrowth(); pipe.getTargetAlphabet().stopGrowth(); log.info("Align Tag CRF Training took " + watch.toString()); return trainer; }
// Train the CRF using one label-likelihood worker per configured thread.
// NOTE(review): no trainer.shutdown() is visible in this span — confirm the worker
// threads are released elsewhere after training.
CRFTrainerByThreadedLabelLikelihood trainer = new CRFTrainerByThreadedLabelLikelihood( crf, threads);
trainer.train(trainingInstanceList);
LOG.info("done training CRF");
// NOTE(review): 'continueTags' is assigned but never read within this visible span —
// presumably consumed further down; verify, or remove if truly dead.
String[] continueTags = tags;
trainer.train(trainingSet);
// Run the optimizer one iteration at a time so the evaluator can be invoked
// between iterations.  (Loop body continues beyond this visible span.)
boolean converged;
for (int i = 1; i <= iterations; i++) {
    converged = crft.train (training, 1);
    if (i % 1 == 0 && eval != null) { // Change the 1 to higher integer to evaluate less often
        eval.evaluate(crft);
// Run the optimizer one iteration at a time so the evaluator can be invoked
// between iterations.  (Loop body continues beyond this visible span.)
boolean converged;
for (int i = 1; i <= iterations; i++) {
    converged = crft.train (training, 1);
    if (i % 1 == 0 && eval != null) { // Change the 1 to higher integer to evaluate less often
        eval.evaluate(crft);
// Supervised training pass: 'gpv' is the Gaussian prior variance (regularization
// strength), 'supIterations' caps the optimizer iterations.
// NOTE(review): shutdown() is not in a finally — worker threads leak if train() throws.
trainer.setAddNoFactors(true);
trainer.setGaussianPriorVariance(gpv);
trainer.train(trainingSet,supIterations);
trainer.shutdown();
// Supervised training pass: 'gpv' is the Gaussian prior variance (regularization
// strength), 'supIterations' caps the optimizer iterations.
// NOTE(review): shutdown() is not in a finally — worker threads leak if train() throws.
trainer.setAddNoFactors(true);
trainer.setGaussianPriorVariance(gpv);
trainer.train(trainingSet,supIterations);
trainer.shutdown();
// Supervised training pass: 'gpv' is the Gaussian prior variance (regularization
// strength), 'supIterations' caps the optimizer iterations.
// NOTE(review): shutdown() is not in a finally — worker threads leak if train() throws.
trainer.setAddNoFactors(true);
trainer.setGaussianPriorVariance(gpv);
trainer.train(trainingSet,supIterations);
trainer.shutdown();