/** Trains a CRF until convergence. */ public boolean trainIncremental (InstanceList training) { return train (training, Integer.MAX_VALUE); }
/** Trains a CRF until convergence. */ public boolean trainIncremental (InstanceList training) { return train (training, Integer.MAX_VALUE); }
/** Trains a CRF until convergence. */ public boolean trainIncremental (InstanceList training) { return train (training, Integer.MAX_VALUE); }
/** * Train a CRF on various-sized subsets of the data. This method is typically used to accelerate training by * quickly getting to reasonable parameters on only a subset of the parameters first, then on progressively more data. * @param training The training Instances. * @param numIterationsPerProportion Maximum number of Maximizer iterations per training proportion. * @param trainingProportions If non-null, train on increasingly * larger portions of the data, e.g. new double[] {0.2, 0.5, 1.0}. This can sometimes speedup convergence. * Be sure to end in 1.0 if you want to train on all the data in the end. * @return True if training has converged. */ public boolean train (InstanceList training, int numIterationsPerProportion, double[] trainingProportions) { int trainingIteration = 0; assert (trainingProportions.length > 0); boolean converged = false; for (int i = 0; i < trainingProportions.length; i++) { assert (trainingProportions[i] <= 1.0); logger.info ("Training on "+trainingProportions[i]+"% of the data this round."); if (trainingProportions[i] == 1.0) converged = this.train (training, numIterationsPerProportion); else converged = this.train (training.split (new Random(1), new double[] {trainingProportions[i], 1-trainingProportions[i]})[0], numIterationsPerProportion); trainingIteration += numIterationsPerProportion; } return converged; }
/** * Train a CRF on various-sized subsets of the data. This method is typically used to accelerate training by * quickly getting to reasonable parameters on only a subset of the parameters first, then on progressively more data. * @param training The training Instances. * @param numIterationsPerProportion Maximum number of Maximizer iterations per training proportion. * @param trainingProportions If non-null, train on increasingly * larger portions of the data, e.g. new double[] {0.2, 0.5, 1.0}. This can sometimes speedup convergence. * Be sure to end in 1.0 if you want to train on all the data in the end. * @return True if training has converged. */ public boolean train (InstanceList training, int numIterationsPerProportion, double[] trainingProportions) { int trainingIteration = 0; assert (trainingProportions.length > 0); boolean converged = false; for (int i = 0; i < trainingProportions.length; i++) { assert (trainingProportions[i] <= 1.0); logger.info ("Training on "+trainingProportions[i]+"% of the data this round."); if (trainingProportions[i] == 1.0) converged = this.train (training, numIterationsPerProportion); else converged = this.train (training.split (new Random(1), new double[] {trainingProportions[i], 1-trainingProportions[i]})[0], numIterationsPerProportion); trainingIteration += numIterationsPerProportion; } return converged; }
/** * Train a CRF on various-sized subsets of the data. This method is typically used to accelerate training by * quickly getting to reasonable parameters on only a subset of the parameters first, then on progressively more data. * @param training The training Instances. * @param numIterationsPerProportion Maximum number of Maximizer iterations per training proportion. * @param trainingProportions If non-null, train on increasingly * larger portions of the data, e.g. new double[] {0.2, 0.5, 1.0}. This can sometimes speedup convergence. * Be sure to end in 1.0 if you want to train on all the data in the end. * @return True if training has converged. */ public boolean train (InstanceList training, int numIterationsPerProportion, double[] trainingProportions) { int trainingIteration = 0; assert (trainingProportions.length > 0); boolean converged = false; for (int i = 0; i < trainingProportions.length; i++) { assert (trainingProportions[i] <= 1.0); logger.info ("Training on "+trainingProportions[i]+"% of the data this round."); if (trainingProportions[i] == 1.0) converged = this.train (training, numIterationsPerProportion); else converged = this.train (training.split (new Random(1), new double[] {trainingProportions[i], 1-trainingProportions[i]})[0], numIterationsPerProportion); trainingIteration += numIterationsPerProportion; } return converged; }
public void train(TextBlock textBlock) throws Exception { InstanceList trainingData = new InstanceList(getPipes()); for (TextSentence textSentence : textBlock) { Instance textInstance = new TextInstance(textSentence, getTargetAlphabet()); trainingData.addThruPipe(textInstance); } if (crf == null) { crf = new CRF(getPipes(), null); crf.addFullyConnectedStatesForLabels(); crf.setWeightsDimensionAsIn(trainingData, false); CRFOptimizableByLabelLikelihood optLabel = new CRFOptimizableByLabelLikelihood(crf, trainingData); Optimizable.ByGradientValue[] opts = new Optimizable.ByGradientValue[] { optLabel }; crfTrainer = new CRFTrainerByValueGradients(crf, opts); crfTrainer.setMaxResets(0); } crfTrainer.train(trainingData, Integer.MAX_VALUE); }