/** * do the training * * @param instList * @param myPipe */ void train(final InstanceList instList, final Pipe myPipe) { final long s1 = System.currentTimeMillis(); // set up model model = new CRF(myPipe, null); model.addStatesForLabelsConnectedAsIn(instList); // get trainer final CRFTrainerByLabelLikelihood crfTrainer = new CRFTrainerByLabelLikelihood( model); // do the training with unlimited amount of iterations // --> refrained from using modified version of mallet; // it's now the original source final boolean b = crfTrainer.train(instList); LOGGER.info("Tokenizer training: model converged: " + b); final long s2 = System.currentTimeMillis(); // stop growth and set trained model.getInputPipe().getDataAlphabet().stopGrowth(); trained = true; LOGGER.debug("train() - training time: " + ((s2 - s1) / 1000) + " sec"); }
// Cast model to CRF and let it add its states from data via
// addStatesForLabelsConnectedAsIn. NOTE(review): the declared type of model is
// not visible here; the cast throws ClassCastException if it is not a CRF.
((CRF) model).addStatesForLabelsConnectedAsIn(data);
crf.addStatesForLabelsConnectedAsIn (trainingData); else if (labelGramOption.value == 2) crf.addStatesForBiLabelsConnectedAsIn (trainingData);
crf.addStatesForLabelsConnectedAsIn (trainingData); else if (labelGramOption.value == 2) crf.addStatesForBiLabelsConnectedAsIn (trainingData);
crf.addStatesForLabelsConnectedAsIn (trainingData); else if (labelGramOption.value == 2) crf.addStatesForBiLabelsConnectedAsIn (trainingData);
/**
 * Checks the value reported by the label-likelihood optimizable for two CRFs built
 * over the {@code toy} data: one with label-connected states plus an added start
 * state, and one built with {@code addOrderNStates} where {@code "A"} is passed in
 * what appears to be the start-label position (confirm against the MALLET API).
 */
public void testStartState() {
    // Pipeline stages, in the order they are applied to the raw text.
    Pipe[] stages = {
        new LineGroupString2TokenSequence(),
        new TokenSequenceMatchDataAndTarget(Pattern.compile("^(\\S+) (.*)"), 2, 1),
        new TokenSequenceParseFeatureString(false),
        new TokenText(),
        new TokenSequence2FeatureVectorSequence(true, false),
        new Target2LabelSequence(),
        new PrintInputAndTarget(),
    };
    Pipe pipeline = new SerialPipes(stages);

    InstanceList instances = new InstanceList(pipeline);
    LineGroupIterator toyLines =
            new LineGroupIterator(new StringReader(toy), Pattern.compile("\n"), true);
    instances.addThruPipe(toyLines);

    // Case 1: states connected as in the data, with an explicitly added start state.
    CRF labelCrf = new CRF(pipeline, null);
    labelCrf.print();
    labelCrf.addStatesForLabelsConnectedAsIn(instances);
    labelCrf.addStartState();
    CRFTrainerByLabelLikelihood labelTrainer = new CRFTrainerByLabelLikelihood(labelCrf);
    Optimizable.ByGradientValue labelObjective = labelTrainer.getOptimizableCRF(instances);
    assertEquals(-1.3862, labelObjective.getValue(), 1e-4);

    // Case 2: order-1 states created through addOrderNStates.
    CRF orderOneCrf = new CRF(pipeline, null);
    int[] orders = { 1 };
    orderOneCrf.addOrderNStates(instances, orders, null, "A", null, null, false);
    orderOneCrf.print();
    CRFTrainerByLabelLikelihood orderOneTrainer = new CRFTrainerByLabelLikelihood(orderOneCrf);
    Optimizable.ByGradientValue orderOneObjective =
            orderOneTrainer.getOptimizableCRF(instances);
    assertEquals(-3.09104245335831, orderOneObjective.getValue(), 1e-4);
}
/**
 * Checks the value reported by the label-likelihood optimizable for two CRFs built
 * over the {@code toy} data: one with label-connected states plus an added start
 * state, and one built with {@code addOrderNStates} where {@code "A"} is passed in
 * what appears to be the start-label position (confirm against the MALLET API).
 */
public void testStartState() {
    // Pipeline stages, in the order they are applied to the raw text.
    Pipe[] stages = {
        new LineGroupString2TokenSequence(),
        new TokenSequenceMatchDataAndTarget(Pattern.compile("^(\\S+) (.*)"), 2, 1),
        new TokenSequenceParseFeatureString(false),
        new TokenText(),
        new TokenSequence2FeatureVectorSequence(true, false),
        new Target2LabelSequence(),
        new PrintInputAndTarget(),
    };
    Pipe pipeline = new SerialPipes(stages);

    InstanceList instances = new InstanceList(pipeline);
    LineGroupIterator toyLines =
            new LineGroupIterator(new StringReader(toy), Pattern.compile("\n"), true);
    instances.addThruPipe(toyLines);

    // Case 1: states connected as in the data, with an explicitly added start state.
    CRF labelCrf = new CRF(pipeline, null);
    labelCrf.print();
    labelCrf.addStatesForLabelsConnectedAsIn(instances);
    labelCrf.addStartState();
    CRFTrainerByLabelLikelihood labelTrainer = new CRFTrainerByLabelLikelihood(labelCrf);
    Optimizable.ByGradientValue labelObjective = labelTrainer.getOptimizableCRF(instances);
    assertEquals(-1.3862, labelObjective.getValue(), 1e-4);

    // Case 2: order-1 states created through addOrderNStates.
    CRF orderOneCrf = new CRF(pipeline, null);
    int[] orders = { 1 };
    orderOneCrf.addOrderNStates(instances, orders, null, "A", null, null, false);
    orderOneCrf.print();
    CRFTrainerByLabelLikelihood orderOneTrainer = new CRFTrainerByLabelLikelihood(orderOneCrf);
    Optimizable.ByGradientValue orderOneObjective =
            orderOneTrainer.getOptimizableCRF(instances);
    assertEquals(-3.09104245335831, orderOneObjective.getValue(), 1e-4);
}