/**
 * Builds an untrained CRF whose states are created by {@code addOrderNStates} from the
 * training data, with a single permitted initial state.
 *
 * <p>Every state gets {@code Transducer.IMPOSSIBLE_WEIGHT} as its initial weight except the
 * state whose name {@code addOrderNStates} returns, which gets {@code 0.0}.
 *
 * @param training instances whose pipe becomes the CRF's input pipe; also handed to
 *     {@code addOrderNStates}
 * @param orders passed through to {@code addOrderNStates}
 * @param defaultLabel passed through to {@code addOrderNStates}
 * @param forbidden regular expression compiled and passed as the forbidden pattern, or
 *     {@code null} to pass no forbidden pattern
 * @param allowed regular expression compiled and passed as the allowed pattern, or
 *     {@code null} to pass no allowed pattern
 * @param connected passed through to {@code addOrderNStates}
 * @return the CRF after {@code setWeightsDimensionDensely()} has been applied
 * @throws java.util.regex.PatternSyntaxException if a non-null expression is malformed
 */
public static CRF getCRF(InstanceList training, int[] orders, String defaultLabel,
        String forbidden, String allowed, boolean connected) {
    // Pattern.compile(null) throws NullPointerException, so a missing expression is
    // forwarded as a null pattern instead.
    // NOTE(review): relies on addOrderNStates treating a null pattern as "no constraint".
    Pattern forbiddenPat = (forbidden == null) ? null : Pattern.compile(forbidden);
    Pattern allowedPat = (allowed == null) ? null : Pattern.compile(allowed);

    CRF crf = new CRF(training.getPipe(), (Pipe) null);
    String startName = crf.addOrderNStates(
            training, orders, null, defaultLabel, forbiddenPat, allowedPat, connected);

    // Rule out every state as a starting point, then re-enable only the start state.
    for (int i = 0; i < crf.numStates(); i++) {
        crf.getState(i).setInitialWeight(Transducer.IMPOSSIBLE_WEIGHT);
    }
    crf.getState(startName).setInitialWeight(0.0);

    crf.setWeightsDimensionDensely();
    return crf;
}
/**
 * Creates a CRF over the pipe of {@code training}, populates its states through
 * {@code addOrderNStates}, and restricts the initial state to the one that call names.
 *
 * @param training instances supplying the input pipe and the data for state creation
 * @param orders forwarded unchanged to {@code addOrderNStates}
 * @param defaultLabel forwarded unchanged to {@code addOrderNStates}
 * @param forbidden source of the forbidden-transition pattern; may be {@code null}, in
 *     which case a null pattern is forwarded
 * @param allowed source of the allowed-transition pattern; may be {@code null}, in which
 *     case a null pattern is forwarded
 * @param connected forwarded unchanged to {@code addOrderNStates}
 * @return the configured CRF, after {@code setWeightsDimensionDensely()} has run
 * @throws java.util.regex.PatternSyntaxException if a non-null expression cannot be compiled
 */
public static CRF getCRF(InstanceList training, int[] orders, String defaultLabel,
        String forbidden, String allowed, boolean connected) {
    // Guard against null: compiling a null expression would throw NullPointerException.
    // NOTE(review): assumes addOrderNStates accepts null patterns -- confirm.
    Pattern forbiddenPat = null;
    if (forbidden != null) {
        forbiddenPat = Pattern.compile(forbidden);
    }
    Pattern allowedPat = null;
    if (allowed != null) {
        allowedPat = Pattern.compile(allowed);
    }

    CRF crf = new CRF(training.getPipe(), (Pipe) null);
    String startName = crf.addOrderNStates(
            training, orders, null, defaultLabel, forbiddenPat, allowedPat, connected);

    // All states start out impossible as initial states ...
    for (int i = 0; i < crf.numStates(); i++) {
        crf.getState(i).setInitialWeight(Transducer.IMPOSSIBLE_WEIGHT);
    }
    // ... and only the returned start state is given a usable initial weight.
    crf.getState(startName).setInitialWeight(0.0);

    crf.setWeightsDimensionDensely();
    return crf;
}
/**
 * Constructs a CRF for the given training data with order-N states and one start state.
 *
 * <p>The start state is the one named by the return value of {@code addOrderNStates}; it
 * receives initial weight {@code 0.0}, while all other states receive
 * {@code Transducer.IMPOSSIBLE_WEIGHT}.
 *
 * @param training instances providing the pipe and the data used to create states
 * @param orders handed to {@code addOrderNStates}
 * @param defaultLabel handed to {@code addOrderNStates}
 * @param forbidden regex for the forbidden pattern, or {@code null} for none
 * @param allowed regex for the allowed pattern, or {@code null} for none
 * @param connected handed to {@code addOrderNStates}
 * @return the CRF with dense weight dimensions set
 * @throws java.util.regex.PatternSyntaxException if a non-null regex is invalid
 */
public static CRF getCRF(InstanceList training, int[] orders, String defaultLabel,
        String forbidden, String allowed, boolean connected) {
    // A null regex previously failed in Pattern.compile; it is now forwarded as a null
    // pattern. NOTE(review): confirm addOrderNStates tolerates null patterns.
    Pattern forbiddenPat = forbidden != null ? Pattern.compile(forbidden) : null;
    Pattern allowedPat = allowed != null ? Pattern.compile(allowed) : null;

    CRF crf = new CRF(training.getPipe(), (Pipe) null);
    String startName = crf.addOrderNStates(
            training, orders, null, defaultLabel, forbiddenPat, allowedPat, connected);

    // Disable every state as an initial state before enabling the designated one.
    for (int i = 0; i < crf.numStates(); i++) {
        crf.getState(i).setInitialWeight(Transducer.IMPOSSIBLE_WEIGHT);
    }
    crf.getState(startName).setInitialWeight(0.0);

    crf.setWeightsDimensionDensely();
    return crf;
}
/**
 * Trains a CRF on the first {@code TestCRF.data} sample, extracts from the second, and
 * renders the extraction with {@code LatticeViewer.extraction2html} to {@code htmlFile}
 * and, using the four-argument overload with {@code true}, to {@code latticeFile}.
 *
 * @throws FileNotFoundException if either output file cannot be opened for writing
 */
public void testSpaceViewer () throws FileNotFoundException
{
    Pipe pipe = TestMEMM.makeSpacePredictionPipe ();
    String[] data0 = { TestCRF.data[0] };
    String[] data1 = { TestCRF.data[1] };

    InstanceList training = new InstanceList (pipe);
    training.addThruPipe (new ArrayIterator (data0));
    // Kept even though the list is not read afterwards: the instances still pass
    // through the shared pipe.
    InstanceList testing = new InstanceList (pipe);
    testing.addThruPipe (new ArrayIterator (data1));

    CRF crf = new CRF (pipe, null);
    crf.addFullyConnectedStatesForLabels ();
    CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood (crf);
    crft.trainIncremental (training);

    CRFExtractor extor = hackCrfExtor (crf);
    Extraction extraction = extor.extract (new ArrayIterator (data1));

    // Close each stream in a finally block so the file handle is released even when
    // rendering throws.
    PrintStream out = new PrintStream (new FileOutputStream (htmlFile));
    try {
        LatticeViewer.extraction2html (extraction, extor, out);
    } finally {
        out.close ();
    }

    out = new PrintStream (new FileOutputStream (latticeFile));
    try {
        LatticeViewer.extraction2html (extraction, extor, out, true);
    } finally {
        out.close ();
    }
}
private TransducerTrainer trainOnce(Pipe pipe, InstanceList trainData) { Stopwatch watch = Stopwatch.createStarted(); CRF crf = new CRF(pipe, null); crf.addOrderNStates(trainData, new int[]{1}, null, null, null, null, false); crf.addStartState(); log.info("Starting alignTag training..."); CRFTrainerByThreadedLabelLikelihood trainer = new CRFTrainerByThreadedLabelLikelihood(crf, 8); trainer.setGaussianPriorVariance(2); // trainer.setUseSomeUnsupportedTrick(false); trainer.train(trainData); trainer.shutdown(); watch.stop(); log.info("Align Tag CRF Training took " + watch.toString()); crf.getInputAlphabet().stopGrowth(); crf.getOutputAlphabet().stopGrowth(); return trainer; }
/**
 * Replaces the {@code crf} field with a fresh CRF built for the given examples.
 *
 * <p>The new CRF uses the examples' pipe, gets order-{1} states and a start state, and has
 * its weight dimensions set from the examples. If {@code crfFrom} is non-null, applicable
 * parameters are then initialized from it.
 *
 * @param examples instances the CRF structure and weight dimensions are derived from
 */
private void initializeFor(InstanceList examples) {
    final int[] orders = {1};

    // Assign the field first so it refers to the new CRF throughout the setup below.
    this.crf = new CRF(examples.getPipe(), null);
    this.crf.addOrderNStates(examples, orders, null, null, null, null, false);
    this.crf.addStartState();
    this.crf.setWeightsDimensionAsIn(examples, false);

    if (crfFrom == null) {
        return;
    }
    this.crf.initializeApplicableParametersFrom(crfFrom);
}
private TransducerTrainer trainOnce(Pipe pipe, InstanceList examples) { Stopwatch watch = Stopwatch.createStarted(); CRF crf = new CRF(pipe, null); crf.addOrderNStates(examples, new int[]{1}, null, null, null, null, false); crf.addStartState(); // crf.setWeightsDimensionAsIn(examples, false); log.info("Starting syllchain training..."); CRFTrainerByThreadedLabelLikelihood trainer = new CRFTrainerByThreadedLabelLikelihood(crf, 8); trainer.setGaussianPriorVariance(2); // trainer.setUseSomeUnsupportedTrick(false); // trainer.setAddNoFactors(true); trainer.train(examples); trainer.shutdown(); watch.stop(); log.info("SyllChain CRF Training took " + watch.toString()); crf.getInputAlphabet().stopGrowth(); crf.getOutputAlphabet().stopGrowth(); return trainer; }
private TransducerTrainer trainOnce(Pipe pipe, InstanceList examples) { Stopwatch watch = Stopwatch.createStarted(); CRF crf = new CRF(pipe, null); crf.addOrderNStates(examples, new int[]{1}, null, null, null, null, false); crf.addStartState(); crf.setWeightsDimensionAsIn(examples, true); if (initFrom != null) { crf.initializeApplicableParametersFrom(initFrom); } log.info("Starting syllchain training..."); CRFTrainerByThreadedLabelLikelihood trainer = new CRFTrainerByThreadedLabelLikelihood(crf, 8); trainer.setGaussianPriorVariance(2); trainer.setAddNoFactors(true); // trainer.setUseSomeUnsupportedTrick(true); trainer.train(examples); trainer.shutdown(); watch.stop(); log.info("SyllChain CRF Training took " + watch.toString()); crf.getInputAlphabet().stopGrowth(); crf.getOutputAlphabet().stopGrowth(); return trainer; }
/**
 * Creates a CRF with order-{1} states and a start state, optionally seeds it from
 * {@code initFrom}, and trains it on {@code trainData} with an 8-thread label-likelihood
 * trainer.
 *
 * <p>Weight dimensions are set from the training data (second argument {@code false})
 * before any seeding. Growth of the CRF's input and output alphabets is stopped after
 * training.
 *
 * @param pipe input pipe for the new CRF
 * @param trainData instances used to create the states, size the weights, and train
 * @return the trainer that was used (already shut down)
 */
private TransducerTrainer trainOnce(Pipe pipe, InstanceList trainData) {
    final Stopwatch timer = Stopwatch.createStarted();

    final int[] orders = {1};
    final CRF model = new CRF(pipe, null);
    model.addOrderNStates(trainData, orders, null, null, null, null, false);
    model.addStartState();
    model.setWeightsDimensionAsIn(trainData, false);
    if (initFrom != null) {
        model.initializeApplicableParametersFrom(initFrom);
    }

    log.info("Starting alignTag training...");
    final CRFTrainerByThreadedLabelLikelihood threadedTrainer =
            new CRFTrainerByThreadedLabelLikelihood(model, 8);
    threadedTrainer.setGaussianPriorVariance(2);
    threadedTrainer.setAddNoFactors(true);
    threadedTrainer.setUseSomeUnsupportedTrick(false);
    threadedTrainer.train(trainData);
    threadedTrainer.shutdown();

    timer.stop();
    log.info("Syll align Tag CRF Training took " + timer);

    model.getInputAlphabet().stopGrowth();
    model.getOutputAlphabet().stopGrowth();
    return threadedTrainer;
}
/**
 * Trains a CRF on the first {@code TestCRF.data} sample, extracts from the second, and
 * writes the extraction into {@code outputDir} via {@code DocumentViewer.writeExtraction}.
 *
 * @throws IOException if {@code outputDir} cannot be created, or if writing fails
 */
public void testSpaceViewer () throws IOException
{
    Pipe pipe = TestMEMM.makeSpacePredictionPipe ();
    String[] data0 = { TestCRF.data[0] };
    String[] data1 = { TestCRF.data[1] };

    InstanceList training = new InstanceList (pipe);
    training.addThruPipe (new ArrayIterator (data0));
    // Kept even though the list is not read afterwards: the instances still pass
    // through the shared pipe.
    InstanceList testing = new InstanceList (pipe);
    testing.addThruPipe (new ArrayIterator (data1));

    CRF crf = new CRF (pipe, null);
    crf.addFullyConnectedStatesForLabels ();
    CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood (crf);
    crft.trainIncremental (training);

    CRFExtractor extor = TestLatticeViewer.hackCrfExtor (crf);
    Extraction extraction = extor.extract (new ArrayIterator (data1));

    // Create the directory (including missing parents) and fail with a clear message
    // instead of ignoring a failed creation. The isDirectory re-check tolerates the
    // directory having been created concurrently.
    if (!outputDir.exists () && !outputDir.mkdirs () && !outputDir.isDirectory ()) {
        throw new IOException ("Could not create output directory " + outputDir);
    }
    DocumentViewer.writeExtraction (outputDir, extraction);
}
/**
 * Runs the space-prediction pipeline end to end: trains a CRF on {@code TestCRF.data[0]},
 * extracts from {@code TestCRF.data[1]}, and hands the extraction to
 * {@code DocumentViewer.writeExtraction} with {@code outputDir} as the target.
 *
 * @throws IOException if {@code outputDir} cannot be created, or if writing fails
 */
public void testSpaceViewer () throws IOException
{
    Pipe pipe = TestMEMM.makeSpacePredictionPipe ();
    String[] data0 = { TestCRF.data[0] };
    String[] data1 = { TestCRF.data[1] };

    InstanceList training = new InstanceList (pipe);
    training.addThruPipe (new ArrayIterator (data0));
    // Not read again below, but the instances are still run through the shared pipe.
    InstanceList testing = new InstanceList (pipe);
    testing.addThruPipe (new ArrayIterator (data1));

    CRF crf = new CRF (pipe, null);
    crf.addFullyConnectedStatesForLabels ();
    CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood (crf);
    crft.trainIncremental (training);

    CRFExtractor extor = TestLatticeViewer.hackCrfExtor (crf);
    Extraction extraction = extor.extract (new ArrayIterator (data1));

    // mkdir()'s result used to be ignored; report a failed creation explicitly and
    // create missing parent directories as well.
    if (!outputDir.exists ()) {
        boolean created = outputDir.mkdirs ();
        if (!created && !outputDir.isDirectory ()) {
            throw new IOException ("Could not create output directory " + outputDir);
        }
    }
    DocumentViewer.writeExtraction (outputDir, extraction);
}
/**
 * Runs {@code TestOptimizable.testGetSetParameters} against the optimizable of a CRF that
 * has 5 fully connected states and an input alphabet of 100 entries.
 */
public void testGetSetParameters() {
    final int featureCount = 100;
    final int stateCount = 5;

    // State names "state0" .. "state4".
    final String[] names = new String[stateCount];
    for (int s = 0; s < names.length; s++) {
        names[s] = "state" + s;
    }

    // Input alphabet holding "feature0" .. "feature99"; the output alphabet starts empty.
    final Alphabet inputs = new Alphabet();
    for (int f = 0; f < featureCount; f++) {
        inputs.lookupIndex("feature" + f);
    }
    final Alphabet outputs = new Alphabet();

    final CRF crf = new CRF(inputs, outputs);
    crf.addFullyConnectedStates(names);

    final CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(crf);
    final Optimizable.ByGradientValue optimizable =
            trainer.getOptimizableCRF(new InstanceList(null));
    TestOptimizable.testGetSetParameters(optimizable);
}
/**
 * Builds a CRF with five fully connected states over a 100-entry input alphabet and passes
 * its optimizable to {@code TestOptimizable.testGetSetParameters}.
 */
public void testGetSetParameters() {
    final int vocabularySize = 100;
    final int nStates = 5;

    // Populate the input alphabet with "feature0" .. "feature99".
    Alphabet dataAlphabet = new Alphabet();
    int next = 0;
    while (next < vocabularySize) {
        dataAlphabet.lookupIndex("feature" + next);
        next++;
    }
    Alphabet targetAlphabet = new Alphabet();

    CRF crf = new CRF(dataAlphabet, targetAlphabet);

    // Name the states "state0" .. "state4" and connect them all to each other.
    String[] labels = new String[nStates];
    int k = 0;
    while (k < nStates) {
        labels[k] = "state" + k;
        k++;
    }
    crf.addFullyConnectedStates(labels);

    CRFTrainerByLabelLikelihood likelihoodTrainer = new CRFTrainerByLabelLikelihood(crf);
    InstanceList noInstances = new InstanceList(null);
    Optimizable.ByGradientValue target = likelihoodTrainer.getOptimizableCRF(noInstances);
    TestOptimizable.testGetSetParameters(target);
}
public void testDenseFeatureSelection() { Pipe p = makeSpacePredictionPipe(); InstanceList instances = new InstanceList(p); instances.addThruPipe(new ArrayIterator(data)); // Test that dense observations wights aren't added for // "default-feature" edges. CRF crf1 = new CRF(p, null); crf1.addOrderNStates(instances, new int[] { 0 }, null, "start", null, null, true); CRFTrainerByLabelLikelihood crft1 = new CRFTrainerByLabelLikelihood( crf1); crft1.setUseSparseWeights(false); crft1.train(instances, 1); // Set weights dimension int nParams1 = crft1.getOptimizableCRF(instances).getNumParameters(); CRF crf2 = new CRF(p, null); crf2.addOrderNStates(instances, new int[] { 0, 1 }, new boolean[] { false, true }, "start", null, null, true); CRFTrainerByLabelLikelihood crft2 = new CRFTrainerByLabelLikelihood( crf2); crft2.setUseSparseWeights(false); crft2.train(instances, 1); // Set weights dimension int nParams2 = crft2.getOptimizableCRF(instances).getNumParameters(); assertEquals(nParams2, nParams1 + 4); }
public void testDenseFeatureSelection() { Pipe p = makeSpacePredictionPipe(); InstanceList instances = new InstanceList(p); instances.addThruPipe(new ArrayIterator(data)); // Test that dense observations wights aren't added for // "default-feature" edges. CRF crf1 = new CRF(p, null); crf1.addOrderNStates(instances, new int[] { 0 }, null, "start", null, null, true); CRFTrainerByLabelLikelihood crft1 = new CRFTrainerByLabelLikelihood( crf1); crft1.setUseSparseWeights(false); crft1.train(instances, 1); // Set weights dimension int nParams1 = crft1.getOptimizableCRF(instances).getNumParameters(); CRF crf2 = new CRF(p, null); crf2.addOrderNStates(instances, new int[] { 0, 1 }, new boolean[] { false, true }, "start", null, null, true); CRFTrainerByLabelLikelihood crft2 = new CRFTrainerByLabelLikelihood( crf2); crft2.setUseSparseWeights(false); crft2.train(instances, 1); // Set weights dimension int nParams2 = crft2.getOptimizableCRF(instances).getNumParameters(); assertEquals(nParams2, nParams1 + 4); }
/**
 * Splits the sample data 50/50 with a fixed seed, trains a fully connected CRF with sparse
 * weights on the first half, and asserts that token accuracy on the second half is
 * 0.9409 within a tolerance of 0.001.
 */
public void testTokenAccuracy() {
    final Pipe pipe = makeSpacePredictionPipe();

    final InstanceList all = new InstanceList(pipe);
    all.addThruPipe(new ArrayIterator(data));

    // The fixed seed keeps the train/test partition reproducible.
    final double[] proportions = { .5, .5 };
    final InstanceList[] halves = all.split(new Random(777), proportions);
    final InstanceList trainHalf = halves[0];
    final InstanceList testHalf = halves[1];

    final CRF crf = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
    crf.addFullyConnectedStatesForLabels();

    final CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(crf);
    trainer.setUseSparseWeights(true);
    trainer.trainIncremental(trainHalf);

    final String[] descriptions = { "Train", "Test" };
    final TokenAccuracyEvaluator evaluator = new TokenAccuracyEvaluator(halves, descriptions);
    evaluator.evaluateInstanceList(trainer, testHalf, "Test");

    assertEquals(0.9409, evaluator.getAccuracy("Test"), 0.001);
}
/**
 * Trains a sparse-weight, fully connected CRF on one half of the sample data and checks
 * that the token accuracy measured on the other half equals 0.9409 (tolerance 0.001).
 */
public void testTokenAccuracy() {
    Pipe spacePipe = makeSpacePredictionPipe();

    InstanceList dataset = new InstanceList(spacePipe);
    dataset.addThruPipe(new ArrayIterator(data));

    // Seeded split: element 0 is used for training, element 1 for evaluation.
    Random seeded = new Random(777);
    InstanceList[] parts = dataset.split(seeded, new double[] { .5, .5 });

    CRF model = new CRF(spacePipe.getDataAlphabet(), spacePipe.getTargetAlphabet());
    model.addFullyConnectedStatesForLabels();

    CRFTrainerByLabelLikelihood likelihoodTrainer = new CRFTrainerByLabelLikelihood(model);
    likelihoodTrainer.setUseSparseWeights(true);
    likelihoodTrainer.trainIncremental(parts[0]);

    TokenAccuracyEvaluator accuracy =
            new TokenAccuracyEvaluator(parts, new String[] { "Train", "Test" });
    accuracy.evaluateInstanceList(likelihoodTrainer, parts[1], "Test");

    double measured = accuracy.getAccuracy("Test");
    assertEquals(0.9409, measured, 0.001);
}
/**
 * Trains on the sentences of {@code textBlock}.
 *
 * <p>Each sentence is wrapped in a {@code TextInstance} and added through the pipes. When
 * the {@code crf} field is still null, a fully connected CRF and a value-gradient trainer
 * (max resets 0) are created first and stored in {@code crf} and {@code crfTrainer}.
 * Training then runs with an iteration limit of {@code Integer.MAX_VALUE}.
 *
 * @param textBlock sentences to train on
 * @throws Exception declared by the original signature; kept so callers are unaffected
 */
public void train(TextBlock textBlock) throws Exception {
    final InstanceList batch = new InstanceList(getPipes());
    for (TextSentence sentence : textBlock) {
        Instance wrapped = new TextInstance(sentence, getTargetAlphabet());
        batch.addThruPipe(wrapped);
    }

    final boolean needsModel = (crf == null);
    if (needsModel) {
        crf = new CRF(getPipes(), null);
        crf.addFullyConnectedStatesForLabels();
        crf.setWeightsDimensionAsIn(batch, false);

        CRFOptimizableByLabelLikelihood labelLikelihood =
                new CRFOptimizableByLabelLikelihood(crf, batch);
        Optimizable.ByGradientValue[] objectives =
                new Optimizable.ByGradientValue[] { labelLikelihood };

        crfTrainer = new CRFTrainerByValueGradients(crf, objectives);
        crfTrainer.setMaxResets(0);
    }

    crfTrainer.train(batch, Integer.MAX_VALUE);
}
/**
 * Builds a CRF from the single instance "ABCDE", sets parameter {@code i} of its
 * optimizable to the value {@code i}, and calls {@code crf.print()}.
 */
public void testPrint() {
    final Pipe[] stages = {
        new CharSequence2TokenSequence("."),
        new TokenText(),
        new TestCRFTokenSequenceRemoveSpaces(),
        new TokenSequence2FeatureVectorSequence(),
        new PrintInputAndTarget()
    };
    final Pipe pipeline = new SerialPipes(stages);

    final InstanceList single = new InstanceList(pipeline);
    final String[] samples = { "ABCDE" };
    single.addThruPipe(new ArrayIterator(samples));

    final CRF crf = new CRF(pipeline, null);
    crf.addFullyConnectedStatesForThreeQuarterLabels(single);
    final CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(crf);
    crf.setWeightsDimensionAsIn(single, false);

    final Optimizable optimizable = trainer.getOptimizableCRF(single);

    // Give every parameter a distinct value equal to its own index.
    final double[] values = new double[optimizable.getNumParameters()];
    for (int k = 0; k < values.length; k++) {
        values[k] = k;
    }
    optimizable.setParameters(values);

    crf.print();
}
/**
 * Prints a CRF whose parameters have been set to 0, 1, 2, ... in index order.
 *
 * <p>The CRF is built over a five-stage serial pipe from the single input "ABCDE", using
 * {@code addFullyConnectedStatesForThreeQuarterLabels} to create its states.
 */
public void testPrint() {
    Pipe serial = new SerialPipes(new Pipe[] {
            new CharSequence2TokenSequence("."),
            new TokenText(),
            new TestCRFTokenSequenceRemoveSpaces(),
            new TokenSequence2FeatureVectorSequence(),
            new PrintInputAndTarget() });

    InstanceList instances = new InstanceList(serial);
    instances.addThruPipe(new ArrayIterator(new String[] { "ABCDE" }));

    CRF crf = new CRF(serial, null);
    crf.addFullyConnectedStatesForThreeQuarterLabels(instances);
    CRFTrainerByLabelLikelihood likelihoodTrainer = new CRFTrainerByLabelLikelihood(crf);
    crf.setWeightsDimensionAsIn(instances, false);

    Optimizable target = likelihoodTrainer.getOptimizableCRF(instances);

    // Parameter at position n receives the value n.
    int count = target.getNumParameters();
    double[] weights = new double[count];
    int n = 0;
    while (n < count) {
        weights[n] = n;
        n++;
    }
    target.setParameters(weights);

    crf.print();
}