/**
 * Replaces the carrier's data, which must be an {@code Object[]}, with an
 * {@link ArrayIterator} over that same array.
 *
 * @param carrier the instance whose data is rewrapped; modified in place
 * @return the same {@code carrier} object that was passed in
 * @throws ClassCastException if the carrier's data is not an {@code Object[]}
 */
public Instance pipe (Instance carrier) {
    Object[] elements = (Object[]) carrier.getData();
    carrier.setData(new ArrayIterator (elements));
    return carrier;
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Replaces the carrier's data, which must be an {@code Object[]}, with an
 * {@link ArrayIterator} over that same array.
 *
 * @param carrier the instance whose data is rewrapped; modified in place
 * @return the same {@code carrier} object that was passed in
 * @throws ClassCastException if the carrier's data is not an {@code Object[]}
 */
public Instance pipe (Instance carrier) {
    Object[] elements = (Object[]) carrier.getData();
    carrier.setData(new ArrayIterator (elements));
    return carrier;
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Replaces the carrier's data, which must be an {@code Object[]}, with an
 * {@link ArrayIterator} over that same array.
 *
 * @param carrier the instance whose data is rewrapped; modified in place
 * @return the same {@code carrier} object that was passed in
 * @throws ClassCastException if the carrier's data is not an {@code Object[]}
 */
public Instance pipe (Instance carrier) {
    Object[] elements = (Object[]) carrier.getData();
    carrier.setData(new ArrayIterator (elements));
    return carrier;
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Replaces the carrier's data, which must be an {@code Object[]}, with an
 * {@link ArrayIterator} over that same array.
 *
 * @param carrier the instance whose data is rewrapped; modified in place
 * @return the same {@code carrier} object that was passed in
 * @throws ClassCastException if the carrier's data is not an {@code Object[]}
 */
public Instance pipe (Instance carrier) {
    Object[] elements = (Object[]) carrier.getData();
    carrier.setData(new ArrayIterator (elements));
    return carrier;
}
} // closes the enclosing type, whose header lies outside this excerpt
/**
 * Computes the maxent classifications for a batch of instances supplied as
 * rows of feature strings.
 *
 * <p>The rows are wrapped in an {@link ArrayIterator}, paired with a
 * {@link CharSequenceArray2TokenSequence} pipe through a
 * {@link PipeExtendedIterator}, and handed to the iterator-based
 * {@code classify} overload.
 *
 * @param classifier the classifier
 * @param features   one row per instance, listing the features that are on for it
 * @return the classifications for the given instances
 */
static public Classification[] classify(Classifier classifier, String[][] features) {
    ArrayIterator featureRows = new ArrayIterator(features);
    PipeExtendedIterator pipedRows =
        new PipeExtendedIterator(featureRows, new CharSequenceArray2TokenSequence());
    return classify(classifier, pipedRows);
}
/**
 * Computes the maxent classifications for a batch of instances supplied as
 * rows of feature strings.
 *
 * <p>The rows are wrapped in an {@link ArrayIterator}, paired with a
 * {@link CharSequenceArray2TokenSequence} pipe through a
 * {@link PipeExtendedIterator}, and handed to the iterator-based
 * {@code classify} overload.
 *
 * @param classifier the classifier
 * @param features   one row per instance, listing the features that are on for it
 * @return the classifications for the given instances
 */
static public Classification[] classify(Classifier classifier, String[][] features) {
    ArrayIterator featureRows = new ArrayIterator(features);
    PipeExtendedIterator pipedRows =
        new PipeExtendedIterator(featureRows, new CharSequenceArray2TokenSequence());
    return classify(classifier, pipedRows);
}
/**
 * Computes the maxent classifications for a batch of instances supplied as
 * rows of feature strings.
 *
 * <p>The rows are wrapped in an {@link ArrayIterator}, paired with a
 * {@link CharSequenceArray2TokenSequence} pipe through a
 * {@link PipeExtendedIterator}, and handed to the iterator-based
 * {@code classify} overload.
 *
 * @param classifier the classifier
 * @param features   one row per instance, listing the features that are on for it
 * @return the classifications for the given instances
 */
static public Classification[] classify(Classifier classifier, String[][] features) {
    ArrayIterator featureRows = new ArrayIterator(features);
    PipeExtendedIterator pipedRows =
        new PipeExtendedIterator(featureRows, new CharSequenceArray2TokenSequence());
    return classify(classifier, pipedRows);
}
/**
 * Builds an {@link InstanceList} by sending the given token sequences through a
 * two-stage {@link SerialPipes}: a {@link TokenSequenceRemoveStopwords}
 * (constructed with both flags {@code false}) followed by a
 * {@link TokenSequence2FeatureSequence}.
 *
 * @param data the token sequences to process
 * @return the instance list populated from {@code data}
 */
public InstanceList malletPreprocess(List<TokenSequence> data) {
    ArrayList<Pipe> stages = new ArrayList<>();
    stages.add(new TokenSequenceRemoveStopwords(false, false));
    stages.add(new TokenSequence2FeatureSequence());

    InstanceList processed = new InstanceList(new SerialPipes(stages));
    processed.addThruPipe(new ArrayIterator(data));
    return processed;
}
/**
 * Trains a CRF on the first {@code TestCRF.data} sample, runs extraction on the
 * second, and renders the extraction through {@code LatticeViewer.extraction2html}
 * twice: once into {@code htmlFile} and once, with the extra boolean argument set
 * to {@code true}, into {@code latticeFile}.
 *
 * @throws FileNotFoundException if either output file cannot be opened for writing
 */
public void testSpaceViewer () throws FileNotFoundException {
    Pipe pipe = TestMEMM.makeSpacePredictionPipe ();
    String[] data0 = { TestCRF.data[0] };
    String[] data1 = { TestCRF.data[1] };

    InstanceList training = new InstanceList (pipe);
    training.addThruPipe (new ArrayIterator (data0));
    // NOTE(review): `testing` is never read afterwards; it is kept because sending
    // data through the shared pipe may have side effects — confirm before removing.
    InstanceList testing = new InstanceList (pipe);
    testing.addThruPipe (new ArrayIterator (data1));

    CRF crf = new CRF (pipe, null);
    crf.addFullyConnectedStatesForLabels ();
    CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood (crf);
    crft.trainIncremental (training);

    CRFExtractor extor = hackCrfExtor (crf);
    Extraction extraction = extor.extract (new ArrayIterator (data1));

    // try-with-resources closes each stream even if rendering throws, so a failing
    // run no longer leaves the output file handle open.
    try (PrintStream out = new PrintStream (new FileOutputStream (htmlFile))) {
        LatticeViewer.extraction2html (extraction, extor, out);
    }
    try (PrintStream out = new PrintStream (new FileOutputStream (latticeFile))) {
        LatticeViewer.extraction2html (extraction, extor, out, true);
    }
}
/**
 * Trains a CRF on the first {@code TestCRF.data} sample, runs extraction on the
 * second, and renders the extraction through {@code LatticeViewer.extraction2html}
 * twice: once into {@code htmlFile} and once, with the extra boolean argument set
 * to {@code true}, into {@code latticeFile}.
 *
 * @throws FileNotFoundException if either output file cannot be opened for writing
 */
public void testSpaceViewer () throws FileNotFoundException {
    Pipe pipe = TestMEMM.makeSpacePredictionPipe ();
    String[] data0 = { TestCRF.data[0] };
    String[] data1 = { TestCRF.data[1] };

    InstanceList training = new InstanceList (pipe);
    training.addThruPipe (new ArrayIterator (data0));
    // NOTE(review): `testing` is never read afterwards; it is kept because sending
    // data through the shared pipe may have side effects — confirm before removing.
    InstanceList testing = new InstanceList (pipe);
    testing.addThruPipe (new ArrayIterator (data1));

    CRF crf = new CRF (pipe, null);
    crf.addFullyConnectedStatesForLabels ();
    CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood (crf);
    crft.trainIncremental (training);

    CRFExtractor extor = hackCrfExtor (crf);
    Extraction extraction = extor.extract (new ArrayIterator (data1));

    // try-with-resources closes each stream even if rendering throws, so a failing
    // run no longer leaves the output file handle open.
    try (PrintStream out = new PrintStream (new FileOutputStream (htmlFile))) {
        LatticeViewer.extraction2html (extraction, extor, out);
    }
    try (PrintStream out = new PrintStream (new FileOutputStream (latticeFile))) {
        LatticeViewer.extraction2html (extraction, extor, out, true);
    }
}
/**
 * Trains a CRF on the first {@code TestCRF.data} sample, runs extraction on the
 * second, and passes the extraction to {@code DocumentViewer.writeExtraction}
 * together with {@code outputDir}, creating that directory first when it does
 * not exist.
 *
 * @throws IOException if {@code outputDir} is missing and cannot be created, or
 *         if a callee raises it
 */
public void testSpaceViewer () throws IOException {
    Pipe pipe = TestMEMM.makeSpacePredictionPipe ();
    String[] data0 = { TestCRF.data[0] };
    String[] data1 = { TestCRF.data[1] };

    InstanceList training = new InstanceList (pipe);
    training.addThruPipe (new ArrayIterator (data0));
    // NOTE(review): `testing` is never read afterwards; it is kept because sending
    // data through the shared pipe may have side effects — confirm before removing.
    InstanceList testing = new InstanceList (pipe);
    testing.addThruPipe (new ArrayIterator (data1));

    CRF crf = new CRF (pipe, null);
    crf.addFullyConnectedStatesForLabels ();
    CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood (crf);
    crft.trainIncremental (training);

    CRFExtractor extor = TestLatticeViewer.hackCrfExtor (crf);
    Extraction extraction = extor.extract (new ArrayIterator (data1));

    // mkdir() reports failure through its return value; surface it here instead of
    // continuing with a directory that does not exist.
    if (!outputDir.exists () && !outputDir.mkdir ()) {
        throw new IOException ("Could not create output directory: " + outputDir);
    }
    DocumentViewer.writeExtraction (outputDir, extraction);
}
/**
 * Trains a CRF on the first {@code TestCRF.data} sample, runs extraction on the
 * second, and passes the extraction to {@code DocumentViewer.writeExtraction}
 * together with {@code outputDir}, creating that directory first when it does
 * not exist.
 *
 * @throws IOException if {@code outputDir} is missing and cannot be created, or
 *         if a callee raises it
 */
public void testSpaceViewer () throws IOException {
    Pipe pipe = TestMEMM.makeSpacePredictionPipe ();
    String[] data0 = { TestCRF.data[0] };
    String[] data1 = { TestCRF.data[1] };

    InstanceList training = new InstanceList (pipe);
    training.addThruPipe (new ArrayIterator (data0));
    // NOTE(review): `testing` is never read afterwards; it is kept because sending
    // data through the shared pipe may have side effects — confirm before removing.
    InstanceList testing = new InstanceList (pipe);
    testing.addThruPipe (new ArrayIterator (data1));

    CRF crf = new CRF (pipe, null);
    crf.addFullyConnectedStatesForLabels ();
    CRFTrainerByLabelLikelihood crft = new CRFTrainerByLabelLikelihood (crf);
    crft.trainIncremental (training);

    CRFExtractor extor = TestLatticeViewer.hackCrfExtor (crf);
    Extraction extraction = extor.extract (new ArrayIterator (data1));

    // mkdir() reports failure through its return value; surface it here instead of
    // continuing with a directory that does not exist.
    if (!outputDir.exists () && !outputDir.mkdir ()) {
        throw new IOException ("Could not create output directory: " + outputDir);
    }
    DocumentViewer.writeExtraction (outputDir, extraction);
}
// Excerpt from a method whose beginning and end are outside this view.
// First half: adds data0 to `training` and data1 to a new `testing` list built on `pipe`,
// then runs `extor` over data1 (result e1).
// Second half: repeats the same steps with training2 / pipe2 / extor2 (result e2).
training.addThruPipe (new ArrayIterator (data0)); InstanceList testing = new InstanceList (pipe); testing.addThruPipe (new ArrayIterator (data1)); Extraction e1 = extor.extract (new ArrayIterator (data1)); training2.addThruPipe (new ArrayIterator (data0)); InstanceList testing2 = new InstanceList (pipe2); testing2.addThruPipe (new ArrayIterator (data1)); Extraction e2 = extor2.extract (new ArrayIterator (data1));
public void testSpacePipe () { Pipe p = new SerialPipes (new Pipe[] { new CharSequence2TokenSequence ("."), new TokenSequenceLowercase (), new TestCRF.TestCRFTokenSequenceRemoveSpaces (), new TokenText (), new OffsetConjunctions (false, new int[][] {{0}, {1},{-1,0},{0,1}, {-2,-1,0}, {0,1,2}, {-3,-2,-1}, {1,2,3}, }), new PrintInputAndTarget(), }); // Print to a string ByteArrayOutputStream out = new ByteArrayOutputStream (); PrintStream oldOut = System.out; System.setOut (new PrintStream (out)); InstanceList lst = new InstanceList (p); lst.addThruPipe (new ArrayIterator (new String[] { TestCRF.data[0], TestCRF.data[1], })); System.setOut (oldOut); assertEquals (spacePipeOutput, out.toString()); }
public void testSpacePipe () { Pipe p = new SerialPipes (new Pipe[] { new CharSequence2TokenSequence ("."), new TokenSequenceLowercase (), new TestCRF.TestCRFTokenSequenceRemoveSpaces (), new TokenText (), new OffsetConjunctions (false, new int[][] {{0}, {1},{-1,0},{0,1}, {-2,-1,0}, {0,1,2}, {-3,-2,-1}, {1,2,3}, }), new PrintInputAndTarget(), }); // Print to a string ByteArrayOutputStream out = new ByteArrayOutputStream (); PrintStream oldOut = System.out; System.setOut (new PrintStream (out)); InstanceList lst = new InstanceList (p); lst.addThruPipe (new ArrayIterator (new String[] { TestCRF.data[0], TestCRF.data[1], })); System.setOut (oldOut); assertEquals (spacePipeOutput, out.toString()); }
public void testTrainStochasticGradient() { Pipe p = makeSpacePredictionPipe(); Pipe p2 = new TestCRF2String(); InstanceList instances = new InstanceList(p); instances.addThruPipe(new ArrayIterator(data)); InstanceList[] lists = instances.split(new double[] { .5, .5 }); CRF crf = new CRF(p, p2); crf.addFullyConnectedStatesForLabels(); crf.setWeightsDimensionAsIn(lists[0], false); CRFTrainerByStochasticGradient crft = new CRFTrainerByStochasticGradient( crf, 0.0001); System.out.println("Training Accuracy before training = " + crf.averageTokenAccuracy(lists[0])); System.out.println("Testing Accuracy before training = " + crf.averageTokenAccuracy(lists[1])); System.out.println("Training..."); // either fixed learning rate or selected on a sample crft.setLearningRateByLikelihood(lists[0]); // crft.setLearningRate(0.01); crft.train(lists[0], 100); crf.print(); System.out.println("Training Accuracy after training = " + crf.averageTokenAccuracy(lists[0])); System.out.println("Testing Accuracy after training = " + crf.averageTokenAccuracy(lists[1])); }
public void testTrainStochasticGradient() { Pipe p = makeSpacePredictionPipe(); Pipe p2 = new TestCRF2String(); InstanceList instances = new InstanceList(p); instances.addThruPipe(new ArrayIterator(data)); InstanceList[] lists = instances.split(new double[] { .5, .5 }); CRF crf = new CRF(p, p2); crf.addFullyConnectedStatesForLabels(); crf.setWeightsDimensionAsIn(lists[0], false); CRFTrainerByStochasticGradient crft = new CRFTrainerByStochasticGradient( crf, 0.0001); System.out.println("Training Accuracy before training = " + crf.averageTokenAccuracy(lists[0])); System.out.println("Testing Accuracy before training = " + crf.averageTokenAccuracy(lists[1])); System.out.println("Training..."); // either fixed learning rate or selected on a sample crft.setLearningRateByLikelihood(lists[0]); // crft.setLearningRate(0.01); crft.train(lists[0], 100); crf.print(); System.out.println("Training Accuracy after training = " + crf.averageTokenAccuracy(lists[0])); System.out.println("Testing Accuracy after training = " + crf.averageTokenAccuracy(lists[1])); }
// Excerpt from a larger method (its beginning and end are not in view).
// Builds a one-element instance list from the string "ABCDE" using pipe p, then
// creates a CRF from p (second constructor argument null) and adds fully
// connected states for its labels.
InstanceList one = new InstanceList(p); String[] data = new String[] { "ABCDE", }; one.addThruPipe(new ArrayIterator(data)); CRF crf = new CRF(p, null); crf.addFullyConnectedStatesForLabels();
/**
 * Splits {@code data} in half with a fixed random seed (777), trains a fully
 * connected CRF with sparse weights on the first half, and checks that the token
 * accuracy reported for the second half is 0.9409 within a tolerance of 0.001.
 */
public void testTokenAccuracy() {
    Pipe pipe = makeSpacePredictionPipe();

    InstanceList allInstances = new InstanceList(pipe);
    allInstances.addThruPipe(new ArrayIterator(data));
    InstanceList[] splits =
        allInstances.split(new Random(777), new double[] { .5, .5 });

    CRF crf = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
    crf.addFullyConnectedStatesForLabels();

    CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(crf);
    trainer.setUseSparseWeights(true);
    trainer.trainIncremental(splits[0]);

    String[] splitNames = new String[] { "Train", "Test" };
    TokenAccuracyEvaluator evaluator = new TokenAccuracyEvaluator(splits, splitNames);
    evaluator.evaluateInstanceList(trainer, splits[1], "Test");

    assertEquals(0.9409, evaluator.getAccuracy("Test"), 0.001);
}
/**
 * Splits {@code data} in half with a fixed random seed (777), trains a fully
 * connected CRF with sparse weights on the first half, and checks that the token
 * accuracy reported for the second half is 0.9409 within a tolerance of 0.001.
 */
public void testTokenAccuracy() {
    Pipe pipe = makeSpacePredictionPipe();

    InstanceList allInstances = new InstanceList(pipe);
    allInstances.addThruPipe(new ArrayIterator(data));
    InstanceList[] splits =
        allInstances.split(new Random(777), new double[] { .5, .5 });

    CRF crf = new CRF(pipe.getDataAlphabet(), pipe.getTargetAlphabet());
    crf.addFullyConnectedStatesForLabels();

    CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(crf);
    trainer.setUseSparseWeights(true);
    trainer.trainIncremental(splits[0]);

    String[] splitNames = new String[] { "Train", "Test" };
    TokenAccuracyEvaluator evaluator = new TokenAccuracyEvaluator(splits, splitNames);
    evaluator.evaluateInstanceList(trainer, splits[1], "Test");

    assertEquals(0.9409, evaluator.getAccuracy("Test"), 0.001);
}