/**
 * Records the name "Text" in {@code usedPipeNames} and then appends a
 * {@code TokenText} pipe, constructed with the argument {@code "text="},
 * to {@code pipes}.
 *
 * @param usedPipeNames list that receives the pipe name
 * @param pipes list that receives the newly created pipe
 * @throws Exception declared by the signature; this body has no explicit throw
 */
public static void addTextPipe(List<String> usedPipeNames, List<Pipe> pipes) throws Exception {
  usedPipeNames.add("Text");
  Pipe textPipe = new TokenText("text=");
  pipes.add(textPipe);
}
public void testSpacePipe () { Pipe p = new SerialPipes (new Pipe[] { new CharSequence2TokenSequence ("."), new TokenSequenceLowercase (), new TestCRF.TestCRFTokenSequenceRemoveSpaces (), new TokenText (), new OffsetConjunctions (false, new int[][] {{0}, {1},{-1,0},{0,1}, {-2,-1,0}, {0,1,2}, {-3,-2,-1}, {1,2,3}, }), new PrintInputAndTarget(), }); // Print to a string ByteArrayOutputStream out = new ByteArrayOutputStream (); PrintStream oldOut = System.out; System.setOut (new PrintStream (out)); InstanceList lst = new InstanceList (p); lst.addThruPipe (new ArrayIterator (new String[] { TestCRF.data[0], TestCRF.data[1], })); System.setOut (oldOut); assertEquals (spacePipeOutput, out.toString()); }
public void testSpacePipe () { Pipe p = new SerialPipes (new Pipe[] { new CharSequence2TokenSequence ("."), new TokenSequenceLowercase (), new TestCRF.TestCRFTokenSequenceRemoveSpaces (), new TokenText (), new OffsetConjunctions (false, new int[][] {{0}, {1},{-1,0},{0,1}, {-2,-1,0}, {0,1,2}, {-3,-2,-1}, {1,2,3}, }), new PrintInputAndTarget(), }); // Print to a string ByteArrayOutputStream out = new ByteArrayOutputStream (); PrintStream oldOut = System.out; System.setOut (new PrintStream (out)); InstanceList lst = new InstanceList (p); lst.addThruPipe (new ArrayIterator (new String[] { TestCRF.data[0], TestCRF.data[1], })); System.setOut (oldOut); assertEquals (spacePipeOutput, out.toString()); }
public static Pipe makeSpacePredictionPipe () { Pipe p = new SerialPipes(new Pipe[]{ new CharSequence2TokenSequence("."), new TokenSequenceLowercase(), new TestMEMMTokenSequenceRemoveSpaces(), new TokenText(), new OffsetConjunctions(true, new int[][]{//{0}, /*{1},{-1,0},{0,1}, */ {1}, {-1, 0}, {0, 1}, // {-2, -1, 0}, {0, 1, 2}, {-3, -2, -1}, {1, 2, 3}, //{-2,-1}, {-1,0}, {0,1}, {1,2}, //{-3,-2,-1}, {-2,-1,0}, {-1,0,1}, {0,1,2}, {1,2,3}, }), // new PrintInputAndTarget(), new TokenSequence2FeatureVectorSequence() }); return p; }
public static Pipe makeSpacePredictionPipe () { Pipe p = new SerialPipes(new Pipe[]{ new CharSequence2TokenSequence("."), new TokenSequenceLowercase(), new TestMEMMTokenSequenceRemoveSpaces(), new TokenText(), new OffsetConjunctions(true, new int[][]{//{0}, /*{1},{-1,0},{0,1}, */ {1}, {-1, 0}, {0, 1}, // {-2, -1, 0}, {0, 1, 2}, {-3, -2, -1}, {1, 2, 3}, //{-2,-1}, {-1,0}, {0,1}, {1,2}, //{-3,-2,-1}, {-2,-1,0}, {-1,0,1}, {0,1,2}, {1,2,3}, }), // new PrintInputAndTarget(), new TokenSequence2FeatureVectorSequence() }); return p; }
private Pipe makeSpacePredictionPipe() { Pipe p = new SerialPipes(new Pipe[] { new CharSequence2TokenSequence("."), new TokenSequenceLowercase(), new TestCRFTokenSequenceRemoveSpaces(), new TokenText(), new OffsetConjunctions(true, new int[][] { { 0 }, { 1 }, { -1, 0 }, // Original test had this conjunction in it too // {1},{-1,0},{0,1}, // {0, 1}, // I'd like to comment out this next line to make it run // faster, but then we'd need to adjust likelihood and // accuracy test values. -akm 12/2007 // TODO uncomment this line // {-2, -1, 0}, {0, 1, 2}, {-3, -2, -1}, {1, 2, 3}, // (These were commented before...) // {-2,-1}, {-1,0}, {0,1}, {1,2}, // {-3,-2,-1}, {-2,-1,0}, {-1,0,1}, {0,1,2}, {1,2,3}, }), // new PrintInputAndTarget(), new TokenSequence2FeatureVectorSequence() }); return p; }
private Pipe makeSpacePredictionPipe() { Pipe p = new SerialPipes(new Pipe[] { new CharSequence2TokenSequence("."), new TokenSequenceLowercase(), new TestCRFTokenSequenceRemoveSpaces(), new TokenText(), new OffsetConjunctions(true, new int[][] { { 0 }, { 1 }, { -1, 0 }, // Original test had this conjunction in it too // {1},{-1,0},{0,1}, // {0, 1}, // I'd like to comment out this next line to make it run // faster, but then we'd need to adjust likelihood and // accuracy test values. -akm 12/2007 // TODO uncomment this line // {-2, -1, 0}, {0, 1, 2}, {-3, -2, -1}, {1, 2, 3}, // (These were commented before...) // {-2,-1}, {-1,0}, {0,1}, {1,2}, // {-3,-2,-1}, {-2,-1,0}, {-1,0,1}, {0,1,2}, {1,2,3}, }), // new PrintInputAndTarget(), new TokenSequence2FeatureVectorSequence() }); return p; }
public void testCopyStatesAndWeights() { Pipe p = new SerialPipes(new Pipe[] { new CharSequence2TokenSequence("."), new TokenText(), new TestCRFTokenSequenceRemoveSpaces(), new TokenSequence2FeatureVectorSequence(),
public void testCopyStatesAndWeights() { Pipe p = new SerialPipes(new Pipe[] { new CharSequence2TokenSequence("."), new TokenText(), new TestCRFTokenSequenceRemoveSpaces(), new TokenSequence2FeatureVectorSequence(),
/**
 * Builds a tagging pipe containing an {@code OffsetFeatureConjunction} named
 * "time", clones it through {@code TestSerializable.cloneViaSerialization},
 * runs {@code doc1} through the clone, and checks the resulting token count
 * and the "time" feature value on tokens 3 and 4.
 *
 * @throws IOException declared for the serialization round trip
 * @throws ClassNotFoundException declared for the serialization round trip
 */
public static void testMultiTagSerialization () throws IOException, ClassNotFoundException {
  Pattern digitsRegex = Pattern.compile ("[0-9]+");
  Pattern ampmRegex = Pattern.compile ("[aApP][mM]");

  Pipe[] stages = new Pipe[] {
    new SimpleTaggerSentence2TokenSequence (),
    new TokenText (),
    new RegexMatches ("digits", digitsRegex),
    new RegexMatches ("ampm", ampmRegex),
    new OffsetFeatureConjunction ("time",
        new String[] { "digits", "ampm" },
        new int[] { 0, 1 },
        true),
    new PrintInputAndTarget (),
  };
  Pipe original = new SerialPipes (stages);

  // Everything below runs against the serialized-and-restored copy only.
  Pipe restored = (Pipe) TestSerializable.cloneViaSerialization (original);

  InstanceList instances = new InstanceList (restored);
  instances.addThruPipe (new ArrayIterator (doc1));

  Instance first = instances.get (0);
  TokenSequence tokens = (TokenSequence) first.getData ();

  assertEquals (6, tokens.size ());
  assertEquals (1.0, tokens.get (3).getFeatureValue ("time"), 1e-15);
  assertEquals (1.0, tokens.get (4).getFeatureValue ("time"), 1e-15);
}
/**
 * Builds a tagging pipe containing an {@code OffsetFeatureConjunction} named
 * "time", clones it through {@code TestSerializable.cloneViaSerialization},
 * runs {@code doc1} through the clone, and checks the resulting token count
 * and the "time" feature value on tokens 3 and 4.
 *
 * @throws IOException declared for the serialization round trip
 * @throws ClassNotFoundException declared for the serialization round trip
 */
public static void testMultiTagSerialization () throws IOException, ClassNotFoundException {
  Pattern digitsRegex = Pattern.compile ("[0-9]+");
  Pattern ampmRegex = Pattern.compile ("[aApP][mM]");

  Pipe[] stages = new Pipe[] {
    new SimpleTaggerSentence2TokenSequence (),
    new TokenText (),
    new RegexMatches ("digits", digitsRegex),
    new RegexMatches ("ampm", ampmRegex),
    new OffsetFeatureConjunction ("time",
        new String[] { "digits", "ampm" },
        new int[] { 0, 1 },
        true),
    new PrintInputAndTarget (),
  };
  Pipe original = new SerialPipes (stages);

  // Everything below runs against the serialized-and-restored copy only.
  Pipe restored = (Pipe) TestSerializable.cloneViaSerialization (original);

  InstanceList instances = new InstanceList (restored);
  instances.addThruPipe (new ArrayIterator (doc1));

  Instance first = instances.get (0);
  TokenSequence tokens = (TokenSequence) first.getData ();

  assertEquals (6, tokens.size ());
  assertEquals (1.0, tokens.get (3).getFeatureValue ("time"), 1e-15);
  assertEquals (1.0, tokens.get (4).getFeatureValue ("time"), 1e-15);
}
/**
 * Loads the {@code toy} data through a feature-string pipe and checks the
 * label-likelihood objective value for two CRF configurations: one built with
 * {@code addStatesForLabelsConnectedAsIn} plus {@code addStartState}, and one
 * built with {@code addOrderNStates} using "A" as the start argument.
 */
public void testStartState() {
  Pattern linePattern = Pattern.compile("^(\\S+) (.*)");
  Pipe p = new SerialPipes(new Pipe[] {
    new LineGroupString2TokenSequence(),
    new TokenSequenceMatchDataAndTarget(linePattern, 2, 1),
    new TokenSequenceParseFeatureString(false),
    new TokenText(),
    new TokenSequence2FeatureVectorSequence(true, false),
    new Target2LabelSequence(),
    new PrintInputAndTarget(),
  });

  InstanceList data = new InstanceList(p);
  LineGroupIterator groups =
      new LineGroupIterator(new StringReader(toy), Pattern.compile("\n"), true);
  data.addThruPipe(groups);

  // First configuration: states taken from the data, explicit start state.
  CRF connectedCrf = new CRF(p, null);
  connectedCrf.print();
  connectedCrf.addStatesForLabelsConnectedAsIn(data);
  connectedCrf.addStartState();
  CRFTrainerByLabelLikelihood connectedTrainer = new CRFTrainerByLabelLikelihood(connectedCrf);
  Optimizable.ByGradientValue connectedObjective = connectedTrainer.getOptimizableCRF(data);
  assertEquals(-1.3862, connectedObjective.getValue(), 1e-4);

  // Second configuration: a fresh CRF on the same pipe, order-1 states.
  CRF orderOneCrf = new CRF(p, null);
  orderOneCrf.addOrderNStates(data, new int[] { 1 }, null, "A", null, null, false);
  orderOneCrf.print();
  CRFTrainerByLabelLikelihood orderOneTrainer = new CRFTrainerByLabelLikelihood(orderOneCrf);
  Optimizable.ByGradientValue orderOneObjective = orderOneTrainer.getOptimizableCRF(data);
  assertEquals(-3.09104245335831, orderOneObjective.getValue(), 1e-4);
}
/**
 * Loads the {@code toy} data through a feature-string pipe and checks the
 * label-likelihood objective value for two CRF configurations: one built with
 * {@code addStatesForLabelsConnectedAsIn} plus {@code addStartState}, and one
 * built with {@code addOrderNStates} using "A" as the start argument.
 */
public void testStartState() {
  Pattern linePattern = Pattern.compile("^(\\S+) (.*)");
  Pipe p = new SerialPipes(new Pipe[] {
    new LineGroupString2TokenSequence(),
    new TokenSequenceMatchDataAndTarget(linePattern, 2, 1),
    new TokenSequenceParseFeatureString(false),
    new TokenText(),
    new TokenSequence2FeatureVectorSequence(true, false),
    new Target2LabelSequence(),
    new PrintInputAndTarget(),
  });

  InstanceList data = new InstanceList(p);
  LineGroupIterator groups =
      new LineGroupIterator(new StringReader(toy), Pattern.compile("\n"), true);
  data.addThruPipe(groups);

  // First configuration: states taken from the data, explicit start state.
  CRF connectedCrf = new CRF(p, null);
  connectedCrf.print();
  connectedCrf.addStatesForLabelsConnectedAsIn(data);
  connectedCrf.addStartState();
  CRFTrainerByLabelLikelihood connectedTrainer = new CRFTrainerByLabelLikelihood(connectedCrf);
  Optimizable.ByGradientValue connectedObjective = connectedTrainer.getOptimizableCRF(data);
  assertEquals(-1.3862, connectedObjective.getValue(), 1e-4);

  // Second configuration: a fresh CRF on the same pipe, order-1 states.
  CRF orderOneCrf = new CRF(p, null);
  orderOneCrf.addOrderNStates(data, new int[] { 1 }, null, "A", null, null, false);
  orderOneCrf.print();
  CRFTrainerByLabelLikelihood orderOneTrainer = new CRFTrainerByLabelLikelihood(orderOneCrf);
  Optimizable.ByGradientValue orderOneObjective = orderOneTrainer.getOptimizableCRF(data);
  assertEquals(-3.09104245335831, orderOneObjective.getValue(), 1e-4);
}
/**
 * Builds a CRF over the single instance "ABCDE", sets parameter {@code i} of
 * its optimizable to the value {@code i}, and calls {@code crf.print()}.
 * The method contains no assertions.
 */
public void testPrint() {
  Pipe[] stages = new Pipe[] {
    new CharSequence2TokenSequence("."),
    new TokenText(),
    new TestCRFTokenSequenceRemoveSpaces(),
    new TokenSequence2FeatureVectorSequence(),
    new PrintInputAndTarget(),
  };
  Pipe p = new SerialPipes(stages);

  InstanceList one = new InstanceList(p);
  String[] data = new String[] { "ABCDE", };
  one.addThruPipe(new ArrayIterator(data));

  CRF crf = new CRF(p, null);
  crf.addFullyConnectedStatesForThreeQuarterLabels(one);
  CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(crf);
  crf.setWeightsDimensionAsIn(one, false);
  Optimizable optimizable = trainer.getOptimizableCRF(one);

  // Each parameter gets its own index as its value.
  int parameterCount = optimizable.getNumParameters();
  double[] params = new double[parameterCount];
  int idx = 0;
  while (idx < parameterCount) {
    params[idx] = idx;
    idx++;
  }
  optimizable.setParameters(params);

  crf.print();
}
new TokenText (), new RegexMatches ("digits", Pattern.compile ("[0-9]+")), new RegexMatches ("ampm", Pattern.compile ("[aApP][mM]")), Pipe noMtPipe = new SerialPipes (new Pipe[] { new SimpleTaggerSentence2TokenSequence (), new TokenText (), new RegexMatches ("digits", Pattern.compile ("[0-9]+")), new RegexMatches ("ampm", Pattern.compile ("[aApP][mM]")),
/**
 * Builds a CRF over the single instance "ABCDE", sets parameter {@code i} of
 * its optimizable to the value {@code i}, and calls {@code crf.print()}.
 * The method contains no assertions.
 */
public void testPrint() {
  Pipe[] stages = new Pipe[] {
    new CharSequence2TokenSequence("."),
    new TokenText(),
    new TestCRFTokenSequenceRemoveSpaces(),
    new TokenSequence2FeatureVectorSequence(),
    new PrintInputAndTarget(),
  };
  Pipe p = new SerialPipes(stages);

  InstanceList one = new InstanceList(p);
  String[] data = new String[] { "ABCDE", };
  one.addThruPipe(new ArrayIterator(data));

  CRF crf = new CRF(p, null);
  crf.addFullyConnectedStatesForThreeQuarterLabels(one);
  CRFTrainerByLabelLikelihood trainer = new CRFTrainerByLabelLikelihood(crf);
  crf.setWeightsDimensionAsIn(one, false);
  Optimizable optimizable = trainer.getOptimizableCRF(one);

  // Each parameter gets its own index as its value.
  int parameterCount = optimizable.getNumParameters();
  double[] params = new double[parameterCount];
  int idx = 0;
  while (idx < parameterCount) {
    params[idx] = idx;
    idx++;
  }
  optimizable.setParameters(params);

  crf.print();
}
/**
 * Builds a MEMM over the single instance "ABCDE", sets parameter {@code i} of
 * its optimizable to the value {@code i}, and calls {@code print()} on the
 * model. The method contains no assertions.
 */
public void disabledtestPrint () {
  Pipe[] stages = new Pipe[] {
    new CharSequence2TokenSequence("."),
    new TokenText(),
    new TestMEMM.TestMEMMTokenSequenceRemoveSpaces(),
    new TokenSequence2FeatureVectorSequence(),
    new PrintInputAndTarget(),
  };
  Pipe p = new SerialPipes (stages);

  InstanceList one = new InstanceList (p);
  String[] data = new String[] { "ABCDE", };
  one.addThruPipe (new ArrayIterator (data));

  MEMM memm = new MEMM (p, null);
  memm.addFullyConnectedStatesForLabels();
  memm.setWeightsDimensionAsIn (one);

  MEMMTrainer trainer = new MEMMTrainer (memm);
  MEMMTrainer.MEMMOptimizableByLabelLikelihood optimizable = trainer.getOptimizableMEMM(one);

  // Each parameter gets its own index as its value.
  int parameterCount = optimizable.getNumParameters();
  double[] params = new double[parameterCount];
  int idx = 0;
  while (idx < parameterCount) {
    params[idx] = idx;
    idx++;
  }
  optimizable.setParameters (params);

  memm.print ();
}
public void testOne () { String input = "abcdefghijklmnopqrstuvwxyz"; Pipe p = new SerialPipes (new Pipe[] { new CharSequence2TokenSequence ("."), //new PrintInput("1:"), new TokenSequenceLowercase (), //new PrintInput("2:"), new TokenText (), //new PrintInput("3:"), new RegexMatches ("V", Pattern.compile("[aeiou]")), //new PrintInput("4:"), new OffsetConjunctions (new int[][] {{0,0}, {0,1}, {-1,0,1}, {-1}, {-2}}), new PrintInput("5:"), }); Instance carrier = p.instanceFrom(new Instance (input, null, null, null)); TokenSequence ts = (TokenSequence) carrier.getData(); assertTrue (ts.size() == 26); assertTrue (ts.get(0).getFeatureValue("a_&_b@1") == 1.0); assertTrue (ts.get(0).getFeatureValue("V_&_a") == 1.0); assertTrue (ts.get(2).getFeatureValue("b@-1_&_c_&_d@1") == 1.0); }
/**
 * Builds a MEMM over the single instance "ABCDE", sets parameter {@code i} of
 * its optimizable to the value {@code i}, and calls {@code print()} on the
 * model. The method contains no assertions.
 */
public void disabledtestPrint () {
  Pipe[] stages = new Pipe[] {
    new CharSequence2TokenSequence("."),
    new TokenText(),
    new TestMEMM.TestMEMMTokenSequenceRemoveSpaces(),
    new TokenSequence2FeatureVectorSequence(),
    new PrintInputAndTarget(),
  };
  Pipe p = new SerialPipes (stages);

  InstanceList one = new InstanceList (p);
  String[] data = new String[] { "ABCDE", };
  one.addThruPipe (new ArrayIterator (data));

  MEMM memm = new MEMM (p, null);
  memm.addFullyConnectedStatesForLabels();
  memm.setWeightsDimensionAsIn (one);

  MEMMTrainer trainer = new MEMMTrainer (memm);
  MEMMTrainer.MEMMOptimizableByLabelLikelihood optimizable = trainer.getOptimizableMEMM(one);

  // Each parameter gets its own index as its value.
  int parameterCount = optimizable.getNumParameters();
  double[] params = new double[parameterCount];
  int idx = 0;
  while (idx < parameterCount) {
    params[idx] = idx;
    idx++;
  }
  optimizable.setParameters (params);

  memm.print ();
}
public void testOne () { String input = "abcdefghijklmnopqrstuvwxyz"; Pipe p = new SerialPipes (new Pipe[] { new CharSequence2TokenSequence ("."), //new PrintInput("1:"), new TokenSequenceLowercase (), //new PrintInput("2:"), new TokenText (), //new PrintInput("3:"), new RegexMatches ("V", Pattern.compile("[aeiou]")), //new PrintInput("4:"), new OffsetConjunctions (new int[][] {{0,0}, {0,1}, {-1,0,1}, {-1}, {-2}}), new PrintInput("5:"), }); Instance carrier = p.instanceFrom(new Instance (input, null, null, null)); TokenSequence ts = (TokenSequence) carrier.getData(); assertTrue (ts.size() == 26); assertTrue (ts.get(0).getFeatureValue("a_&_b@1") == 1.0); assertTrue (ts.get(0).getFeatureValue("V_&_a") == 1.0); assertTrue (ts.get(2).getFeatureValue("b@-1_&_c_&_d@1") == 1.0); }