/** * Verify that training and tagging does not cause * runtime problems. */ @Test public void testTreeInsertParserTraining() throws Exception { ObjectStream<Parse> parseSamples = ParserTestUtil.openTestTrainingData(); HeadRules headRules = ParserTestUtil.createTestHeadRules(); ParserModel model = Parser.train("eng", parseSamples, headRules, 100, 0); opennlp.tools.parser.Parser parser = ParserFactory.create(model); // Tests parsing to make sure the code does not has // a bug which fails always with a runtime exception parser.parse(Parse.parseParse("She was just another freighter from the " + "States and she seemed as commonplace as her name .")); // Test serializing and de-serializing model ByteArrayOutputStream outArray = new ByteArrayOutputStream(); model.serialize(outArray); outArray.close(); new ParserModel(new ByteArrayInputStream(outArray.toByteArray())); // TODO: compare both models } }
/**
 * Runs an n-fold cross validation over the given samples.
 * <p>
 * For every fold a parser model of the configured {@code parserType} is
 * trained on that fold's training partition, evaluated on the fold's test
 * partition, and the resulting F-measure is merged into {@code fmeasure}.
 *
 * @param samples the parse samples to partition into folds
 * @param nFolds the number of folds passed to the partitioner
 *
 * @throws IOException propagated from partitioning, training or evaluation
 * @throws IllegalStateException if {@code parserType} is neither
 *     {@code CHUNKING} nor {@code TREEINSERT}
 */
public void evaluate(ObjectStream<Parse> samples, int nFolds) throws IOException {

  CrossValidationPartitioner<Parse> partitioner = new CrossValidationPartitioner<>(samples, nFolds);

  while (partitioner.hasNext()) {
    CrossValidationPartitioner.TrainingSampleStream<Parse> trainingSampleStream = partitioner.next();

    // Train on this fold's training partition only. Passing the full
    // 'samples' stream here would ignore the partitioning and consume the
    // stream the partitioner itself reads from.
    ParserModel model;
    if (ParserType.CHUNKING.equals(parserType)) {
      model = opennlp.tools.parser.chunking.Parser.train(languageCode, trainingSampleStream, rules, params);
    } else if (ParserType.TREEINSERT.equals(parserType)) {
      model = opennlp.tools.parser.treeinsert.Parser.train(languageCode, trainingSampleStream, rules, params);
    } else {
      throw new IllegalStateException("Unexpected parser type: " + parserType);
    }

    ParserEvaluator evaluator = new ParserEvaluator(ParserFactory.create(model), monitors);

    // The test stream is requested only after training has finished with
    // the training stream.
    evaluator.evaluate(trainingSampleStream.getTestSampleStream());

    fmeasure.mergeInto(evaluator.getFMeasure());
  }
}
/**
 * Convenience overload that builds the training parameters from a single
 * iteration count and cutoff, then delegates to the
 * {@code TrainingParameters}-based {@code train} method.
 * <p>
 * The cutoff is applied to the "dict", "tagger", "chunker", "check" and
 * "build" namespaces; the iteration count is applied to all of these
 * except "dict".
 *
 * @param languageCode the language code passed through to training
 * @param parseSamples the parse samples to train on
 * @param rules the head rules passed through to training
 * @param iterations the iteration count stored for each component
 * @param cut the cutoff stored for each component and the dictionary
 *
 * @return the model returned by the delegated {@code train} call
 *
 * @throws IOException propagated from the delegated {@code train} call
 */
public static ParserModel train(String languageCode, ObjectStream<Parse> parseSamples, HeadRules rules,
    int iterations, int cut) throws IOException {

  TrainingParameters settings = new TrainingParameters();

  // The dictionary namespace only receives a cutoff.
  settings.put("dict", TrainingParameters.CUTOFF_PARAM, cut);

  // Every other namespace receives both cutoff and iterations.
  for (String component : new String[] {"tagger", "chunker", "check", "build"}) {
    settings.put(component, TrainingParameters.CUTOFF_PARAM, cut);
    settings.put(component, TrainingParameters.ITERATIONS_PARAM, iterations);
  }

  return train(languageCode, parseSamples, rules, settings);
}
// Train with the tree-insert parser's train method, passing the language
// reported by params, the sample stream, the rules and mlParams; the
// returned value is stored in model.
model = opennlp.tools.parser.treeinsert.Parser.train(params.getLang(), sampleStream, rules, mlParams);
/**
 * Runs an n-fold cross validation over the given samples.
 * <p>
 * For every fold a parser model of the configured {@code parserType} is
 * trained on that fold's training partition, evaluated on the fold's test
 * partition, and the resulting F-measure is merged into {@code fmeasure}.
 *
 * @param samples the parse samples to partition into folds
 * @param nFolds the number of folds passed to the partitioner
 *
 * @throws IOException propagated from partitioning, training or evaluation
 * @throws IllegalStateException if {@code parserType} is neither
 *     {@code CHUNKING} nor {@code TREEINSERT}
 */
public void evaluate(ObjectStream<Parse> samples, int nFolds) throws IOException {

  CrossValidationPartitioner<Parse> partitioner = new CrossValidationPartitioner<>(samples, nFolds);

  while (partitioner.hasNext()) {
    CrossValidationPartitioner.TrainingSampleStream<Parse> trainingSampleStream = partitioner.next();

    // Train on this fold's training partition only. Passing the full
    // 'samples' stream here would ignore the partitioning and consume the
    // stream the partitioner itself reads from.
    ParserModel model;
    if (ParserType.CHUNKING.equals(parserType)) {
      model = opennlp.tools.parser.chunking.Parser.train(languageCode, trainingSampleStream, rules, params);
    } else if (ParserType.TREEINSERT.equals(parserType)) {
      model = opennlp.tools.parser.treeinsert.Parser.train(languageCode, trainingSampleStream, rules, params);
    } else {
      throw new IllegalStateException("Unexpected parser type: " + parserType);
    }

    ParserEvaluator evaluator = new ParserEvaluator(ParserFactory.create(model), monitors);

    // The test stream is requested only after training has finished with
    // the training stream.
    evaluator.evaluate(trainingSampleStream.getTestSampleStream());

    fmeasure.mergeInto(evaluator.getFMeasure());
  }
}
/**
 * Runs an n-fold cross validation over the given samples.
 * <p>
 * For every fold a parser model of the configured {@code parserType} is
 * trained on that fold's training partition, evaluated on the fold's test
 * partition, and the resulting F-measure is merged into {@code fmeasure}.
 *
 * @param samples the parse samples to partition into folds
 * @param nFolds the number of folds passed to the partitioner
 *
 * @throws IOException propagated from partitioning, training or evaluation
 * @throws IllegalStateException if {@code parserType} is neither
 *     {@code CHUNKING} nor {@code TREEINSERT}
 */
public void evaluate(ObjectStream<Parse> samples, int nFolds) throws IOException {

  CrossValidationPartitioner<Parse> partitioner = new CrossValidationPartitioner<>(samples, nFolds);

  while (partitioner.hasNext()) {
    CrossValidationPartitioner.TrainingSampleStream<Parse> trainingSampleStream = partitioner.next();

    // Train on this fold's training partition only. Passing the full
    // 'samples' stream here would ignore the partitioning and consume the
    // stream the partitioner itself reads from.
    ParserModel model;
    if (ParserType.CHUNKING.equals(parserType)) {
      model = opennlp.tools.parser.chunking.Parser.train(languageCode, trainingSampleStream, rules, params);
    } else if (ParserType.TREEINSERT.equals(parserType)) {
      model = opennlp.tools.parser.treeinsert.Parser.train(languageCode, trainingSampleStream, rules, params);
    } else {
      throw new IllegalStateException("Unexpected parser type: " + parserType);
    }

    ParserEvaluator evaluator = new ParserEvaluator(ParserFactory.create(model), monitors);

    // The test stream is requested only after training has finished with
    // the training stream.
    evaluator.evaluate(trainingSampleStream.getTestSampleStream());

    fmeasure.mergeInto(evaluator.getFMeasure());
  }
}
/**
 * Convenience overload that builds the training parameters from a single
 * iteration count and cutoff, then delegates to the
 * {@code TrainingParameters}-based {@code train} method.
 * <p>
 * The cutoff is applied to the "dict", "tagger", "chunker", "check" and
 * "build" namespaces; the iteration count is applied to all of these
 * except "dict".
 *
 * @param languageCode the language code passed through to training
 * @param parseSamples the parse samples to train on
 * @param rules the head rules passed through to training
 * @param iterations the iteration count stored for each component
 * @param cut the cutoff stored for each component and the dictionary
 *
 * @return the model returned by the delegated {@code train} call
 *
 * @throws IOException propagated from the delegated {@code train} call
 */
public static ParserModel train(String languageCode, ObjectStream<Parse> parseSamples, HeadRules rules,
    int iterations, int cut) throws IOException {

  TrainingParameters settings = new TrainingParameters();

  // The dictionary namespace only receives a cutoff.
  settings.put("dict", TrainingParameters.CUTOFF_PARAM, cut);

  // Every other namespace receives both cutoff and iterations.
  for (String component : new String[] {"tagger", "chunker", "check", "build"}) {
    settings.put(component, TrainingParameters.CUTOFF_PARAM, cut);
    settings.put(component, TrainingParameters.ITERATIONS_PARAM, iterations);
  }

  return train(languageCode, parseSamples, rules, settings);
}
/**
 * Convenience overload that builds the training parameters from a single
 * iteration count and cutoff, then delegates to the
 * {@code TrainingParameters}-based {@code train} method.
 * <p>
 * The cutoff is applied to the "dict", "tagger", "chunker", "check" and
 * "build" namespaces; the iteration count is applied to all of these
 * except "dict".
 *
 * @param languageCode the language code passed through to training
 * @param parseSamples the parse samples to train on
 * @param rules the head rules passed through to training
 * @param iterations the iteration count stored for each component
 * @param cut the cutoff stored for each component and the dictionary
 *
 * @return the model returned by the delegated {@code train} call
 *
 * @throws IOException propagated from the delegated {@code train} call
 */
public static ParserModel train(String languageCode, ObjectStream<Parse> parseSamples, HeadRules rules,
    int iterations, int cut) throws IOException {

  TrainingParameters settings = new TrainingParameters();

  // The dictionary namespace only receives a cutoff.
  settings.put("dict", TrainingParameters.CUTOFF_PARAM, cut);

  // Every other namespace receives both cutoff and iterations.
  for (String component : new String[] {"tagger", "chunker", "check", "build"}) {
    settings.put(component, TrainingParameters.CUTOFF_PARAM, cut);
    settings.put(component, TrainingParameters.ITERATIONS_PARAM, iterations);
  }

  return train(languageCode, parseSamples, rules, settings);
}
// Train with the tree-insert parser's train method, passing the language
// reported by params, the sample stream, the rules and mlParams; the
// returned value is stored in model.
model = opennlp.tools.parser.treeinsert.Parser.train(params.getLang(), sampleStream, rules, mlParams);
// Train with the tree-insert parser's train method, passing the language
// reported by params, the sample stream, the rules and mlParams; the
// returned value is stored in model.
model = opennlp.tools.parser.treeinsert.Parser.train(params.getLang(), sampleStream, rules, mlParams);