/**
 * Prepares a fresh {@link OnePassRealValueDataIndexer} with a cutoff of 1
 * before each test runs.
 */
@Before
public void initIndexer() {
  TrainingParameters params = new TrainingParameters();
  params.put(AbstractTrainer.CUTOFF_PARAM, 1);
  testDataIndexer = new OnePassRealValueDataIndexer();
  testDataIndexer.init(params, new HashMap<>());
}
/**
 * Prepares a fresh {@link OnePassRealValueDataIndexer} before each test,
 * using a cutoff of 0 so no events are filtered out.
 */
@Before
public void initIndexer() {
  TrainingParameters indexingParams = new TrainingParameters();
  indexingParams.put(AbstractTrainer.CUTOFF_PARAM, 0);
  testDataIndexer = new OnePassRealValueDataIndexer();
  testDataIndexer.init(indexingParams, new HashMap<>());
}
/**
 * Re-creates the {@link OnePassRealValueDataIndexer} under test before each
 * test method; a cutoff of 1 keeps every observed feature.
 */
@Before
public void initIndexer() {
  TrainingParameters indexerParams = new TrainingParameters();
  indexerParams.put(AbstractTrainer.CUTOFF_PARAM, 1);
  testDataIndexer = new OnePassRealValueDataIndexer();
  testDataIndexer.init(indexerParams, new HashMap<>());
}
/**
 * Prepares an unsorted {@link TwoPassDataIndexer} with a cutoff of 1 before
 * each test runs.
 */
@Before
public void initIndexer() {
  TrainingParameters trainingParameters = new TrainingParameters();
  trainingParameters.put(AbstractTrainer.CUTOFF_PARAM, 1);
  // Fixed: removed a stray empty statement (double semicolon) after this call.
  trainingParameters.put(AbstractDataIndexer.SORT_PARAM, false);
  testDataIndexer = new TwoPassDataIndexer();
  testDataIndexer.init(trainingParameters, new HashMap<>());
}
/**
 * Prepares an unsorted {@link TwoPassDataIndexer} with a cutoff of 1 before
 * each test runs.
 */
@Before
public void initIndexer() {
  TrainingParameters trainingParameters = new TrainingParameters();
  trainingParameters.put(AbstractTrainer.CUTOFF_PARAM, 1);
  // Fixed: removed a stray empty statement (double semicolon) after this call.
  trainingParameters.put(AbstractDataIndexer.SORT_PARAM, false);
  testDataIndexer = new TwoPassDataIndexer();
  testDataIndexer.init(trainingParameters, new HashMap<>());
}
/**
 * Sets up the {@link TwoPassDataIndexer} under test: cutoff 1, sorting
 * disabled.
 */
@Before
public void initIndexer() {
  TrainingParameters params = new TrainingParameters();
  params.put(AbstractDataIndexer.SORT_PARAM, false);
  params.put(AbstractTrainer.CUTOFF_PARAM, 1);
  testDataIndexer = new TwoPassDataIndexer();
  testDataIndexer.init(params, new HashMap<>());
}
/**
 * Initializes the shared training parameters used by the tests:
 * MAXENT (GIS) algorithm, 10 iterations, cutoff 5.
 */
@Before
public void setup() {
  mlParams = new TrainingParameters();
  mlParams.put(TrainingParameters.CUTOFF_PARAM, 5);
  mlParams.put(TrainingParameters.ITERATIONS_PARAM, 10);
  mlParams.put(TrainingParameters.ALGORITHM_PARAM, GISTrainer.MAXENT_VALUE);
}
/**
 * Verifies that training a lemmatizer on a data set that is too small
 * raises {@link InsufficientTrainingDataException}.
 */
@Test(expected = InsufficientTrainingDataException.class)
public void testInsufficientData() throws IOException {
  ObjectStream<LemmaSample> lemmaSamples = new LemmaSampleStream(
      new PlainTextByLineStream(new MockInputStreamFactory(
          new File("opennlp/tools/lemmatizer/trial.old-insufficient.tsv")), "UTF-8"));
  TrainingParameters trainParams = new TrainingParameters();
  trainParams.put(TrainingParameters.CUTOFF_PARAM, 5);
  trainParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
  LemmatizerME.train("eng", lemmaSamples, trainParams, new LemmatizerFactory());
}
/**
 * Trains a {@link LanguageDetectorModel} with the given factory using the
 * shared sample stream, Naive Bayes, a two-pass data indexer, 100 iterations
 * and a cutoff of 5.
 *
 * @param factory the language-detector factory to train with
 * @return the trained model
 * @throws Exception if sample loading or training fails
 */
public static LanguageDetectorModel trainModel(LanguageDetectorFactory factory)
    throws Exception {
  LanguageDetectorSampleStream samples = createSampleStream();
  TrainingParameters trainingParams = new TrainingParameters();
  trainingParams.put(TrainingParameters.ALGORITHM_PARAM, "NAIVEBAYES");
  trainingParams.put("DataIndexer", "TwoPass");
  trainingParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
  trainingParams.put(TrainingParameters.CUTOFF_PARAM, 5);
  return LanguageDetectorME.train(samples, trainingParams, factory);
}
/**
 * Verifies that training a chunker on an insufficient data set raises
 * {@link InsufficientTrainingDataException}.
 */
@Test(expected = InsufficientTrainingDataException.class)
public void testInsufficientData() throws IOException {
  ResourceAsStreamFactory streamFactory = new ResourceAsStreamFactory(
      getClass(), "/opennlp/tools/chunker/test-insufficient.txt");
  ObjectStream<ChunkSample> samples = new ChunkSampleStream(
      new PlainTextByLineStream(streamFactory, StandardCharsets.UTF_8));
  TrainingParameters trainParams = new TrainingParameters();
  trainParams.put(TrainingParameters.CUTOFF_PARAM, 1);
  trainParams.put(TrainingParameters.ITERATIONS_PARAM, 70);
  ChunkerME.train("eng", samples, trainParams, new ChunkerFactory());
}
/**
 * Verifies that training a sentence detector on an insufficient data set
 * raises {@link InsufficientTrainingDataException}.
 */
@Test(expected = InsufficientTrainingDataException.class)
public void testInsufficientData() throws IOException {
  InputStreamFactory streamFactory = new ResourceAsStreamFactory(
      getClass(), "/opennlp/tools/sentdetect/SentencesInsufficient.txt");
  TrainingParameters params = new TrainingParameters();
  params.put(TrainingParameters.CUTOFF_PARAM, 0);
  params.put(TrainingParameters.ITERATIONS_PARAM, 100);
  SentenceDetectorFactory factory =
      new SentenceDetectorFactory("eng", true, null, null);
  ObjectStream<SentenceSample> samples = new SentenceSampleStream(
      new PlainTextByLineStream(streamFactory, StandardCharsets.UTF_8));
  SentenceDetectorME.train("eng", samples, factory, params);
}
/**
 * Trains a MAXENT (GIS) model on the prepositional-attachment data with an
 * explicit log-likelihood stopping threshold and checks the expected
 * accuracy.
 */
@Test
public void testMaxentOnPrepAttachDataWithParamsLLThreshold() throws IOException {
  TrainingParameters params = new TrainingParameters();
  params.put(AbstractTrainer.ALGORITHM_PARAM, GISTrainer.MAXENT_VALUE);
  params.put(GISTrainer.LOG_LIKELIHOOD_THRESHOLD_PARAM, 5.);
  EventTrainer eventTrainer = TrainerFactory.getEventTrainer(params, null);
  MaxentModel maxentModel =
      eventTrainer.train(PrepAttachDataUtil.createTrainingStream());
  PrepAttachDataUtil.testModel(maxentModel, 0.8103490963109681);
}
}
/**
 * Trains a quasi-Newton MAXENT model on the prepositional-attachment data
 * using two worker threads and checks the expected accuracy.
 */
@Test
public void testQNOnPrepAttachDataInParallel() throws IOException {
  TrainingParameters params = new TrainingParameters();
  params.put(QNTrainer.THREADS_PARAM, 2);
  params.put(AbstractTrainer.ALGORITHM_PARAM, QNTrainer.MAXENT_QN_VALUE);
  EventTrainer trainer = TrainerFactory.getEventTrainer(params, null);
  MaxentModel trainedModel =
      trainer.train(PrepAttachDataUtil.createTrainingStream());
  PrepAttachDataUtil.testModel(trainedModel, 0.8115870264917059);
}
}
/**
 * Trains a MAXENT (GIS) model on the prepositional-attachment data using the
 * two-pass data indexer with cutoff 1 and checks the expected accuracy.
 */
@Test
public void testMaxentOnPrepAttachDataWithParams() throws IOException {
  TrainingParameters params = new TrainingParameters();
  params.put(AbstractTrainer.CUTOFF_PARAM, 1);
  params.put(AbstractTrainer.ALGORITHM_PARAM, GISTrainer.MAXENT_VALUE);
  params.put(AbstractEventTrainer.DATA_INDEXER_PARAM,
      AbstractEventTrainer.DATA_INDEXER_TWO_PASS_VALUE);
  EventTrainer eventTrainer = TrainerFactory.getEventTrainer(params, null);
  MaxentModel maxentModel =
      eventTrainer.train(PrepAttachDataUtil.createTrainingStream());
  PrepAttachDataUtil.testModel(maxentModel, 0.7997028967566229);
}
/**
 * Trains a perceptron model with skipped averaging enabled on the
 * prepositional-attachment data and checks the expected accuracy.
 */
@Test
public void testPerceptronOnPrepAttachDataWithSkippedAveraging() throws IOException {
  TrainingParameters params = new TrainingParameters();
  params.put("UseSkippedAveraging", true);
  params.put(AbstractTrainer.CUTOFF_PARAM, 1);
  params.put(AbstractTrainer.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE);
  EventTrainer eventTrainer = TrainerFactory.getEventTrainer(params, null);
  MaxentModel perceptronModel =
      eventTrainer.train(PrepAttachDataUtil.createTrainingStream());
  PrepAttachDataUtil.testModel(perceptronModel, 0.773706362961129);
}
/**
 * Trains a maxent tokenizer model from the bundled token.train resource
 * (100 iterations, cutoff 0, alphanumeric optimization enabled).
 *
 * @return the trained {@link TokenizerModel}
 * @throws IOException if the training data cannot be read
 */
static TokenizerModel createMaxentTokenModel() throws IOException {
  InputStreamFactory streamFactory = new ResourceAsStreamFactory(
      TokenizerModel.class, "/opennlp/tools/tokenize/token.train");
  ObjectStream<TokenSample> tokenSamples = new TokenSampleStream(
      new PlainTextByLineStream(streamFactory, StandardCharsets.UTF_8));
  TrainingParameters params = new TrainingParameters();
  params.put(TrainingParameters.CUTOFF_PARAM, 0);
  params.put(TrainingParameters.ITERATIONS_PARAM, 100);
  TokenizerFactory factory = TokenizerFactory.create(null, "eng", null, true, null);
  return TokenizerME.train(tokenSamples, factory, params);
}
/**
 * Trains a Naive Bayes model on the prepositional-attachment data via the
 * trainer factory, asserts the concrete model type, and checks the expected
 * accuracy.
 */
@Test
public void testNaiveBayesOnPrepAttachDataUsingTrainUtil() throws IOException {
  TrainingParameters params = new TrainingParameters();
  params.put(AbstractTrainer.CUTOFF_PARAM, 1);
  params.put(AbstractTrainer.ALGORITHM_PARAM, NaiveBayesTrainer.NAIVE_BAYES_VALUE);
  EventTrainer eventTrainer = TrainerFactory.getEventTrainer(params, null);
  MaxentModel trainedModel =
      eventTrainer.train(PrepAttachDataUtil.createTrainingStream());
  Assert.assertTrue(trainedModel instanceof NaiveBayesModel);
  PrepAttachDataUtil.testModel(trainedModel, 0.7897994553107205);
}
/**
 * Trains the shared language-detector model once for the whole test class
 * from the bundled DoccatSample.txt resource, using Naive Bayes with 100
 * iterations and a cutoff of 5.
 */
@BeforeClass
public static void train() throws Exception {
  ResourceAsStreamFactory streamFactory = new ResourceAsStreamFactory(
      LanguageDetectorMETest.class, "/opennlp/tools/doccat/DoccatSample.txt");
  LanguageDetectorSampleStream samples = new LanguageDetectorSampleStream(
      new PlainTextByLineStream(streamFactory, "UTF-8"));
  TrainingParameters params = new TrainingParameters();
  params.put(TrainingParameters.ALGORITHM_PARAM, "NAIVEBAYES");
  params.put(TrainingParameters.CUTOFF_PARAM, "5");
  params.put(TrainingParameters.ITERATIONS_PARAM, "100");
  model = LanguageDetectorME.train(samples, params, new DummyFactory());
}
/**
 * Verifies that training a POS tagger on an insufficient data set raises
 * {@link InsufficientTrainingDataException}.
 */
@Test(expected = InsufficientTrainingDataException.class)
public void insufficientTestData() throws IOException {
  InputStreamFactory streamFactory = new ResourceAsStreamFactory(
      POSTaggerMETest.class,
      "/opennlp/tools/postag/AnnotatedSentencesInsufficient.txt");
  ObjectStream<POSSample> samples = new WordTagSampleStream(
      new PlainTextByLineStream(streamFactory, StandardCharsets.UTF_8));
  TrainingParameters trainParams = new TrainingParameters();
  trainParams.put(TrainingParameters.CUTOFF_PARAM, 5);
  trainParams.put(TrainingParameters.ITERATIONS_PARAM, 100);
  trainParams.put(TrainingParameters.ALGORITHM_PARAM, ModelType.MAXENT.name());
  POSTaggerME.train("eng", samples, trainParams, new POSTaggerFactory());
}
/**
 * Trains two perceptron models from identical data and parameters and
 * verifies that they satisfy the equals/hashCode contract with each other.
 */
@Test
public void testModelEquals() throws IOException {
  TrainingParameters params = new TrainingParameters();
  params.put("UseSkippedAveraging", true);
  params.put(AbstractTrainer.CUTOFF_PARAM, 1);
  params.put(AbstractTrainer.ALGORITHM_PARAM, PerceptronTrainer.PERCEPTRON_VALUE);
  EventTrainer eventTrainer = TrainerFactory.getEventTrainer(params, null);
  AbstractModel first =
      (AbstractModel) eventTrainer.train(PrepAttachDataUtil.createTrainingStream());
  AbstractModel second =
      (AbstractModel) eventTrainer.train(PrepAttachDataUtil.createTrainingStream());
  // Equal models must also report equal hash codes.
  Assert.assertEquals(first, second);
  Assert.assertEquals(first.hashCode(), second.hashCode());
}