/**
 * Trains a name finder model from a CoNLL 2002 formatted training file.
 *
 * @param trainFile the file holding the training data
 * @param lang the corpus language; its lower-cased name is used as the model language code
 * @param params the training parameters handed to the trainer
 * @param types the entity types value passed on to {@link Conll02NameSampleStream}
 * @return the trained model
 * @throws IOException if reading the training data or closing the stream fails
 */
private TokenNameFinderModel train(File trainFile, LANGUAGE lang,
    TrainingParameters params, int types) throws IOException {
  // Close the sample stream once training has consumed it so the underlying input is released.
  try (ObjectStream<NameSample> samples = new Conll02NameSampleStream(
      lang, new MarkableFileInputStreamFactory(trainFile), types)) {
    // Locale.ROOT keeps the language code independent of the JVM default locale.
    return NameFinderME.train(lang.toString().toLowerCase(java.util.Locale.ROOT), null,
        samples, params, new TokenNameFinderFactory());
  }
}
/**
 * Trains an "eng" name finder model on the annotated sample sentences and serializes it
 * into a freshly created temporary file.
 *
 * @return the temporary file containing the serialized model
 * @throws IOException if reading the samples, training, or writing the model fails
 */
private File trainModel() throws IOException {
  ObjectStream<String> lines = new PlainTextByLineStream(
      new MockInputStreamFactory(
          new File("opennlp/tools/namefind/AnnotatedSentencesWithTypes.txt")),
      StandardCharsets.ISO_8859_1);

  TrainingParameters trainingParams = new TrainingParameters();
  trainingParams.put(TrainingParameters.ITERATIONS_PARAM, 70);
  trainingParams.put(TrainingParameters.CUTOFF_PARAM, 1);

  TokenNameFinderFactory factory = new TokenNameFinderFactory();
  TokenNameFinderModel trained;
  // Closing the sample stream also takes care of the wrapped line stream.
  try (ObjectStream<NameSample> samples = new NameSampleDataStream(lines)) {
    trained = NameFinderME.train("eng", null, samples, trainingParams, factory);
  }

  File target = File.createTempFile("model", ".bin");
  try (BufferedOutputStream sink = new BufferedOutputStream(new FileOutputStream(target))) {
    trained.serialize(sink);
  }
  return target;
}
params.put(TrainingParameters.CUTOFF_PARAM, 1);
// Encode the feature generator descriptor explicitly as UTF-8 so the bytes handed to the
// factory do not depend on the platform default charset.
TokenNameFinderModel nameFinderModel = NameFinderME.train("en", null, sampleStream, params,
    TokenNameFinderFactory.create(null,
        featureGeneratorString.getBytes(java.nio.charset.StandardCharsets.UTF_8),
        resources, new BioCodec()));
// Train the name finder model: language and type come from the params object, samples from
// sampleStream, and training settings from mlParams; nameFinderFactory is passed as the factory.
model = opennlp.tools.namefind.NameFinderME.train( params.getLang(), params.getType(), sampleStream, mlParams, nameFinderFactory);
// Train the name finder model from the documents in trainingSampleStream, first with the
// prepared factory and then with a factory created from featureGeneratorBytes and resources.
// NOTE(review): as written, the second assignment overwrites the first result and both calls
// wrap the same trainingSampleStream - confirm these belong to separate branches.
model = NameFinderME.train(languageCode, type, new DocumentToNameSampleStream(trainingSampleStream), params, factory); model = NameFinderME.train(languageCode, type, new DocumentToNameSampleStream(trainingSampleStream), params, TokenNameFinderFactory.create(null, featureGeneratorBytes, resources, new BioCodec()));
/** * Train NamefinderME using OnlyWithNames.train. The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithNames() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNames.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, DEFAULT), names1[0]); Assert.assertEquals(new Span(2, 4, DEFAULT), names1[1]); Assert.assertEquals(new Span(4, 6, DEFAULT), names1[2]); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
@Test public void testOnlyWithNamesTypeOverride() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNames.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", TYPE_OVERRIDE, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, TYPE_OVERRIDE), names1[0]); Assert.assertEquals(new Span(2, 4, TYPE_OVERRIDE), names1[1]); Assert.assertEquals(new Span(4, 6, TYPE_OVERRIDE), names1[2]); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/** * Train NamefinderME using OnlyWithNames.train. The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithEntitiesWithTypes() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ALGORITHM_PARAM, "MAXENT"); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = "NATO United States Barack Obama".split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 1, "organization"), names1[0]); // NATO Assert.assertEquals(new Span(1, 3, "location"), names1[1]); // United States Assert.assertEquals("person", names1[2].getType()); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/** * Train NamefinderME using OnlyWithNamesWithTypes.train. * The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithNamesWithTypes() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNamesWithTypes.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, "person"), names1[0]); Assert.assertEquals(new Span(2, 4, "person"), names1[1]); Assert.assertEquals(new Span(4, 6, "person"), names1[2]); Assert.assertEquals("person", names1[2].getType()); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
// Set the CUTOFF_PARAM training parameter to 1, then train an "eng" model from sampleStream
// with a null type. The factory is created with null for its first two arguments, an empty
// map for the third, and a BioCodec.
params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
// Set the CUTOFF_PARAM training parameter to 1, then train an "eng" model from sampleStream
// with a null type. The factory is created with null for its first two arguments, an empty
// map for the third, and a BioCodec.
params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
/**
 * Trains an OpenNLP name finder model from the given samples.
 *
 * @param aNameSamples the name samples to train on
 * @param aParameters the training parameters handed to the trainer
 * @return the trained model
 * @throws RecommendationException if an {@link IOException} occurs during training; the
 *         original exception is logged and preserved as the cause
 */
private TokenNameFinderModel train(List<NameSample> aNameSamples,
        TrainingParameters aParameters)
    throws RecommendationException
{
    try (NameSampleStream stream = new NameSampleStream(aNameSamples)) {
        TokenNameFinderFactory finderFactory = new TokenNameFinderFactory();
        return NameFinderME.train("unknown", null, stream, aParameters, finderFactory);
    }
    catch (IOException e) {
        LOG.error("Exception during training the OpenNLP Named Entity Recognizer model.", e);
        // This trains the named entity recognizer, so the message must not say "pos".
        throw new RecommendationException("Error while training OpenNLP NER", e);
    }
}
}
// Set the CUTOFF_PARAM training parameter to 1, then train an "eng" model from sampleStream
// with a null type. The factory is created with null for its first two arguments, an empty
// map for the third, and a BioCodec.
params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
/**
 * Trains a name finder model on the training samples, evaluates it on the test samples and
 * prints the resulting F-measure to standard output.
 *
 * @param params the training parameters handed to the trainer
 * @return the trained model
 * @throws IllegalStateException if no name classifier factory has been set up, or if an
 *         I/O error occurs while training or evaluating (the I/O error is kept as the cause)
 */
public final TokenNameFinderModel train(final TrainingParameters params) {
  if (getNameClassifierFactory() == null) {
    throw new IllegalStateException(
        "Classes derived from AbstractNameFinderTrainer must create and fill the AdaptiveFeatureGenerator features!");
  }
  TokenNameFinderModel trainedModel;
  TokenNameFinderEvaluator nerEvaluator;
  try {
    trainedModel = NameFinderME.train(lang, null, trainSamples, params,
        nameClassifierFactory);
    NameFinderME nerTagger = new NameFinderME(trainedModel);
    nerEvaluator = new TokenNameFinderEvaluator(nerTagger);
    nerEvaluator.evaluate(testSamples);
  } catch (IOException e) {
    // Report the failure to the caller instead of terminating the whole JVM from inside
    // this method; the original exception travels along as the cause.
    throw new IllegalStateException("IO error while loading training and test sets!", e);
  }
  System.out.println("Final Result: \n" + nerEvaluator.getFMeasure());
  return trainedModel;
}
// Train and return the model for `language` from `stream` with a null type; the factory is
// constructed from featureGenCfg and an empty resource map (plus the remaining arguments).
return NameFinderME.train(language, null, stream, params, new TokenNameFinderFactory(featureGenCfg, Collections.<String, Object>emptyMap(),
// Train the "en" model with `slot` as its type. The combined sample stream is closed in a
// finally block so it is released even when training throws.
TokenNameFinderModel tokenNameFinderModel;
try {
  tokenNameFinderModel = NameFinderME.train("en", slot, combinedNameSampleStream,
      trainingParams, new TokenNameFinderFactory());
} finally {
  combinedNameSampleStream.close();
}
tokenNameFinderModels.add(tokenNameFinderModel);
// Train the name finder model from the documents in trainingSampleStream, first with the
// prepared factory and then with a factory created from featureGeneratorBytes and resources.
// NOTE(review): as written, the second assignment overwrites the first result and both calls
// wrap the same trainingSampleStream - confirm these belong to separate branches.
model = NameFinderME.train(languageCode, type, new DocumentToNameSampleStream(trainingSampleStream), params, factory); model = NameFinderME.train(languageCode, type, new DocumentToNameSampleStream(trainingSampleStream), params, TokenNameFinderFactory.create(null, featureGeneratorBytes, resources, new BioCodec()));
// Train the name finder model: language and type come from the params object, samples from
// sampleStream, and training settings from mlParams; nameFinderFactory is passed as the factory.
model = opennlp.tools.namefind.NameFinderME.train( params.getLang(), params.getType(), sampleStream, mlParams, nameFinderFactory);
// Train the name finder model: language and type come from the params object, samples from
// sampleStream, and training settings from mlParams; nameFinderFactory is passed as the factory.
model = opennlp.tools.namefind.NameFinderME.train( params.getLang(), params.getType(), sampleStream, mlParams, nameFinderFactory);
// Train the name finder model from the documents in trainingSampleStream, first with the
// prepared factory and then with a factory created from featureGeneratorBytes and resources.
// NOTE(review): as written, the second assignment overwrites the first result and both calls
// wrap the same trainingSampleStream - confirm these belong to separate branches.
model = NameFinderME.train(languageCode, type, new DocumentToNameSampleStream(trainingSampleStream), params, factory); model = NameFinderME.train(languageCode, type, new DocumentToNameSampleStream(trainingSampleStream), params, TokenNameFinderFactory.create(null, featureGeneratorBytes, resources, new BioCodec()));