/**
 * Default constructor for {@link TokenNameFinderFactory}.
 *
 * <p>Initialises the factory's sequence codec with a newly created
 * {@link BioCodec}.
 */
public TokenNameFinderFactory() {
  this.seqCodec = new BioCodec();
}
/**
 * Convenience constructor that does not take a sequence codec.
 *
 * <p>Delegates to the overload accepting an explicit codec, supplying a new
 * {@link BioCodec}; every other argument is passed through unchanged.
 */
public TokenNameFinderCrossValidator(
    String languageCode,
    String type,
    TrainingParameters trainParams,
    byte[] featureGeneratorBytes,
    Map<String, Object> resources,
    TokenNameFinderEvaluationMonitor... listeners) {
  this(
      languageCode,
      type,
      trainParams,
      featureGeneratorBytes,
      resources,
      new BioCodec(),
      listeners);
}
/**
 * Convenience constructor that does not take a sequence codec.
 *
 * <p>Delegates to the four-argument overload, supplying a new {@link BioCodec}
 * as the codec; the other arguments are forwarded unchanged.
 *
 * @throws IOException declared because the delegated constructor may throw it
 */
public NameSampleSequenceStream(
    ObjectStream<NameSample> psi,
    NameContextGenerator pcg,
    boolean useOutcomes) throws IOException {
  this(psi, pcg, useOutcomes, new BioCodec());
}
public static SequenceCodec<String> instantiateSequenceCodec( String sequenceCodecImplName) { if (sequenceCodecImplName != null) { return ExtensionLoader.instantiateExtension( SequenceCodec.class, sequenceCodecImplName); } else { // If nothing is specified return old default! return new BioCodec(); } } }
/**
 * Convenience constructor that omits the beam size, sequence codec and factory.
 *
 * <p>Delegates to another constructor of this class, filling the omitted
 * arguments with {@link NameFinderME#DEFAULT_BEAM_SIZE}, a new
 * {@link BioCodec} and a new {@link TokenNameFinderFactory}. The remaining
 * arguments are forwarded unchanged.
 */
public TokenNameFinderModel(
    String languageCode,
    MaxentModel nameFinderModel,
    byte[] generatorDescriptor,
    Map<String, Object> resources,
    Map<String, String> manifestInfoEntries) {
  this(
      languageCode,
      nameFinderModel,
      NameFinderME.DEFAULT_BEAM_SIZE,
      generatorDescriptor,
      resources,
      manifestInfoEntries,
      new BioCodec(),
      new TokenNameFinderFactory());
}
/** * Creates a new name finder event stream using the specified data stream and context generator. * @param dataStream The data stream of events. * @param type null or overrides the type parameter in the provided samples * @param contextGenerator The context generator used to generate features for the event stream. */ public NameFinderEventStream(ObjectStream<NameSample> dataStream, String type, NameContextGenerator contextGenerator, SequenceCodec<String> codec) { super(dataStream); this.codec = codec; if (codec == null) { this.codec = new BioCodec(); } this.contextGenerator = contextGenerator; this.contextGenerator.addFeatureGenerator( new WindowFeatureGenerator(additionalContextFeatureGenerator, 8, 8)); this.defaultType = type; }
/**
 * Default constructor for {@link TokenNameFinderFactory}.
 *
 * <p>Initialises the factory's sequence codec with a newly created
 * {@link BioCodec}.
 */
public TokenNameFinderFactory() {
  this.seqCodec = new BioCodec();
}
featureGeneratorString.getBytes(), resources, new BioCodec()));
/**
 * Convenience constructor that does not take a sequence codec.
 *
 * <p>Delegates to the overload accepting an explicit codec, supplying a new
 * {@link BioCodec}; every other argument is passed through unchanged.
 */
public TokenNameFinderCrossValidator(
    String languageCode,
    String type,
    TrainingParameters trainParams,
    byte[] featureGeneratorBytes,
    Map<String, Object> resources,
    TokenNameFinderEvaluationMonitor... listeners) {
  this(
      languageCode,
      type,
      trainParams,
      featureGeneratorBytes,
      resources,
      new BioCodec(),
      listeners);
}
@Test public void testOnlyWithNamesTypeOverride() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNames.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", TYPE_OVERRIDE, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, TYPE_OVERRIDE), names1[0]); Assert.assertEquals(new Span(2, 4, TYPE_OVERRIDE), names1[1]); Assert.assertEquals(new Span(4, 6, TYPE_OVERRIDE), names1[2]); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/** * Train NamefinderME using OnlyWithNames.train. The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithNames() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNames.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, DEFAULT), names1[0]); Assert.assertEquals(new Span(2, 4, DEFAULT), names1[1]); Assert.assertEquals(new Span(4, 6, DEFAULT), names1[2]); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/**
 * Convenience constructor that omits the beam size, sequence codec and factory.
 *
 * <p>Delegates to another constructor of this class, filling the omitted
 * arguments with {@link NameFinderME#DEFAULT_BEAM_SIZE}, a new
 * {@link BioCodec} and a new {@link TokenNameFinderFactory}. The remaining
 * arguments are forwarded unchanged.
 */
public TokenNameFinderModel(
    String languageCode,
    MaxentModel nameFinderModel,
    byte[] generatorDescriptor,
    Map<String, Object> resources,
    Map<String, String> manifestInfoEntries) {
  this(
      languageCode,
      nameFinderModel,
      NameFinderME.DEFAULT_BEAM_SIZE,
      generatorDescriptor,
      resources,
      manifestInfoEntries,
      new BioCodec(),
      new TokenNameFinderFactory());
}
/** * Train NamefinderME using OnlyWithNames.train. The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithEntitiesWithTypes() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ALGORITHM_PARAM, "MAXENT"); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = "NATO United States Barack Obama".split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 1, "organization"), names1[0]); // NATO Assert.assertEquals(new Span(1, 3, "location"), names1[1]); // United States Assert.assertEquals("person", names1[2].getType()); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
public static SequenceCodec<String> instantiateSequenceCodec( String sequenceCodecImplName) { if (sequenceCodecImplName != null) { return ExtensionLoader.instantiateExtension( SequenceCodec.class, sequenceCodecImplName); } else { // If nothing is specified return old default! return new BioCodec(); } } }
/** * Train NamefinderME using OnlyWithNamesWithTypes.train. * The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithNamesWithTypes() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNamesWithTypes.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, "person"), names1[0]); Assert.assertEquals(new Span(2, 4, "person"), names1[1]); Assert.assertEquals(new Span(4, 6, "person"), names1[2]); Assert.assertEquals("person", names1[2].getType()); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
params, TokenNameFinderFactory.create(null, featureGeneratorBytes, resources, new BioCodec()));
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
/**
 * Finds name spans in the given tokenized sentence.
 *
 * <p>The tokens are converted to ids by the word indexer, passed to
 * {@code predict}, and the first element of the prediction result is decoded
 * into spans by a new {@link BioCodec}.
 *
 * @param sentence the tokens of the sentence
 * @return the decoded spans, or an empty array when {@code sentence} has no tokens
 */
@Override
public Span[] find(String[] sentence) {
  // Nothing to predict for an empty sentence.
  if (sentence.length == 0) {
    return new Span[0];
  }

  TokenIds tokenIds = wordIndexer.toTokenIds(sentence);
  BioCodec codec = new BioCodec();
  return codec.decode(Arrays.asList(predict(tokenIds)[0]));
}
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));