params, TokenNameFinderFactory.create(null, featureGeneratorString.getBytes(), resources, new BioCodec()));
nameFinderFactory = TokenNameFinderFactory.create(params.getFactory(), featureGeneratorBytes, resources, sequenceCodec); } catch (InvalidFormatException e) {
nameFinderFactory = TokenNameFinderFactory.create(params.getFactory(), featureGeneratorBytes, resources, sequenceCodec); } catch (InvalidFormatException e) {
/** * Train NamefinderME using OnlyWithNames.train. The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithNames() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNames.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, DEFAULT), names1[0]); Assert.assertEquals(new Span(2, 4, DEFAULT), names1[1]); Assert.assertEquals(new Span(4, 6, DEFAULT), names1[2]); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
@Test public void testOnlyWithNamesTypeOverride() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNames.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", TYPE_OVERRIDE, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, TYPE_OVERRIDE), names1[0]); Assert.assertEquals(new Span(2, 4, TYPE_OVERRIDE), names1[1]); Assert.assertEquals(new Span(4, 6, TYPE_OVERRIDE), names1[2]); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/** * Train NamefinderME using OnlyWithNames.train. The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithEntitiesWithTypes() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ALGORITHM_PARAM, "MAXENT"); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = "NATO United States Barack Obama".split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 1, "organization"), names1[0]); // NATO Assert.assertEquals(new Span(1, 3, "location"), names1[1]); // United States Assert.assertEquals("person", names1[2].getType()); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/** * Train NamefinderME using OnlyWithNamesWithTypes.train. * The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithNamesWithTypes() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNamesWithTypes.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, "person"), names1[0]); Assert.assertEquals(new Span(2, 4, "person"), names1[1]); Assert.assertEquals(new Span(4, 6, "person"), names1[2]); Assert.assertEquals("person", names1[2].getType()); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/**
 * Builds the {@code TokenNameFinderFactory} for this instance from the given
 * training parameters and stores it in {@code nameClassifierFactory}.
 *
 * <p>The XML feature descriptor generated from {@code params} is echoed to
 * standard error, encoded as UTF-8, and handed to the factory together with
 * the resources loaded by {@code FixedTrainer.loadResources} and this
 * instance's {@code sequenceCodec}.
 *
 * @param params the training parameters the feature descriptor is derived from
 * @throws IOException declared for I/O failures raised by the calls made here
 */
private void createNameFactory(TrainingParameters params) throws IOException {
  final String descriptorXml = XMLFeatureDescriptor.createXMLFeatureDescriptor(params);
  System.err.println(descriptorXml);

  final Charset utf8 = Charset.forName("UTF-8");
  final byte[] descriptorBytes = descriptorXml.getBytes(utf8);

  final Map<String, Object> loadedResources =
      FixedTrainer.loadResources(params, descriptorBytes);

  final String factoryClassName = TokenNameFinderFactory.class.getName();
  this.nameClassifierFactory = TokenNameFinderFactory.create(
      factoryClassName, descriptorBytes, loadedResources, sequenceCodec);
}
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
params, TokenNameFinderFactory.create(null, featureGeneratorBytes, resources, new BioCodec()));
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec()));
/**
 * Creates a {@code TokenNameFinderFactory} with custom features and registers it
 * through {@code setNameClassifierFactory}.
 *
 * <p>The sequence codec is instantiated from the name returned by
 * {@code getSequenceCodec()}. The XML feature descriptor generated from
 * {@code params} is echoed to standard error, encoded as UTF-8, and used both to
 * load the resources and to create the factory.
 *
 * @param params
 *          the training parameters the feature descriptor is derived from
 * @throws IOException
 *           if io error
 */
public void createTrainer(TrainingParameters params) throws IOException {
  final String codecName = getSequenceCodec();
  final SequenceCodec<String> codec =
      TokenNameFinderFactory.instantiateSequenceCodec(codecName);

  final String descriptorXml = XMLFeatureDescriptor.createXMLFeatureDescriptor(params);
  System.err.println(descriptorXml);

  final Charset utf8 = Charset.forName("UTF-8");
  final byte[] descriptorBytes = descriptorXml.getBytes(utf8);

  final Map<String, Object> loadedResources = loadResources(params, descriptorBytes);

  final String factoryClassName = TokenNameFinderFactory.class.getName();
  final TokenNameFinderFactory factory = TokenNameFinderFactory.create(
      factoryClassName, descriptorBytes, loadedResources, codec);
  setNameClassifierFactory(factory);
}
nameFinderFactory = TokenNameFinderFactory.create(params.getFactory(), featureGeneratorBytes, resources, sequenceCodec); } catch (InvalidFormatException e) {
nameFinderFactory = TokenNameFinderFactory.create(params.getFactory(), featureGeneratorBytes, resources, sequenceCodec); } catch (InvalidFormatException e) {
nameFinderFactory = TokenNameFinderFactory.create(params.getFactory(), featureGeneratorBytes, resources, sequenceCodec); } catch (InvalidFormatException e) {
nameFinderFactory = TokenNameFinderFactory.create(params.getFactory(), featureGeneratorBytes, resources, sequenceCodec); } catch (InvalidFormatException e) {
params, TokenNameFinderFactory.create(null, featureGeneratorBytes, resources, new BioCodec()));
params, TokenNameFinderFactory.create(null, featureGeneratorBytes, resources, new BioCodec()));