/**
 * Finds name spans in the given tokens with no additional context.
 * Delegates to {@code find(String[], String[][])} passing {@code EMPTY}
 * (presumably an empty context array — confirm against the declaring class).
 *
 * @param tokens the sentence tokens to search
 * @return the spans of the detected names
 */
public Span[] find(String[] tokens) {
  Span[] detected = find(tokens, EMPTY);
  return detected;
}
public void getAllNameEntitiesfromInput(InputStream stream) throws IOException { String[] in = IOUtils.toString(stream, UTF_8).split(" "); Span nameE[]; //name finder is not thread safe https://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.namefind synchronized (nameFinder) { nameE = nameFinder.find(in); //the same name finder is reused, so clear adaptive data nameFinder.clearAdaptiveData(); } String spanNames = Arrays.toString(Span.spansToStrings(nameE, in)); spanNames = spanNames.substring(1, spanNames.length() - 1); String[] tmp = spanNames.split(","); for (String name : tmp) { name = name.trim(); this.locationNameEntities.add(name); } }
/**
 * finds names from given array of tokens
 * @param tokens the tokens array
 * @return map of EntityType -> set of entity names
 */
public Map<String, Set<String>> findNames(String[] tokens) {
  Span[] spans = nameFinder.find(tokens);
  String[] entityNames = Span.spansToStrings(spans, tokens);
  Map<String, Set<String>> namesByType = new HashMap<>();
  if (entityNames != null && entityNames.length > 0) {
    Set<String> uniqueNames = new HashSet<>(Arrays.asList(entityNames));
    namesByType.put(nameType, uniqueNames);
  }
  // the same name finder is reused across calls, so reset adaptive data
  nameFinder.clearAdaptiveData();
  return namesByType;
}
}
/**
 * Runs the name finder over the tokens and accumulates each per-name
 * confidence score into {@code documentConfidence}.
 *
 * @param cas the CAS being processed (not read in this implementation;
 *     kept for the overriding/overridden contract)
 * @param tokens the tokenized text
 * @return the detected name spans
 */
protected Span[] find(CAS cas, String[] tokens) {
  Span[] detectedNames = mNameFinder.find(tokens);
  double[] confidences = mNameFinder.probs();
  for (int i = 0; i < confidences.length; i++) {
    documentConfidence.add(confidences[i]);
  }
  return detectedNames;
}
/**
 * Convenience overload: finds names in {@code tokens} using {@code EMPTY}
 * as the additional-context argument (assumed to be an empty context —
 * confirm against the declaring class).
 *
 * @param tokens the sentence tokens
 * @return spans of the detected names
 */
public Span[] find(String[] tokens) {
  return this.find(tokens, EMPTY);
}
/**
 * Finds names in the given token sequence, delegating to the two-argument
 * overload with the shared {@code EMPTY} context.
 *
 * @param tokens tokens of one sentence
 * @return detected name spans
 */
public Span[] find(String[] tokens) {
  final Span[] result = find(tokens, EMPTY);
  return result;
}
@Test public void testOnlyWithNamesTypeOverride() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNames.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", TYPE_OVERRIDE, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, TYPE_OVERRIDE), names1[0]); Assert.assertEquals(new Span(2, 4, TYPE_OVERRIDE), names1[1]); Assert.assertEquals(new Span(4, 6, TYPE_OVERRIDE), names1[2]); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/** * Train NamefinderME using OnlyWithNames.train. The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithNames() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNames.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, DEFAULT), names1[0]); Assert.assertEquals(new Span(2, 4, DEFAULT), names1[1]); Assert.assertEquals(new Span(4, 6, DEFAULT), names1[2]); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/** * Train NamefinderME using OnlyWithNames.train. The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithEntitiesWithTypes() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ALGORITHM_PARAM, "MAXENT"); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = "NATO United States Barack Obama".split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 1, "organization"), names1[0]); // NATO Assert.assertEquals(new Span(1, 3, "location"), names1[1]); // United States Assert.assertEquals("person", names1[2].getType()); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/** * Train NamefinderME using OnlyWithNamesWithTypes.train. * The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithNamesWithTypes() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNamesWithTypes.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, "person"), names1[0]); Assert.assertEquals(new Span(2, 4, "person"), names1[1]); Assert.assertEquals(new Span(4, 6, "person"), names1[2]); Assert.assertEquals("person", names1[2].getType()); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/**
 * This method receives as input an array of tokenized text and calls the
 * NameFinderME.find(tokens) to recognize and classify Named Entities. It
 * outputs the spans of the detected and classified Named Entities.
 *
 * From Apache OpenNLP documentation: "After every document clearAdaptiveData
 * must be called to clear the adaptive data in the feature generators. Not
 * calling clearAdaptiveData can lead to a sharp drop in the detection rate
 * after a few documents."
 *
 * @param tokens
 *          an array of tokenized text
 * @return an array of {@link Span}s of Named Entities
 */
public final Span[] nercToSpans(final String[] tokens) {
  // find() already returns a dedicated Span[]; the previous
  // array -> ArrayList -> array round-trip was a needless copy.
  return nameFinder.find(tokens);
}
public void getAllNameEntitiesfromInput(InputStream stream) throws IOException { String[] in = IOUtils.toString(stream, UTF_8).split(" "); Span nameE[]; //name finder is not thread safe https://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.namefind synchronized (nameFinder) { nameE = nameFinder.find(in); //the same name finder is reused, so clear adaptive data nameFinder.clearAdaptiveData(); } String spanNames = Arrays.toString(Span.spansToStrings(nameE, in)); spanNames = spanNames.substring(1, spanNames.length() - 1); String[] tmp = spanNames.split(","); for (String name : tmp) { name = name.trim(); this.locationNameEntities.add(name); } }
/**
 * finds names from given array of tokens
 * @param tokens the tokens array
 * @return map of EntityType -> set of entity names
 */
public Map<String, Set<String>> findNames(String[] tokens) {
  Span[] detectedSpans = nameFinder.find(tokens);
  String[] detectedNames = Span.spansToStrings(detectedSpans, tokens);
  Map<String, Set<String>> entitiesByType = new HashMap<>();
  if (detectedNames != null && detectedNames.length > 0) {
    entitiesByType.put(nameType, new HashSet<>(Arrays.asList(detectedNames)));
  }
  // reset adaptive data since this finder instance is reused
  nameFinder.clearAdaptiveData();
  return entitiesByType;
}
}
public void getAllNameEntitiesfromInput(InputStream stream) throws IOException { String[] in = IOUtils.toString(stream, UTF_8).split(" "); Span nameE[]; //name finder is not thread safe https://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.namefind synchronized (nameFinder) { nameE = nameFinder.find(in); //the same name finder is reused, so clear adaptive data nameFinder.clearAdaptiveData(); } String spanNames = Arrays.toString(Span.spansToStrings(nameE, in)); spanNames = spanNames.substring(1, spanNames.length() - 1); String[] tmp = spanNames.split(","); for (String name : tmp) { name = name.trim(); this.locationNameEntities.add(name); } }
/**
 * finds names from given array of tokens
 * @param tokens the tokens array
 * @return map of EntityType -> set of entity names
 */
public Map<String, Set<String>> findNames(String[] tokens) {
  final Span[] spans = nameFinder.find(tokens);
  final String[] extracted = Span.spansToStrings(spans, tokens);
  final Map<String, Set<String>> result = new HashMap<>();
  if (extracted != null && extracted.length > 0) {
    final Set<String> names = new HashSet<>(Arrays.asList(extracted));
    result.put(nameType, names);
  }
  // clear adaptive data because the finder instance is shared across calls
  nameFinder.clearAdaptiveData();
  return result;
}
}
/** * Identify all occuring names * * @param view * view to identify names from * @return span of names wrt token indices */ private Span[] identifyName(JCas view) { NameFinderME nameFinder = new NameFinderME(model); // get all tokens in given view Collection<Token> tokens = JCasUtil.select(view, Token.class); String[] tokenStr = new String[tokens.size()]; int i = 0; for (Iterator<Token> iter = tokens.iterator(); iter.hasNext();) { tokenStr[i++] = iter.next().getCoveredText(); } Span nameSpans[] = nameFinder.find(tokenStr); return nameSpans; }
/**
 * Finds names in the tokenized text and records each name's probability
 * into {@code documentConfidence}.
 *
 * @param cas the CAS under analysis (not read by this body)
 * @param tokens tokens to run the finder over
 * @return the spans of detected names
 */
protected Span[] find(CAS cas, String[] tokens) {
  final Span[] spans = mNameFinder.find(tokens);
  // collect per-name probabilities for document-level confidence
  for (final double p : mNameFinder.probs()) {
    documentConfidence.add(p);
  }
  return spans;
}
/**
 * Tokenizes the content and runs every configured name-finder model over the
 * tokens, collecting annotations per entity type, resolving conflicts, and
 * converting the survivors into the returned type -> names map.
 *
 * @param content raw text to analyze
 * @return map of entity type to the set of detected entity strings
 */
public Map<String, Set<String>> tokenize(String content) {
  Map<String, Set<String>> namedEntities = Maps.newHashMap();
  // diamond operator instead of the redundant explicit type argument
  List<TextAnnotation> allTextAnnotations = new ArrayList<>();
  String[] tokens = SimpleTokenizer.INSTANCE.tokenize(content);
  for (Map.Entry<String, TokenNameFinderModel> finderEntry : finders.entrySet()) {
    String type = finderEntry.getKey();
    // a fresh NameFinderME per call, so no stale adaptive data carries over
    NameFinderME finder = new NameFinderME(finderEntry.getValue());
    Span[] spans = finder.find(tokens);
    double[] probs = finder.probs(spans);
    for (int ni = 0; ni < spans.length; ni++) {
      allTextAnnotations.add(new TextAnnotation(type, spans[ni], probs[ni]));
    }
  }
  // isEmpty() is the idiomatic emptiness check
  if (!allTextAnnotations.isEmpty()) {
    removeConflicts(allTextAnnotations);
  }
  convertTextAnnotationsToNamedEntities(tokens, allTextAnnotations, namedEntities);
  return namedEntities;
}