/**
 * Finds name spans in the given tokens with no additional context.
 * Delegates to {@code find(String[], String[][])} passing {@code EMPTY}
 * (presumably an empty context array — confirm against the declaring class).
 *
 * @param tokens the sentence tokens to search
 * @return the spans of the detected names
 */
public Span[] find(String[] tokens) {
  Span[] detected = find(tokens, EMPTY);
  return detected;
}
public void getAllNameEntitiesfromInput(InputStream stream) throws IOException { String[] in = IOUtils.toString(stream, UTF_8).split(" "); Span nameE[]; //name finder is not thread safe https://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.namefind synchronized (nameFinder) { nameE = nameFinder.find(in); //the same name finder is reused, so clear adaptive data nameFinder.clearAdaptiveData(); } String spanNames = Arrays.toString(Span.spansToStrings(nameE, in)); spanNames = spanNames.substring(1, spanNames.length() - 1); String[] tmp = spanNames.split(","); for (String name : tmp) { name = name.trim(); this.locationNameEntities.add(name); } }
/**
 * finds names from given array of tokens
 * @param tokens the tokens array
 * @return map of EntityType -> set of entity names
 */
public Map<String, Set<String>> findNames(String[] tokens) {
  Span[] spans = nameFinder.find(tokens);
  String[] entityNames = Span.spansToStrings(spans, tokens);
  Map<String, Set<String>> namesByType = new HashMap<>();
  if (entityNames != null && entityNames.length > 0) {
    Set<String> uniqueNames = new HashSet<>(Arrays.asList(entityNames));
    namesByType.put(nameType, uniqueNames);
  }
  // the same name finder is reused across calls, so reset adaptive data
  nameFinder.clearAdaptiveData();
  return namesByType;
}
}
/**
 * Runs the name finder over the tokens and accumulates each per-name
 * confidence score into {@code documentConfidence}.
 *
 * @param cas the CAS being processed (not read in this implementation;
 *     kept for the overriding/overridden contract)
 * @param tokens the tokenized text
 * @return the detected name spans
 */
protected Span[] find(CAS cas, String[] tokens) {
  Span[] detectedNames = mNameFinder.find(tokens);
  double[] confidences = mNameFinder.probs();
  for (int i = 0; i < confidences.length; i++) {
    documentConfidence.add(confidences[i]);
  }
  return detectedNames;
}
/**
 * Convenience overload: finds names in {@code tokens} using {@code EMPTY}
 * as the additional-context argument (assumed to be an empty context —
 * confirm against the declaring class).
 *
 * @param tokens the sentence tokens
 * @return spans of the detected names
 */
public Span[] find(String[] tokens) {
  return this.find(tokens, EMPTY);
}
/**
 * Finds names in the given token sequence, delegating to the two-argument
 * overload with the shared {@code EMPTY} context.
 *
 * @param tokens tokens of one sentence
 * @return detected name spans
 */
public Span[] find(String[] tokens) {
  final Span[] result = find(tokens, EMPTY);
  return result;
}
@Test public void testOnlyWithNamesTypeOverride() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNames.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", TYPE_OVERRIDE, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, TYPE_OVERRIDE), names1[0]); Assert.assertEquals(new Span(2, 4, TYPE_OVERRIDE), names1[1]); Assert.assertEquals(new Span(4, 6, TYPE_OVERRIDE), names1[2]); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/** * Train NamefinderME using OnlyWithNames.train. The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithNames() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNames.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, DEFAULT), names1[0]); Assert.assertEquals(new Span(2, 4, DEFAULT), names1[1]); Assert.assertEquals(new Span(4, 6, DEFAULT), names1[2]); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/** * Train NamefinderME using OnlyWithNames.train. The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithEntitiesWithTypes() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithEntitiesWithTypes.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ALGORITHM_PARAM, "MAXENT"); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = "NATO United States Barack Obama".split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 1, "organization"), names1[0]); // NATO Assert.assertEquals(new Span(1, 3, "location"), names1[1]); // United States Assert.assertEquals("person", names1[2].getType()); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/** * Train NamefinderME using OnlyWithNamesWithTypes.train. * The goal is to check if the model validator accepts it. * This is related to the issue OPENNLP-9 */ @Test public void testOnlyWithNamesWithTypes() throws Exception { // train the name finder ObjectStream<NameSample> sampleStream = new NameSampleDataStream( new PlainTextByLineStream(new MockInputStreamFactory( new File("opennlp/tools/namefind/OnlyWithNamesWithTypes.train")), "UTF-8")); TrainingParameters params = new TrainingParameters(); params.put(TrainingParameters.ITERATIONS_PARAM, 70); params.put(TrainingParameters.CUTOFF_PARAM, 1); TokenNameFinderModel nameFinderModel = NameFinderME.train("eng", null, sampleStream, params, TokenNameFinderFactory.create(null, null, Collections.emptyMap(), new BioCodec())); NameFinderME nameFinder = new NameFinderME(nameFinderModel); // now test if it can detect the sample sentences String[] sentence = ("Neil Abercrombie Anibal Acevedo-Vila Gary Ackerman " + "Robert Aderholt Daniel Akaka Todd Akin Lamar Alexander Rodney Alexander").split("\\s+"); Span[] names1 = nameFinder.find(sentence); Assert.assertEquals(new Span(0, 2, "person"), names1[0]); Assert.assertEquals(new Span(2, 4, "person"), names1[1]); Assert.assertEquals(new Span(4, 6, "person"), names1[2]); Assert.assertEquals("person", names1[2].getType()); Assert.assertTrue(!hasOtherAsOutcome(nameFinderModel)); }
/**
 * This method receives as input an array of tokenized text and calls the
 * NameFinderME.find(tokens) to recognize and classify Named Entities. It
 * outputs the spans of the detected and classified Named Entities.
 *
 * From Apache OpenNLP documentation: "After every document clearAdaptiveData
 * must be called to clear the adaptive data in the feature generators. Not
 * calling clearAdaptiveData can lead to a sharp drop in the detection rate
 * after a few documents."
 *
 * @param tokens
 *          an array of tokenized text
 * @return an array of {@link Span}s of Named Entities
 */
public final Span[] nercToSpans(final String[] tokens) {
  // find() already returns a dedicated Span[]; the previous
  // array -> ArrayList -> array round-trip was a needless copy.
  return nameFinder.find(tokens);
}
public void getAllNameEntitiesfromInput(InputStream stream) throws IOException { String[] in = IOUtils.toString(stream, UTF_8).split(" "); Span nameE[]; //name finder is not thread safe https://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.namefind synchronized (nameFinder) { nameE = nameFinder.find(in); //the same name finder is reused, so clear adaptive data nameFinder.clearAdaptiveData(); } String spanNames = Arrays.toString(Span.spansToStrings(nameE, in)); spanNames = spanNames.substring(1, spanNames.length() - 1); String[] tmp = spanNames.split(","); for (String name : tmp) { name = name.trim(); this.locationNameEntities.add(name); } }
/**
 * finds names from given array of tokens
 * @param tokens the tokens array
 * @return map of EntityType -> set of entity names
 */
public Map<String, Set<String>> findNames(String[] tokens) {
  Span[] detectedSpans = nameFinder.find(tokens);
  String[] detectedNames = Span.spansToStrings(detectedSpans, tokens);
  Map<String, Set<String>> entitiesByType = new HashMap<>();
  if (detectedNames != null && detectedNames.length > 0) {
    entitiesByType.put(nameType, new HashSet<>(Arrays.asList(detectedNames)));
  }
  // reset adaptive data since this finder instance is reused
  nameFinder.clearAdaptiveData();
  return entitiesByType;
}
}
public void getAllNameEntitiesfromInput(InputStream stream) throws IOException { String[] in = IOUtils.toString(stream, UTF_8).split(" "); Span nameE[]; //name finder is not thread safe https://opennlp.apache.org/documentation/1.5.2-incubating/manual/opennlp.html#tools.namefind synchronized (nameFinder) { nameE = nameFinder.find(in); //the same name finder is reused, so clear adaptive data nameFinder.clearAdaptiveData(); } String spanNames = Arrays.toString(Span.spansToStrings(nameE, in)); spanNames = spanNames.substring(1, spanNames.length() - 1); String[] tmp = spanNames.split(","); for (String name : tmp) { name = name.trim(); this.locationNameEntities.add(name); } }
/**
 * finds names from given array of tokens
 * @param tokens the tokens array
 * @return map of EntityType -> set of entity names
 */
public Map<String, Set<String>> findNames(String[] tokens) {
  final Span[] spans = nameFinder.find(tokens);
  final String[] extracted = Span.spansToStrings(spans, tokens);
  final Map<String, Set<String>> result = new HashMap<>();
  if (extracted != null && extracted.length > 0) {
    final Set<String> names = new HashSet<>(Arrays.asList(extracted));
    result.put(nameType, names);
  }
  // clear adaptive data because the finder instance is shared across calls
  nameFinder.clearAdaptiveData();
  return result;
}
}
/** * Identify all occuring names * * @param view * view to identify names from * @return span of names wrt token indices */ private Span[] identifyName(JCas view) { NameFinderME nameFinder = new NameFinderME(model); // get all tokens in given view Collection<Token> tokens = JCasUtil.select(view, Token.class); String[] tokenStr = new String[tokens.size()]; int i = 0; for (Iterator<Token> iter = tokens.iterator(); iter.hasNext();) { tokenStr[i++] = iter.next().getCoveredText(); } Span nameSpans[] = nameFinder.find(tokenStr); return nameSpans; }
/**
 * Finds names in the tokenized text and records each name's probability
 * into {@code documentConfidence}.
 *
 * @param cas the CAS under analysis (not read by this body)
 * @param tokens tokens to run the finder over
 * @return the spans of detected names
 */
protected Span[] find(CAS cas, String[] tokens) {
  final Span[] spans = mNameFinder.find(tokens);
  // collect per-name probabilities for document-level confidence
  for (final double p : mNameFinder.probs()) {
    documentConfidence.add(p);
  }
  return spans;
}
/**
 * Tokenizes the content and runs every configured name-finder model over the
 * tokens, collecting annotations per entity type, resolving conflicts, and
 * converting the survivors into the returned type -> names map.
 *
 * @param content raw text to analyze
 * @return map of entity type to the set of detected entity strings
 */
public Map<String, Set<String>> tokenize(String content) {
  Map<String, Set<String>> namedEntities = Maps.newHashMap();
  // diamond operator instead of the redundant explicit type argument
  List<TextAnnotation> allTextAnnotations = new ArrayList<>();
  String[] tokens = SimpleTokenizer.INSTANCE.tokenize(content);
  for (Map.Entry<String, TokenNameFinderModel> finderEntry : finders.entrySet()) {
    String type = finderEntry.getKey();
    // a fresh NameFinderME per call, so no stale adaptive data carries over
    NameFinderME finder = new NameFinderME(finderEntry.getValue());
    Span[] spans = finder.find(tokens);
    double[] probs = finder.probs(spans);
    for (int ni = 0; ni < spans.length; ni++) {
      allTextAnnotations.add(new TextAnnotation(type, spans[ni], probs[ni]));
    }
  }
  // isEmpty() is the idiomatic emptiness check
  if (!allTextAnnotations.isEmpty()) {
    removeConflicts(allTextAnnotations);
  }
  convertTextAnnotationsToNamedEntities(tokens, allTextAnnotations, namedEntities);
  return namedEntities;
}