/**
 * Detects name spans in the given token sequence.
 *
 * @param cas the CAS of the document being processed (not consulted by this lookup)
 * @param tokens the tokens of the current sentence
 * @return the detected name {@link Span}s
 */
protected Span[] find(CAS cas, String[] tokens) {
  Span[] detectedNames = mNameFinder.find(tokens);
  return detectedNames;
}
/** * Evaluates the given reference {@link NameSample} object. * * This is done by finding the names with the * {@link TokenNameFinder} in the sentence from the reference * {@link NameSample}. The found names are then used to * calculate and update the scores. * * @param reference the reference {@link NameSample}. * * @return the predicted {@link NameSample}. */ @Override protected NameSample processSample(NameSample reference) { if (reference.isClearAdaptiveDataSet()) { nameFinder.clearAdaptiveData(); } Span[] predictedNames = nameFinder.find(reference.getSentence()); Span[] references = reference.getNames(); // OPENNLP-396 When evaluating with a file in the old format // the type of the span is null, but must be set to default to match // the output of the name finder. for (int i = 0; i < references.length; i++) { if (references[i].getType() == null) { references[i] = new Span(references[i].getStart(), references[i].getEnd(), "default"); } } fmeasure.updateScores(references, predictedNames); return new NameSample(reference.getSentence(), predictedNames, reference.isClearAdaptiveDataSet()); }
public void createFeatures(List<String> features, String[] tokens, int index, String[] preds) { // cache results for sentence if (currentSentence != tokens) { currentSentence = tokens; currentNames = finder.find(tokens); } // iterate over names and check if a span is contained for (Span currentName : currentNames) { if (currentName.contains(index)) { // found a span for the current token features.add(prefix + ":w=dic"); features.add(prefix + ":w=dic=" + tokens[index]); // TODO: consider generation start and continuation features break; } } } }
// Clear the name finder's adaptive data before tagging, find the name spans
// for this finder, and mark them on the parse tokens under the matching tag.
namefinder.clearAdaptiveData(); Span[] names = nameFinders[i].find(tokens); Parse.addNames(tags[i], names, parseTokens);
/**
 * Re-tags the sequence's source sentence with a name finder built from the
 * given model and regenerates the training events from the predicted outcomes.
 *
 * @param sequence the sequence whose source {@link NameSample} supplies the sentence
 * @param model the current model used to build the tagger
 * @return the regenerated events, one per sentence token
 */
@SuppressWarnings("unchecked")
public Event[] updateContext(Sequence sequence, AbstractModel model) {
  TokenNameFinder tagger = new NameFinderME(
      new TokenNameFinderModel("x-unspecified", model, Collections.emptyMap(), null));

  String[] sentence = ((Sequence<NameSample>) sequence).getSource().getSentence();
  String[] outcomes = seqCodec.encode(tagger.find(sentence), sentence.length);

  Event[] events = new Event[sentence.length];
  NameFinderEventStream.generateEvents(sentence, outcomes, pcg).toArray(events);
  return events;
}
// Reset adaptive data on the shared finder, detect name spans with the
// finder for this tag type, and attach them to the parse tokens.
namefinder.clearAdaptiveData(); Span names[] = nameFinders[i].find(tokens); Parse.addNames(tags[i], names, parseTokens);
// Detect name spans in the current token sequence.
Span[] names = nameFinder.find(tokens);
// Configure the type-system mapping for this CAS, clear the model resource's
// adaptive data, then detect named-entity spans in the token sequence.
mappingProvider.configure(cas); modelProvider.getResource().clearAdaptiveData(); Span[] namedEntities = modelProvider.getResource().find(tokens);
/**
 * Runs a name finder built from the given model over the line-wise test
 * samples and verifies that a digest accumulated over all detected spans
 * (type, start, end) equals the expected hash.
 *
 * @param model the model under test
 * @param expectedHash the expected digest value
 * @throws Exception if reading the samples or computing the digest fails
 */
private void evalNameFinder(TokenNameFinderModel model, BigInteger expectedHash) throws Exception {
  MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);
  TokenNameFinder nameFinder = new NameFinderME(model);

  try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) {
    for (LeipzigTestSample line = lines.read(); line != null; line = lines.read()) {
      for (Span name : nameFinder.find(line.getText())) {
        // Fold each span's type and boundaries into the digest.
        String key = name.getType() + name.getStart() + name.getEnd();
        digest.update(key.getBytes(StandardCharsets.UTF_8));
      }
    }
  }

  Assert.assertEquals(expectedHash, new BigInteger(1, digest.digest()));
}
/** * Evaluates the given reference {@link NameSample} object. * * This is done by finding the names with the * {@link TokenNameFinder} in the sentence from the reference * {@link NameSample}. The found names are then used to * calculate and update the scores. * * @param reference the reference {@link NameSample}. * * @return the predicted {@link NameSample}. */ @Override protected NameSample processSample(NameSample reference) { if (reference.isClearAdaptiveDataSet()) { nameFinder.clearAdaptiveData(); } Span[] predictedNames = nameFinder.find(reference.getSentence()); Span[] references = reference.getNames(); // OPENNLP-396 When evaluating with a file in the old format // the type of the span is null, but must be set to default to match // the output of the name finder. for (int i = 0; i < references.length; i++) { if (references[i].getType() == null) { references[i] = new Span(references[i].getStart(), references[i].getEnd(), "default"); } } fmeasure.updateScores(references, predictedNames); return new NameSample(reference.getSentence(), predictedNames, reference.isClearAdaptiveDataSet()); }
@Test
public void testCaseLongerEntry() {
  // "michael jordan" spans two tokens; exactly one span of length 2 is expected.
  String[] sentence = {"a", "b", "michael", "jordan"};

  Span[] names = mNameFinder.find(sentence);

  // assertEquals reports expected vs. actual on failure, unlike assertTrue(x == y).
  Assert.assertEquals(1, names.length);
  Assert.assertEquals(2, names[0].length());
}
}
/** * Evaluates the given reference {@link NameSample} object. * * This is done by finding the names with the * {@link TokenNameFinder} in the sentence from the reference * {@link NameSample}. The found names are then used to * calculate and update the scores. * * @param reference the reference {@link NameSample}. * * @return the predicted {@link NameSample}. */ @Override protected NameSample processSample(NameSample reference) { if (reference.isClearAdaptiveDataSet()) { nameFinder.clearAdaptiveData(); } Span[] predictedNames = nameFinder.find(reference.getSentence()); Span[] references = reference.getNames(); // OPENNLP-396 When evaluating with a file in the old format // the type of the span is null, but must be set to default to match // the output of the name finder. for (int i = 0; i < references.length; i++) { if (references[i].getType() == null) { references[i] = new Span(references[i].getStart(), references[i].getEnd(), "default"); } } fmeasure.updateScores(references, predictedNames); return new NameSample(reference.getSentence(), predictedNames, reference.isClearAdaptiveDataSet()); }
// Append all spans found in the whitespace-tokenized line to the result list.
Collections.addAll(names, nameFinder.find(whitespaceTokenizerLine));
/**
 * Delegates name detection for the given tokens to the wrapped name finder.
 *
 * @param cas the CAS of the current document (not used by the lookup itself)
 * @param tokens the sentence tokens to search
 * @return the name {@link Span}s reported by the underlying finder
 */
protected Span[] find(CAS cas, String[] tokens) {
  return mNameFinder.find(tokens);
}
@Test
public void testCaseSensitivity() {
  // Lower-case input tokens; one span covering tokens 3..5 is expected.
  String[] sentence = {"a", "b", "c", "vanessa", "williams"};

  Span[] names = mNameFinder.find(sentence);

  // assertEquals yields informative failure messages, unlike assertTrue(x == y).
  Assert.assertEquals(1, names.length);
  Assert.assertEquals(3, names[0].getStart());
  Assert.assertEquals(5, names[0].getEnd());
}
@Test
public void testLongerTokenNameIsPreferred() {
  // A single span covering the two-token name at indices 3..5 is expected.
  String[] sentence = {"a", "b", "c", "Vanessa", "Williams"};

  Span[] names = mNameFinder.find(sentence);

  // assertEquals reports expected vs. actual on failure, unlike assertTrue(x == y).
  Assert.assertEquals(1, names.length);
  Assert.assertEquals(3, names[0].getStart());
  Assert.assertEquals(5, names[0].getEnd());
}
@Test
public void testLastMatchingTokenNameIsChoosen() {
  // A single one-token span at index 3 is expected.
  String[] sentence = {"a", "b", "c", "Vanessa"};

  Span[] names = mNameFinder.find(sentence);

  // assertEquals reports expected vs. actual on failure, unlike assertTrue(x == y).
  Assert.assertEquals(1, names.length);
  Assert.assertEquals(3, names[0].getStart());
  Assert.assertEquals(4, names[0].getEnd());
}
@Test
public void testSingleTokeNameInsideSentence() {
  // "Max" sits mid-sentence; expect one span covering token index 2 only.
  String sentence = "a b Max c d";
  SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
  String[] tokens = tokenizer.tokenize(sentence);

  Span[] names = mNameFinder.find(tokens);

  // assertEquals reports expected vs. actual on failure, unlike assertTrue(x == y).
  Assert.assertEquals(1, names.length);
  Assert.assertEquals(2, names[0].getStart());
  Assert.assertEquals(3, names[0].getEnd());
}
@Test
public void testSingleTokeNameAtSentenceStart() {
  // "Max" is the first token; expect one span covering token index 0 only.
  String sentence = "Max a b c d";
  SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
  String[] tokens = tokenizer.tokenize(sentence);

  Span[] names = mNameFinder.find(tokens);

  // assertEquals reports expected vs. actual on failure, unlike assertTrue(x == y).
  Assert.assertEquals(1, names.length);
  Assert.assertEquals(0, names[0].getStart());
  Assert.assertEquals(1, names[0].getEnd());
}