/**
 * Detects name spans in the given token sequence.
 *
 * @param cas the CAS of the document being processed (not consulted by this lookup)
 * @param tokens the tokens of the current sentence
 * @return the detected name {@link Span}s
 */
protected Span[] find(CAS cas, String[] tokens) {
  Span[] detectedNames = mNameFinder.find(tokens);
  return detectedNames;
}
/** * Evaluates the given reference {@link NameSample} object. * * This is done by finding the names with the * {@link TokenNameFinder} in the sentence from the reference * {@link NameSample}. The found names are then used to * calculate and update the scores. * * @param reference the reference {@link NameSample}. * * @return the predicted {@link NameSample}. */ @Override protected NameSample processSample(NameSample reference) { if (reference.isClearAdaptiveDataSet()) { nameFinder.clearAdaptiveData(); } Span[] predictedNames = nameFinder.find(reference.getSentence()); Span[] references = reference.getNames(); // OPENNLP-396 When evaluating with a file in the old format // the type of the span is null, but must be set to default to match // the output of the name finder. for (int i = 0; i < references.length; i++) { if (references[i].getType() == null) { references[i] = new Span(references[i].getStart(), references[i].getEnd(), "default"); } } fmeasure.updateScores(references, predictedNames); return new NameSample(reference.getSentence(), predictedNames, reference.isClearAdaptiveDataSet()); }
public void createFeatures(List<String> features, String[] tokens, int index, String[] preds) { // cache results for sentence if (currentSentence != tokens) { currentSentence = tokens; currentNames = finder.find(tokens); } // iterate over names and check if a span is contained for (Span currentName : currentNames) { if (currentName.contains(index)) { // found a span for the current token features.add(prefix + ":w=dic"); features.add(prefix + ":w=dic=" + tokens[index]); // TODO: consider generation start and continuation features break; } } } }
// Clear the name finder's adaptive data before tagging, find the name spans
// for this finder, and mark them on the parse tokens under the matching tag.
namefinder.clearAdaptiveData(); Span[] names = nameFinders[i].find(tokens); Parse.addNames(tags[i], names, parseTokens);
/**
 * Re-tags the sequence's source sentence with a name finder built from the
 * given model and regenerates the training events from the predicted outcomes.
 *
 * @param sequence the sequence whose source {@link NameSample} supplies the sentence
 * @param model the current model used to build the tagger
 * @return the regenerated events, one per sentence token
 */
@SuppressWarnings("unchecked")
public Event[] updateContext(Sequence sequence, AbstractModel model) {
  TokenNameFinder tagger = new NameFinderME(
      new TokenNameFinderModel("x-unspecified", model, Collections.emptyMap(), null));

  String[] sentence = ((Sequence<NameSample>) sequence).getSource().getSentence();
  String[] outcomes = seqCodec.encode(tagger.find(sentence), sentence.length);

  Event[] events = new Event[sentence.length];
  NameFinderEventStream.generateEvents(sentence, outcomes, pcg).toArray(events);
  return events;
}
// Reset adaptive data on the shared finder, detect name spans with the
// finder for this tag type, and attach them to the parse tokens.
namefinder.clearAdaptiveData(); Span names[] = nameFinders[i].find(tokens); Parse.addNames(tags[i], names, parseTokens);
// Detect name spans in the current token sequence.
Span[] names = nameFinder.find(tokens);
// Configure the type-system mapping for this CAS, clear the model resource's
// adaptive data, then detect named-entity spans in the token sequence.
mappingProvider.configure(cas); modelProvider.getResource().clearAdaptiveData(); Span[] namedEntities = modelProvider.getResource().find(tokens);
/**
 * Runs a name finder built from the given model over the line-wise test
 * samples and verifies that a digest accumulated over all detected spans
 * (type, start, end) equals the expected hash.
 *
 * @param model the model under test
 * @param expectedHash the expected digest value
 * @throws Exception if reading the samples or computing the digest fails
 */
private void evalNameFinder(TokenNameFinderModel model, BigInteger expectedHash) throws Exception {
  MessageDigest digest = MessageDigest.getInstance(HASH_ALGORITHM);
  TokenNameFinder nameFinder = new NameFinderME(model);

  try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) {
    for (LeipzigTestSample line = lines.read(); line != null; line = lines.read()) {
      for (Span name : nameFinder.find(line.getText())) {
        // Fold each span's type and boundaries into the digest.
        String key = name.getType() + name.getStart() + name.getEnd();
        digest.update(key.getBytes(StandardCharsets.UTF_8));
      }
    }
  }

  Assert.assertEquals(expectedHash, new BigInteger(1, digest.digest()));
}
/** * Evaluates the given reference {@link NameSample} object. * * This is done by finding the names with the * {@link TokenNameFinder} in the sentence from the reference * {@link NameSample}. The found names are then used to * calculate and update the scores. * * @param reference the reference {@link NameSample}. * * @return the predicted {@link NameSample}. */ @Override protected NameSample processSample(NameSample reference) { if (reference.isClearAdaptiveDataSet()) { nameFinder.clearAdaptiveData(); } Span[] predictedNames = nameFinder.find(reference.getSentence()); Span[] references = reference.getNames(); // OPENNLP-396 When evaluating with a file in the old format // the type of the span is null, but must be set to default to match // the output of the name finder. for (int i = 0; i < references.length; i++) { if (references[i].getType() == null) { references[i] = new Span(references[i].getStart(), references[i].getEnd(), "default"); } } fmeasure.updateScores(references, predictedNames); return new NameSample(reference.getSentence(), predictedNames, reference.isClearAdaptiveDataSet()); }
@Test
public void testCaseLongerEntry() {
  // "michael jordan" spans two tokens; exactly one span of length 2 is expected.
  String[] sentence = {"a", "b", "michael", "jordan"};

  Span[] names = mNameFinder.find(sentence);

  // assertEquals reports expected vs. actual on failure, unlike assertTrue(x == y).
  Assert.assertEquals(1, names.length);
  Assert.assertEquals(2, names[0].length());
}
}
/** * Evaluates the given reference {@link NameSample} object. * * This is done by finding the names with the * {@link TokenNameFinder} in the sentence from the reference * {@link NameSample}. The found names are then used to * calculate and update the scores. * * @param reference the reference {@link NameSample}. * * @return the predicted {@link NameSample}. */ @Override protected NameSample processSample(NameSample reference) { if (reference.isClearAdaptiveDataSet()) { nameFinder.clearAdaptiveData(); } Span[] predictedNames = nameFinder.find(reference.getSentence()); Span[] references = reference.getNames(); // OPENNLP-396 When evaluating with a file in the old format // the type of the span is null, but must be set to default to match // the output of the name finder. for (int i = 0; i < references.length; i++) { if (references[i].getType() == null) { references[i] = new Span(references[i].getStart(), references[i].getEnd(), "default"); } } fmeasure.updateScores(references, predictedNames); return new NameSample(reference.getSentence(), predictedNames, reference.isClearAdaptiveDataSet()); }
// Append all spans found in the whitespace-tokenized line to the result list.
Collections.addAll(names, nameFinder.find(whitespaceTokenizerLine));
/**
 * Delegates name detection for the given tokens to the wrapped name finder.
 *
 * @param cas the CAS of the current document (not used by the lookup itself)
 * @param tokens the sentence tokens to search
 * @return the name {@link Span}s reported by the underlying finder
 */
protected Span[] find(CAS cas, String[] tokens) {
  return mNameFinder.find(tokens);
}
@Test
public void testCaseSensitivity() {
  // Lower-case input tokens; one span covering tokens 3..5 is expected.
  String[] sentence = {"a", "b", "c", "vanessa", "williams"};

  Span[] names = mNameFinder.find(sentence);

  // assertEquals yields informative failure messages, unlike assertTrue(x == y).
  Assert.assertEquals(1, names.length);
  Assert.assertEquals(3, names[0].getStart());
  Assert.assertEquals(5, names[0].getEnd());
}
@Test
public void testLongerTokenNameIsPreferred() {
  // A single span covering the two-token name at indices 3..5 is expected.
  String[] sentence = {"a", "b", "c", "Vanessa", "Williams"};

  Span[] names = mNameFinder.find(sentence);

  // assertEquals reports expected vs. actual on failure, unlike assertTrue(x == y).
  Assert.assertEquals(1, names.length);
  Assert.assertEquals(3, names[0].getStart());
  Assert.assertEquals(5, names[0].getEnd());
}
@Test
public void testLastMatchingTokenNameIsChoosen() {
  // A single one-token span at index 3 is expected.
  String[] sentence = {"a", "b", "c", "Vanessa"};

  Span[] names = mNameFinder.find(sentence);

  // assertEquals reports expected vs. actual on failure, unlike assertTrue(x == y).
  Assert.assertEquals(1, names.length);
  Assert.assertEquals(3, names[0].getStart());
  Assert.assertEquals(4, names[0].getEnd());
}
@Test
public void testSingleTokeNameInsideSentence() {
  // "Max" sits mid-sentence; expect one span covering token index 2 only.
  String sentence = "a b Max c d";
  SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
  String[] tokens = tokenizer.tokenize(sentence);

  Span[] names = mNameFinder.find(tokens);

  // assertEquals reports expected vs. actual on failure, unlike assertTrue(x == y).
  Assert.assertEquals(1, names.length);
  Assert.assertEquals(2, names[0].getStart());
  Assert.assertEquals(3, names[0].getEnd());
}
@Test
public void testSingleTokeNameAtSentenceStart() {
  // "Max" is the first token; expect one span covering token index 0 only.
  String sentence = "Max a b c d";
  SimpleTokenizer tokenizer = SimpleTokenizer.INSTANCE;
  String[] tokens = tokenizer.tokenize(sentence);

  Span[] names = mNameFinder.find(tokens);

  // assertEquals reports expected vs. actual on failure, unlike assertTrue(x == y).
  Assert.assertEquals(1, names.length);
  Assert.assertEquals(0, names[0].getStart());
  Assert.assertEquals(1, names[0].getEnd());
}