/** Returns the NER tag of the wrapped label (delegates to {@code label.ner()}). */
@Override public String ner() { return label.ner(); }
/**
 * Renders the tokens of a sentence as a space-separated string, appending
 * "/NER-TAG" to every token whose NER label is present and not the background "O".
 *
 * @param sentence the sentence whose {@code TokensAnnotation} is printed;
 *                 a sentence with no token list yields the empty string
 * @return e.g. {@code "John/PERSON lives in Paris/LOCATION"}
 */
public static String tokensAndNELabelsToString(CoreMap sentence) {
  // StringBuilder instead of StringBuffer: this buffer is method-local, so the
  // synchronization StringBuffer pays for is pure overhead.
  StringBuilder os = new StringBuilder();
  List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
  if (tokens != null) {
    boolean first = true;
    for (CoreLabel token : tokens) {
      if (!first) {
        os.append(' ');
      }
      os.append(token.word());
      String ner = token.ner();
      if (ner != null && !ner.equals("O")) {
        // Chained appends avoid building an intermediate "/" + ner String.
        os.append('/').append(ner);
      }
      first = false;
    }
  }
  return os.toString();
}
/**
 * Returns the maximal contiguous run of tokens around this mention's head token that
 * all carry the mention's NER tag, or {@code null} when the mention has no NER tag
 * (tag absent or the background "O").
 */
public List<CoreLabel> nerTokens() {
  if (nerString == null || "O".equals(nerString)) {
    return null;
  }
  // Head position relative to the start of originalSpan; grow [lo, hi) outward
  // while the neighbouring tokens share the same NER tag.
  final int head = headIndex - startIndex;
  int lo = head;
  while (lo > 0 && nerString.equals(originalSpan.get(lo - 1).ner())) {
    lo--;
  }
  int hi = head + 1;
  while (hi < originalSpan.size() && nerString.equals(originalSpan.get(hi).ner())) {
    hi++;
  }
  return originalSpan.subList(lo, hi);
}
/**
 * Collects the contiguous block of tokens sharing this mention's NER tag, expanding
 * left and right from the head token. Returns {@code null} when the mention's NER
 * tag is missing or the background "O".
 */
public List<CoreLabel> nerTokens() {
  if (nerString == null || "O".equals(nerString)) {
    return null;
  }
  int first = headIndex - startIndex;      // inclusive left edge of the run
  int afterLast = first + 1;               // exclusive right edge of the run
  // Extend left while the preceding token carries the same NER tag.
  for (; first > 0; first--) {
    if (!nerString.equals(originalSpan.get(first - 1).ner())) {
      break;
    }
  }
  // Extend right while the following token carries the same NER tag.
  for (; afterLast < originalSpan.size(); afterLast++) {
    if (!nerString.equals(originalSpan.get(afterLast).ner())) {
      break;
    }
  }
  return originalSpan.subList(first, afterLast);
}
/** Returns the NER tag of every token in this sentence, in token order. */
public List<String> nerTags() {
  return tokens().stream().map(CoreLabel::ner).collect(Collectors.toList());
}
/**
 * Formats a labeled sentence compactly: each token rendered as
 * {@code word(tag[ answer][ ner:TAG])}, the whole list wrapped in brackets.
 *
 * @param labeledSentence the tokens to print
 * @param printNer whether to include each token's NER tag in the output
 * @return string for printing
 */
public static String labeledSentenceToString(List<CoreLabel> labeledSentence, boolean printNer) {
  StringBuilder out = new StringBuilder("[ ");
  for (CoreLabel token : labeledSentence) {
    String word = token.getString(annotationForWord);
    String answer = token.getString(AnswerAnnotation.class);
    String tag = token.getString(PartOfSpeechAnnotation.class);
    out.append(word).append('(').append(tag);
    // Only show the answer label when it differs from the background symbol.
    if (!SeqClassifierFlags.DEFAULT_BACKGROUND_SYMBOL.equals(answer)) {
      out.append(' ').append(answer);
    }
    if (printNer) {
      out.append(" ner:").append(token.ner());
    }
    out.append(") ");
  }
  return out.append(']').toString();
}
/**
 * A utility to get useful information out of a CorefMention. In particular, it returns the CoreLabels which are
 * associated with this mention, and it returns a score for how much we think this mention should be the canonical
 * mention.
 *
 * @param doc The document this mention is referenced into.
 * @param mention The mention itself.
 * @return A pair of the tokens in the mention, and a score for how much we like this mention as the canonical mention.
 */
private static Pair<List<CoreLabel>, Double> grokCorefMention(Annotation doc, CorefChain.CorefMention mention) {
  // CorefMention sentence and token indices are 1-based; shift by -1 for 0-based lists.
  // endIndex is exclusive, so (endIndex - 1) is the correct exclusive subList bound.
  List<CoreLabel> tokens = doc.get(CoreAnnotations.SentencesAnnotation.class).get(mention.sentNum - 1).get(CoreAnnotations.TokensAnnotation.class);
  List<CoreLabel> mentionAsTokens = tokens.subList(mention.startIndex - 1, mention.endIndex - 1);
  // Try to assess this mention's NER type: each non-background, non-null token tag casts one vote.
  Counter<String> nerVotes = new ClassicCounter<>();
  mentionAsTokens.stream().filter(token -> token.ner() != null && !"O".equals(token.ner())).forEach(token -> nerVotes.incrementCount(token.ner()));
  // Pick the majority tag, breaking count ties by string order.
  // NOTE(review): the comparator is asymmetric — it returns 0 whenever o1 is null but still
  // calls compareTo when only o2 is null; confirm null keys cannot reach argmax here.
  String ner = Counters.argmax(nerVotes, (o1, o2) -> o1 == null ? 0 : o1.compareTo(o2));
  double nerCount = nerVotes.getCount(ner);
  // Score = (votes for winning tag)^2 / mention length: rewards mentions where many
  // tokens agree on a single NER type, normalized so long noisy mentions don't dominate.
  double nerScore = nerCount * nerCount / ((double) mentionAsTokens.size());
  // Return
  return Pair.makePair(mentionAsTokens, nerScore);
}
while (currCMTokenIndex < cmTokens.size() && cmTokens.get(currCMTokenIndex).ner().equals("TITLE")) { currCMTokenIndex++;
/** {@inheritDoc} */ @Override public void doOneFailedSentence(Annotation annotation, CoreMap sentence) { List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class); for (CoreLabel token : tokens) { // add the background named entity tag to each token if it doesn't have an NER tag. if (token.ner() == null) { token.setNER(this.ner.backgroundSymbol()); } } }
List<CoreLabel> existingMention = canonicalMentionMap.get(token); if (existingMention == null || existingMention.isEmpty() || "O".equals(existingMention.get(0).ner())) { // Don't clobber existing good mentions canonicalMentionMap.put(token, canonicalMention);
/**
 * Classifies a copy of the sentence tokens with the number-sequence classifier and merges
 * the resulting labels back into the original tokens, without clobbering existing
 * specific NER tags.
 *
 * @param words the original sentence tokens, mutated in place with new NER tags
 * @param doc the enclosing document, passed through for global context
 * @param sentence the sentence the tokens belong to
 */
private void doOneSentenceNew(List<CoreLabel> words, Annotation doc, CoreMap sentence) {
  // Classify a copy so the classifier's scratch annotations don't pollute the originals.
  List<CoreLabel> newWords = NumberSequenceClassifier.copyTokens(words, sentence);
  nsc.classifyWithGlobalInformation(newWords, doc, sentence);
  // Walk both lists in parallel; copyTokens keeps a 1:1 token correspondence.
  Iterator<? extends CoreLabel> newFLIter = newWords.iterator();
  for (CoreLabel origWord : words) {
    CoreLabel newWord = newFLIter.next();
    String before = origWord.ner();
    String newGuess = newWord.get(CoreAnnotations.AnswerAnnotation.class);
    if (VERBOSE) log.info(newWord);
    // Overwrite only when the existing tag is absent, background, or the generic MISC,
    // and the classifier produced a real (non-background) guess: specific tags win.
    if ((before == null || before.equals(BACKGROUND_SYMBOL) || before.equals("MISC")) && !newGuess.equals(BACKGROUND_SYMBOL)) {
      origWord.setNER(newGuess);
    }
    // transfer other annotations generated by SUTime or NumberNormalizer
    NumberSequenceClassifier.transferAnnotations(newWord, origWord);
  }
}
/**
 * Runs NER over a defensive copy of the tokens, then writes the resulting NER and
 * normalized-NER tags back onto the original tokens.
 *
 * @param tokens the tokens to annotate; mutated in place with NER and
 *               NormalizedNamedEntityTag annotations
 */
private <TOKEN extends CoreLabel> void annotateTokens(List<TOKEN> tokens) {
  // Make a copy of the tokens before annotating because QuantifiableEntityNormalizer may change the POS too
  List<CoreLabel> words = new ArrayList<>();
  for (CoreLabel token : tokens) {
    // Copy only the fields the classifier needs: word, NER, POS tag.
    CoreLabel word = new CoreLabel();
    word.setWord(token.word());
    word.setNER(token.ner());
    word.setTag(token.tag());
    // copy fields potentially set by SUTime
    NumberSequenceClassifier.transferAnnotations(token, word);
    words.add(word);
  }
  doOneSentence(words);
  // TODO: If collapsed is set, tokens for entities are collapsed into one node then
  // (words.size() != tokens.size() and the logic below just don't work!!!
  // Write the classifier's tags back onto the originals, assuming 1:1 index alignment.
  for (int i = 0; i < words.size(); i++) {
    String ner = words.get(i).ner();
    tokens.get(i).setNER(ner);
    tokens.get(i).set(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class, words.get(i).get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class));
  }
}
if ("O".equals(token.ner()) && token.word().toUpperCase().equals(token.word()) && token.word().length() >= 3) { for (List<CoreLabel> org : organizations) {
String ner = p.headWord.ner(); int sIdx = p.startIndex; int eIdx = p.endIndex; if(neStrings.contains(span)) { features.incrementCount("B-NE-STRING-EXIST"); if( ( preWord==null || !preWord.ner().equals(ner) ) && ( nextWord==null || !nextWord.ner().equals(ner) ) ) { features.incrementCount("B-NE-FULLSPAN");
/**
 * Guesses an NER tag for a span of tokens by voting over the tokens' individual NER tags.
 * Background ("O") and missing tags are removed before the vote.
 *
 * @param tokens the tokens of the sentence the span indexes into
 * @param span the token indices to vote over
 * @return the most frequent non-background tag if its count reaches span.size() / 2,
 *         otherwise "O"
 */
public static String guessNER(List<CoreLabel> tokens, Span span) {
  Counter<String> nerGuesses = new ClassicCounter<>();
  for (int i : span) {
    nerGuesses.incrementCount(tokens.get(i).ner());
  }
  // Background and null tags never win the vote.
  nerGuesses.remove("O");
  nerGuesses.remove(null);
  // NOTE(review): span.size() / 2 is integer division, so for a span of 3 a single vote
  // (1 >= 1) suffices — confirm a strict-majority threshold wasn't intended.
  if (nerGuesses.size() > 0 && Counters.max(nerGuesses) >= span.size() / 2) {
    return Counters.argmax(nerGuesses);
  } else {
    return "O";
  }
}
private static void modifyUsingCoreNLPNER(Annotation doc) { Properties ann = new Properties(); ann.setProperty("annotators", "pos, lemma, ner"); StanfordCoreNLP pipeline = new StanfordCoreNLP(ann, false); pipeline.annotate(doc); for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) { List<EntityMention> entities = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class); if (entities != null) { List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class); for (EntityMention en : entities) { //System.out.println("old ner tag for " + en.getExtentString() + " was " + en.getType()); Span s = en.getExtent(); Counter<String> allNertagforSpan = new ClassicCounter<>(); for (int i = s.start(); i < s.end(); i++) { allNertagforSpan.incrementCount(tokens.get(i).ner()); } String entityNertag = Counters.argmax(allNertagforSpan); en.setType(entityNertag); //System.out.println("new ner tag is " + entityNertag); } } } }