Refine search
case 1: wi.setWord(BOUNDARY); wi.set(CoreAnnotations.AnswerAnnotation.class, flags.backgroundSymbol); break; case 2: wi.setWord(bits[0]); wi.set(CoreAnnotations.AnswerAnnotation.class, bits[1]); break; case 3: wi.setWord(bits[0]); wi.setTag(bits[1]); wi.set(CoreAnnotations.AnswerAnnotation.class, bits[2]); break; case 4: wi.setWord(bits[0]); wi.setTag(bits[1]); wi.set(CoreAnnotations.ChunkAnnotation.class, bits[2]); wi.set(CoreAnnotations.AnswerAnnotation.class, bits[3]); break; case 5: wi.set(CoreAnnotations.LemmaAnnotation.class, bits[1]); wi.setTag(bits[2]); wi.set(CoreAnnotations.ChunkAnnotation.class, bits[3]); wi.set(CoreAnnotations.AnswerAnnotation.class, bits[4]); break; default: wi.set(CoreAnnotations.ValueAnnotation.class, wi.word());
/**
 * Builds a new tree node labeled {@code label}, copying the head word and
 * head tag annotations from the label of {@code top} and attaching the
 * given children in order.
 *
 * @param top tree whose label supplies the head word/tag annotations
 * @param label value for the new node's label
 * @param children subtrees to attach under the new node, in order
 * @return the newly created node
 */
static Tree createNode(Tree top, String label, Tree ... children) {
  CoreLabel topLabel = (CoreLabel) top.label();
  CoreLabel newLabel = new CoreLabel();
  newLabel.setValue(label);
  // propagate head information from the original top node
  newLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class,
               topLabel.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class));
  newLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class,
               topLabel.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class));
  Tree result = new LabeledScoredTreeNode(newLabel);
  for (Tree kid : children) {
    result.addChild(kid);
  }
  return result;
}
for (int i = 0, sz = tokens.size(); i < sz; ++i) { String neTag = output.get(i).get(CoreAnnotations.NamedEntityTagAnnotation.class); String normNeTag = output.get(i).get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class); Map<String,Double> neTagProbMap = output.get(i).get(CoreAnnotations.NamedEntityTagProbsAnnotation.class); if (language.equals(LanguageInfo.HumanLanguage.SPANISH)) { neTag = spanishToEnglishTag(neTag); tokens.get(i).set(CoreAnnotations.NamedEntityTagProbsAnnotation.class, neTagProbMap); tokens.get(i).set(CoreAnnotations.CoarseNamedEntityTagAnnotation.class, neTag); if (normNeTag != null) tokens.get(i).set(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class, normNeTag); NumberSequenceClassifier.transferAnnotations(output.get(i), tokens.get(i));
l2.setWord(l.word()); if (l.get(anscl.getValue()).equals(label)) { l2.set(CoreAnnotations.AnswerAnnotation.class, label); } else l2.set(CoreAnnotations.AnswerAnnotation.class, constVars.backgroundSymbol); if (!l.get(CoreAnnotations.GoldAnswerAnnotation.class).equals(label)) { l2.set(CoreAnnotations.GoldAnswerAnnotation.class, constVars.backgroundSymbol); } else l2.set(CoreAnnotations.GoldAnswerAnnotation.class, label); doceval.add(l2);
/**
 * Shifts the character offsets of every token by {@code offset}, updating
 * both the begin and end offset annotations in place.
 *
 * @param tokens tokens whose offsets are adjusted (mutated in place)
 * @param offset amount to add to each begin/end character offset
 */
public static void updateOffsetsInCoreLabels(List<CoreLabel> tokens, int offset) {
  for (CoreLabel token : tokens) {
    int begin = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
    int end = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
    token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin + offset);
    token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, end + offset);
  }
}
/**
 * Copies only the fields required for numeric entity extraction into a new
 * CoreLabel.
 *
 * @param src Source CoreLabel to copy.
 * @param startOffset Begin character offset for the copy; when {@code null},
 *        the begin offset is taken from {@code src}.
 * @param endOffset End character offset for the copy; when {@code null},
 *        the end offset is taken from {@code src}.
 * @return the new, partially populated CoreLabel
 */
private static CoreLabel copyCoreLabel(CoreLabel src, Integer startOffset, Integer endOffset) {
  CoreLabel dst = new CoreLabel();
  dst.setWord(src.word());
  dst.setTag(src.tag());
  if (src.containsKey(CoreAnnotations.OriginalTextAnnotation.class)) {
    dst.set(CoreAnnotations.OriginalTextAnnotation.class,
        src.get(CoreAnnotations.OriginalTextAnnotation.class));
  }
  // explicit offsets win; otherwise fall back to the source token's offsets
  Integer begin = (startOffset != null)
      ? startOffset : src.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
  Integer end = (endOffset != null)
      ? endOffset : src.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
  dst.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin);
  dst.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, end);
  // also carry over the SUTime / NumberNormalizer annotations
  transferAnnotations(src, dst);
  return dst;
}
production.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class)); production.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class)); Tree newTop = new LabeledScoredTreeNode(production); newTop.addChild(left);
/**
 * Create a datum from a string. The CoreAnnotations must correspond to those
 * used by SequenceClassifier. The following annotations are copied from the
 * provided CoreLabel cl, if present: DomainAnnotation.
 * startOffset and endOffset will be added to the
 * {@link CoreAnnotations.CharacterOffsetBeginAnnotation} of the
 * {@link CoreLabel} cl to give the begin and end character offsets of the
 * resulting datum.
 *
 * @param cl token supplying the base character offset and (optionally) the domain;
 *           must be non-null (it is always dereferenced)
 * @param token surface text of the datum
 * @param label answer/gold label for the datum
 * @param startOffset offset of the datum's start relative to cl's begin offset
 * @param endOffset offset of the datum's end relative to cl's begin offset
 */
private static CoreLabel createDatum(CoreLabel cl, String token, String label, int startOffset, int endOffset) {
  CoreLabel newTok = new CoreLabel();
  newTok.set(CoreAnnotations.TextAnnotation.class, token);
  newTok.set(CoreAnnotations.CharAnnotation.class, token);
  newTok.set(CoreAnnotations.AnswerAnnotation.class, label);
  newTok.set(CoreAnnotations.GoldAnswerAnnotation.class, label);
  // Both offsets are relative to cl's *begin* offset; fetch it once.
  int base = cl.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
  newTok.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, base + startOffset);
  newTok.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, base + endOffset);
  // BUGFIX: the previous "cl != null &&" guard here was dead code -- cl has
  // already been dereferenced unconditionally above, so it could never be
  // null at this point.
  if (cl.containsKey(CoreAnnotations.DomainAnnotation.class)) {
    newTok.set(CoreAnnotations.DomainAnnotation.class,
        cl.get(CoreAnnotations.DomainAnnotation.class));
  }
  return newTok;
}
private <TOKEN extends CoreLabel> void annotateTokens(List<TOKEN> tokens) { // Make a copy of the tokens before annotating because QuantifiableEntityNormalizer may change the POS too List<CoreLabel> words = new ArrayList<>(); for (CoreLabel token : tokens) { CoreLabel word = new CoreLabel(); word.setWord(token.word()); word.setNER(token.ner()); word.setTag(token.tag()); // copy fields potentially set by SUTime NumberSequenceClassifier.transferAnnotations(token, word); words.add(word); } doOneSentence(words); // TODO: If collapsed is set, tokens for entities are collapsed into one node then // (words.size() != tokens.size() and the logic below just don't work!!! for (int i = 0; i < words.size(); i++) { String ner = words.get(i).ner(); tokens.get(i).setNER(ner); tokens.get(i).set(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class, words.get(i).get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class)); } }
private static void annotateMarkersOnWord(List<CoreLabel> labeledSequence, int wordBegin, int wordEnd, List<String> segments) { Pair<Integer, Integer> headBounds = getHeadBounds(segments); int currentIndex = 0; for (int i = wordBegin; i < wordEnd; i++) { String label = labeledSequence.get(i).get(CoreAnnotations.AnswerAnnotation.class); labeledSequence.get(i).set(PrefixMarkerAnnotation.class, Boolean.FALSE); labeledSequence.get(i).set(SuffixMarkerAnnotation.class, Boolean.FALSE); if (label.equals(BeginSymbol)) { // Add prefix markers for BEGIN characters up to and including the start of the head // (but don't add prefix markers if there aren't any prefixes) if (currentIndex <= headBounds.first && currentIndex != 0) labeledSequence.get(i).set(PrefixMarkerAnnotation.class, Boolean.TRUE); // Add suffix markers for BEGIN characters starting one past the end of the head // (headBounds.second is one past the end, no need to add one) if (currentIndex >= headBounds.second) labeledSequence.get(i).set(SuffixMarkerAnnotation.class, Boolean.TRUE); currentIndex++; } } }
/**
 * For every token of every sentence, overwrites each machine answer class
 * with the current value of the corresponding human-labeled class.
 *
 * @param sents map from sentence id to data instance (tokens mutated in place)
 */
private void resetPatternLabelsInSents(Map<String, DataInstance> sents) {
  for (DataInstance sent : sents.values()) {
    for (CoreLabel token : sent.getTokens()) {
      for (Map.Entry<String, Class<? extends TypesafeMap.Key<String>>> entry : humanLabelClasses.entrySet()) {
        // machineAnswerClasses is keyed by the same label names as humanLabelClasses
        token.set(machineAnswerClasses.get(entry.getKey()), token.get(entry.getValue()));
      }
    }
  }
}
/**
 * Recognizes ordinal numbers: any token matching ORDINAL_PATTERN whose
 * current answer is either the background symbol or "NUMBER" is relabeled
 * as "ORDINAL".
 *
 * @param tokenSequence tokens to scan (answers mutated in place)
 */
private void ordinalRecognizer(List<CoreLabel> tokenSequence) {
  for (CoreLabel token : tokenSequence) {
    String answer = token.get(CoreAnnotations.AnswerAnnotation.class);
    boolean relabelable = answer.equals(flags.backgroundSymbol) || answer.equals("NUMBER");
    if (relabelable && ORDINAL_PATTERN.matcher(token.word()).matches()) {
      token.set(CoreAnnotations.AnswerAnnotation.class, "ORDINAL");
    }
  }
}
public void setNamedEntityTagGranularity(Annotation annotation, String granularity) { List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class); Class<? extends CoreAnnotation<String>> sourceNERTagClass; if (granularity.equals("fine")) sourceNERTagClass = CoreAnnotations.FineGrainedNamedEntityTagAnnotation.class; else if (granularity.equals("coarse")) sourceNERTagClass = CoreAnnotations.CoarseNamedEntityTagAnnotation.class; else sourceNERTagClass = CoreAnnotations.NamedEntityTagAnnotation.class; // switch tags for (CoreLabel token : tokens) { if (!"".equals(token.get(sourceNERTagClass)) && token.get(sourceNERTagClass) != null) token.set(CoreAnnotations.NamedEntityTagAnnotation.class, token.get(sourceNERTagClass)); } }
private int changeLeftToRight(List<CoreLabel> tokens, int start, String oldTag, String posTag, String newTag) { while(start < tokens.size()) { CoreLabel crt = tokens.get(start); // we are scanning for a NER tag and found something different if(! oldTag.equals(flags.backgroundSymbol) && ! crt.get(CoreAnnotations.AnswerAnnotation.class).equals(oldTag)) { break; } // the NER tag is not set, so we scan for similar POS tags if(oldTag.equals(flags.backgroundSymbol) && ! crt.tag().equals(posTag)) { break; } crt.set(CoreAnnotations.AnswerAnnotation.class, newTag); start ++; } return start; }
private int changeRightToLeft(List<CoreLabel> tokens, int start, String oldTag, String posTag, String newTag) { while(start >= 0) { CoreLabel crt = tokens.get(start); // we are scanning for a NER tag and found something different if(! oldTag.equals(flags.backgroundSymbol) && ! crt.get(CoreAnnotations.AnswerAnnotation.class).equals(oldTag)) { break; } // the NER tag is not set, so we scan for similar POS tags if(oldTag.equals(flags.backgroundSymbol) && ! crt.tag().equals(posTag)) { break; } crt.set(CoreAnnotations.AnswerAnnotation.class, newTag); start --; } return start; }
/** * Transfer from src to dst all annotations generated bu SUTime and NumberNormalizer * @param src * @param dst */ public static void transferAnnotations(CoreLabel src, CoreLabel dst) { // // annotations potentially set by NumberNormalizer // if(src.containsKey(CoreAnnotations.NumericCompositeValueAnnotation.class)){ dst.set(CoreAnnotations.NumericCompositeValueAnnotation.class, src.get(CoreAnnotations.NumericCompositeValueAnnotation.class)); } if(src.containsKey(CoreAnnotations.NumericCompositeTypeAnnotation.class)) dst.set(CoreAnnotations.NumericCompositeTypeAnnotation.class, src.get(CoreAnnotations.NumericCompositeTypeAnnotation.class)); // // annotations set by SUTime // if(src.containsKey(TimeAnnotations.TimexAnnotation.class)) dst.set(TimeAnnotations.TimexAnnotation.class, src.get(TimeAnnotations.TimexAnnotation.class)); }
private static void setNamedEntityTagGranularity(Annotation annotation, String granularity) { List<CoreLabel> tokens = annotation.get(CoreAnnotations.TokensAnnotation.class); Class<? extends CoreAnnotation<String>> sourceNERTagClass; if (granularity.equals("fine")) sourceNERTagClass = CoreAnnotations.FineGrainedNamedEntityTagAnnotation.class; else if (granularity.equals("coarse")) sourceNERTagClass = CoreAnnotations.CoarseNamedEntityTagAnnotation.class; else sourceNERTagClass = CoreAnnotations.NamedEntityTagAnnotation.class; // switch tags for (CoreLabel token : tokens) { if ( ! StringUtils.isNullOrEmpty(token.get(sourceNERTagClass)) ) { token.set(CoreAnnotations.NamedEntityTagAnnotation.class, token.get(sourceNERTagClass)); } } }
/** * Set the word value for the label. Also, clears the lemma, since * that may have changed if the word changed. */ @Override public void setWord(String word) { String originalWord = get(CoreAnnotations.TextAnnotation.class); set(CoreAnnotations.TextAnnotation.class, word); // Pado feb 09: if you change the word, delete the lemma. // Gabor dec 2012: check if there was a real change -- this remove is actually rather expensive if it gets called a lot // todo [cdm 2015]: probably no one now knows why this was even needed, but maybe it should just be removed. It's kind of weird. if (word != null && !word.equals(originalWord) && containsKey(CoreAnnotations.LemmaAnnotation.class)) { remove(CoreAnnotations.LemmaAnnotation.class); } }
private void recognizeNumberSequences(List<CoreLabel> words, final CoreMap document, final CoreMap sentence) { // we need to copy here because NumberSequenceClassifier overwrites the AnswerAnnotation List<CoreLabel> newWords = NumberSequenceClassifier.copyTokens(words, sentence); nsc.classifyWithGlobalInformation(newWords, document, sentence); // copy AnswerAnnotation back. Do not overwrite! // also, copy all the additional annotations generated by SUTime and NumberNormalizer for (int i = 0, sz = words.size(); i < sz; i++){ CoreLabel origWord = words.get(i); CoreLabel newWord = newWords.get(i); // log.info(newWord.word() + " => " + newWord.get(CoreAnnotations.AnswerAnnotation.class) + " " + origWord.ner()); String before = origWord.get(CoreAnnotations.AnswerAnnotation.class); String newGuess = newWord.get(CoreAnnotations.AnswerAnnotation.class); if ((before == null || before.equals(nsc.flags.backgroundSymbol) || before.equals("MISC")) && !newGuess.equals(nsc.flags.backgroundSymbol)) { origWord.set(CoreAnnotations.AnswerAnnotation.class, newGuess); } // transfer other annotations generated by SUTime or NumberNormalizer NumberSequenceClassifier.transferAnnotations(newWord, origWord); } }