/**
 * Indexes every span (constituent) of this tree, numbering from zero.
 *
 * <p>Indices are fenceposts: a span starts at the fencepost to the left of
 * its first word and ends at the fencepost after its last word. Spans are
 * only recorded on nodes whose label is a {@code CoreMap}.
 *
 * <p>Shorthand for {@code indexSpans(0)}.
 */
public void indexSpans() {
  final int firstFencepost = 0;
  indexSpans(firstFencepost);
}
/**
 * Indexes every span of this tree, starting the numbering at the given value.
 *
 * <p>Wraps {@code startIndex} in a fresh {@code MutableInteger} and hands it
 * to {@code indexSpans(MutableInteger)}; the value returned by that overload
 * is discarded.
 *
 * @param startIndex value at which indexing begins
 */
public void indexSpans(int startIndex) {
  MutableInteger counter = new MutableInteger(startIndex);
  indexSpans(counter);
}
/**
 * Fills in {@code headIndex}, {@code headWord} and {@code headString} for
 * each mention of one sentence.
 *
 * <p>The sentence's tree is span-indexed from 0 first. The head of each
 * mention is then taken from {@code findSyntacticHead}; its
 * {@code IndexAnnotation} minus one is used as the position of the head
 * token in the sentence's token list. If that position falls outside the
 * mention's {@code originalSpan}, a warning is logged and the head falls
 * back to the start of the mention.
 *
 * @param s        sentence carrying the tree and token annotations
 * @param mentions mentions of that sentence, updated in place
 */
protected void findHead(CoreMap s, List<Mention> mentions) {
  final Tree parse = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  final List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
  parse.indexSpans(0);

  for (Mention m : mentions) {
    CoreLabel headLabel = (CoreLabel) findSyntacticHead(m, parse, tokens).label();
    m.headIndex = headLabel.get(CoreAnnotations.IndexAnnotation.class) - 1;
    m.headWord = tokens.get(m.headIndex);
    String headText = m.headWord.get(CoreAnnotations.TextAnnotation.class);
    m.headString = headText.toLowerCase(Locale.ENGLISH);

    // Offset of the head relative to the first token of the mention.
    int start = m.headIndex - m.startIndex;
    boolean headInsideSpan = start >= 0 && start < m.originalSpan.size();
    if (headInsideSpan) {
      continue;
    }

    // Head lies outside the mention: report it and fall back to the mention start.
    SieveCoreferenceSystem.logger.warning("Invalid index for head " + start + "=" + m.headIndex + "-" + m.startIndex
        + ": originalSpan=[" + StringUtils.joinWords(m.originalSpan, " ") + "], head=" + m.headWord);
    SieveCoreferenceSystem.logger.warning("Setting head string to entire mention");
    m.headIndex = m.startIndex;
    if (m.originalSpan.size() > 0) {
      m.headWord = m.originalSpan.get(0);
    } else {
      m.headWord = tokens.get(m.startIndex);
    }
    m.headString = m.originalSpan.toString();
  }
}
/**
 * Fills in {@code headIndex}, {@code headWord} and {@code headString} for
 * each mention of one sentence.
 *
 * <p>The sentence's tree is span-indexed from 0 first. For Chinese the work
 * is delegated to {@code findHeadChinese} (presumably it sets the same three
 * fields; confirm against that method). Otherwise the head comes from
 * {@code findSyntacticHead}, and its {@code IndexAnnotation} minus one is
 * used as the position of the head token in the sentence's token list.
 *
 * <p>If the resulting head position falls outside the mention's
 * {@code originalSpan}, the problem is logged and the head falls back to the
 * start of the mention.
 *
 * @param s        sentence carrying the tree and token annotations
 * @param mentions mentions of that sentence, updated in place
 */
public void findHead(CoreMap s, List<Mention> mentions) {
  Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
  tree.indexSpans(0);
  // Compare locales by value: an equal Locale obtained from elsewhere is not
  // guaranteed to be the same instance as the Locale.CHINESE constant.
  final boolean isChinese = Locale.CHINESE.equals(lang);
  for (Mention m : mentions) {
    if (isChinese) {
      findHeadChinese(sent, m);
    } else {
      CoreLabel head = (CoreLabel) findSyntacticHead(m, tree, sent).label();
      m.headIndex = head.get(CoreAnnotations.IndexAnnotation.class) - 1;
      m.headWord = sent.get(m.headIndex);
      m.headString = m.headWord.get(CoreAnnotations.TextAnnotation.class).toLowerCase(Locale.ENGLISH);
    }
    // Offset of the head relative to the first token of the mention.
    int start = m.headIndex - m.startIndex;
    if (start < 0 || start >= m.originalSpan.size()) {
      Redwood.log("Invalid index for head " + start + "=" + m.headIndex + "-" + m.startIndex
          + ": originalSpan=[" + StringUtils.joinWords(m.originalSpan, " ") + "], head=" + m.headWord);
      Redwood.log("Setting head string to entire mention");
      m.headIndex = m.startIndex;
      m.headWord = m.originalSpan.size() > 0 ? m.originalSpan.get(0) : sent.get(m.startIndex);
      m.headString = m.originalSpan.toString();
    }
  }
}
mention.sentenceWords = sentence; mention.originalSpan = new ArrayList<>(mention.sentenceWords.subList(mention.startIndex, mention.endIndex)); if(!((CoreLabel) tree.label()).containsKey(CoreAnnotations.BeginIndexAnnotation.class)) tree.indexSpans(0); if(mention.headWord==null) { Tree headTree = ((RuleBasedCorefMentionFinder) mentionFinder).findSyntacticHead(mention, tree, sentence);
tree.indexSpans(0); logger.fine("Index spans were generated."); } else {
Tree tree = parse(extentTokens, constraints); tree.indexSpans(m.startIndex - ADDED_WORDS); // remember it has ADDED_WORDS extra words at the beginning Tree subtree = findPartialSpan(tree, m.startIndex);
Tree tree = parse(extentTokens, constraints); tree.indexSpans(m.startIndex - ADDED_WORDS); // remember it has ADDED_WORDS extra words at the beginning Tree subtree = findPartialSpan(tree, m.startIndex);
collapsedUnary.indexSpans();
logger.fine("No exact match found. Local parse:\n" + tree.pennString()); convertToCoreLabels(tree); tree.indexSpans(ent.getExtentTokenStart() - ADDED_WORDS); // remember it has ADDED_WORDS extra words at the beginning Tree subtree = findPartialSpan(tree, ent.getExtentTokenStart()); Tree extentHead = safeHead(subtree);
logger.fine("No exact match found. Local parse:\n" + tree.pennString()); convertToCoreLabels(tree); tree.indexSpans(ent.getExtentTokenStart()); Tree extentHead = safeHead(tree); assert (extentHead != null);
/**
 * Assigns a syntactic head to every entity mention of every sentence in the corpus.
 *
 * <p>For each sentence the token list and parse tree are fetched. When
 * {@code MachineReadingProperties.forceGenerationOfIndexSpans} is set, the
 * tree is span-indexed from 0. Each entity mention, if the sentence has any,
 * is then passed to {@code reader.assignSyntacticHead}.
 *
 * @param corpus annotation holding the sentences to process; must be non-null
 *               and carry a sentences annotation
 */
private void assignSyntacticHeadToEntities(Annotation corpus) {
  assert(corpus != null);
  assert(corpus.get(SentencesAnnotation.class) != null);
  for (CoreMap sent : corpus.get(SentencesAnnotation.class)) {
    List<CoreLabel> tokens = sent.get(TokensAnnotation.class);
    assert(tokens != null);
    Tree tree = sent.get(TreeAnnotation.class);
    // Validate the tree before its first use, so a missing parse is reported
    // by the assertion rather than by a NullPointerException from indexSpans.
    assert(tree != null);
    if (MachineReadingProperties.forceGenerationOfIndexSpans) {
      tree.indexSpans(0);
    }
    if (sent.get(EntityMentionsAnnotation.class) != null) {
      for (EntityMention e : sent.get(EntityMentionsAnnotation.class)) {
        reader.assignSyntacticHead(e, tree, tokens, true);
      }
    }
  }
}
final List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class); Tree tree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class); tree.indexSpans(0);
try { tree.indexSpans(); tree.setSpans();
/**
 * Assigns span indices (BeginIndexAnnotation and EndIndexAnnotation) to every
 * node of this tree whose label is a {@code CoreMap}.
 *
 * <p>Leaves are numbered in traversal order starting from the current value
 * of {@code startIndex}: a leaf numbered {@code i} covers {@code [i, i + 1)}
 * and advances the counter by one. An internal node covers the smallest
 * begin and the largest end among its children.
 *
 * @param startIndex shared counter holding the index for the next leaf; it
 *                   is incremented once per leaf visited
 * @return the (begin, end) pair computed for this node
 */
public Pair<Integer, Integer> indexSpans(MutableInteger startIndex) {
  int begin;
  int finish;
  if (isLeaf()) {
    begin = startIndex.intValue();
    finish = begin + 1;
    startIndex.incValue(1);
  } else {
    // Start from the widest possible sentinels and narrow over the children.
    begin = Integer.MAX_VALUE;
    finish = Integer.MIN_VALUE;
    for (Tree child : children()) {
      Pair<Integer, Integer> childSpan = child.indexSpans(startIndex);
      begin = Math.min(begin, childSpan.first);
      finish = Math.max(finish, childSpan.second);
    }
  }

  // Only CoreMap labels can hold the annotations; other labels are left alone.
  Label nodeLabel = label();
  if (nodeLabel instanceof CoreMap) {
    CoreMap annotations = (CoreMap) nodeLabel;
    annotations.set(CoreAnnotations.BeginIndexAnnotation.class, begin);
    annotations.set(CoreAnnotations.EndIndexAnnotation.class, finish);
  }
  return new Pair<>(begin, finish);
}
/**
 * Indexes all spans (constituents) in this tree using 0-based fenceposts.
 *
 * <p>A span is recorded as the fencepost before its first word and the
 * fencepost after its last word. Nothing is recorded on nodes whose label
 * is not a {@code CoreMap}.
 *
 * <p>Same as calling {@code indexSpans(0)}.
 */
public void indexSpans() {
  final int zeroBased = 0;
  indexSpans(zeroBased);
}
/**
 * Numbers the spans (constituents) of this tree, beginning at zero.
 *
 * <p>Each span is described by two fenceposts: the one left of its first
 * word and the one right of its last word. Only labels that are a
 * {@code CoreMap} receive the span annotations.
 *
 * <p>Convenience form of {@code indexSpans(0)}.
 */
public void indexSpans() {
  final int origin = 0;
  indexSpans(origin);
}
/**
 * Indexes the spans of this tree, beginning the numbering at {@code startIndex}.
 *
 * <p>The start value is boxed into a new {@code MutableInteger} and passed
 * to {@code indexSpans(MutableInteger)}; that overload's return value is
 * ignored.
 *
 * @param startIndex value at which indexing begins
 */
public void indexSpans(int startIndex) {
  MutableInteger nextIndex = new MutableInteger(startIndex);
  indexSpans(nextIndex);
}
/**
 * Fills in {@code headIndex}, {@code headWord} and {@code headString} for
 * each mention of one sentence.
 *
 * <p>The sentence's tree is span-indexed from 0 first. The head of each
 * mention comes from {@code findSyntacticHead}; its {@code IndexAnnotation}
 * minus one is used as the position of the head token in the sentence's
 * token list. The head string is the lower-cased text of that token.
 *
 * @param s        sentence carrying the tree and token annotations
 * @param mentions mentions of that sentence, updated in place
 */
private void findHead(CoreMap s, List<Mention> mentions) {
  Tree tree = s.get(TreeAnnotation.class);
  List<CoreLabel> sent = s.get(TokensAnnotation.class);
  tree.indexSpans(0);
  for (Mention m : mentions) {
    Tree head = findSyntacticHead(m, tree, sent);
    m.headIndex = ((CoreLabel) head.label()).get(IndexAnnotation.class) - 1;
    m.headWord = sent.get(m.headIndex);
    // Lower-case with a fixed locale so the result does not depend on the
    // JVM default (e.g. a Turkish default maps 'I' to a dotless 'i').
    m.headString = m.headWord.get(TextAnnotation.class).toLowerCase(java.util.Locale.ENGLISH);
  }
}