/**
 * Builds a fresh {@code IntPair} from the first two entries of {@code elements}.
 *
 * @return a newly allocated {@code IntPair} holding {@code elements[0]} and {@code elements[1]}
 */
@Override
public IntTuple getCopy() {
  final int first = elements[0];
  final int second = elements[1];
  return new IntPair(first, second);
}
/**
 * Packs this object's {@code startIndex} and {@code endIndex} into a pair.
 *
 * @return a new {@code IntPair} of {@code (startIndex, endIndex)}
 */
public IntPair getIndices() {
  final IntPair span = new IntPair(startIndex, endIndex);
  return span;
}
/**
 * Get the CorefMentions registered at a position.
 *
 * @param sentenceNumber first component of the lookup key
 * @param headIndex second component of the lookup key
 * @return whatever {@code mentionMap} holds for the key {@code (sentenceNumber, headIndex)}
 */
public Set<CorefMention> getMentionsWithSameHead(int sentenceNumber, int headIndex) {
  final IntPair position = new IntPair(sentenceNumber, headIndex);
  return mentionMap.get(position);
}
/**
 * Delete a mention from this coreference chain.
 * <p>
 * The mention is removed from {@code mentions} and from the group of mentions stored in
 * {@code mentionMap} under its {@code (sentNum, headIndex)} position. The position key itself
 * is dropped only once no mention remains at that position.
 *
 * @param m The mention to delete.
 */
public void deleteMention(CorefMention m) {
  this.mentions.remove(m);
  IntPair position = new IntPair(m.sentNum, m.headIndex);
  // mentionMap groups mentions by head position, so removing the whole key would also
  // discard any other mention that shares this head. Remove only the requested mention.
  if (this.mentionMap.containsKey(position)) {
    this.mentionMap.get(position).remove(m);
    if (this.mentionMap.get(position).isEmpty()) {
      this.mentionMap.remove(position);
    }
  }
}
/**
 * Delete a mention from this coreference chain.
 * <p>
 * Removes {@code m} from {@code mentions} and from the set kept in {@code mentionMap} for the
 * position {@code (m.sentNum, m.headIndex)}. The map entry is removed only when that set
 * becomes empty, so other mentions with the same head stay reachable.
 *
 * @param m The mention to delete.
 */
public void deleteMention(CorefMention m) {
  this.mentions.remove(m);
  IntPair position = new IntPair(m.sentNum, m.headIndex);
  // Several mentions can share one head position; dropping the key outright would lose them.
  if (this.mentionMap.containsKey(position)) {
    this.mentionMap.get(position).remove(m);
    if (this.mentionMap.get(position).isEmpty()) {
      this.mentionMap.remove(position);
    }
  }
}
/**
 * Looks up the position of the pair {@code (x, y)} in {@code instanceIndex}.
 *
 * @param x first component of the pair
 * @param y second component of the pair
 * @return the value returned by {@code instanceIndex.indexOf} for that pair
 */
int indexOf(int x, int y) {
  return instanceIndex.indexOf(new IntPair(x, y));
}
/**
 * Get the CorefMentions registered at a position.
 * Wraps the two coordinates in an {@code IntPair} and delegates to the
 * {@code IntPair}-based overload.
 *
 * @param sentenceNumber first component of the position
 * @param headIndex second component of the position
 * @return the result of the {@code IntPair} overload for {@code (sentenceNumber, headIndex)}
 */
public Set<CorefMention> getMentionsWithSameHead(int sentenceNumber, int headIndex) {
  final IntPair position = new IntPair(sentenceNumber, headIndex);
  return getMentionsWithSameHead(position);
}
/**
 * Creates a mention for the token span {@code [beginIdx, endIdx)} and records it, unless the
 * span is already in {@code mentionSpanSet} or {@code insideNE} reports it as lying inside a
 * named-entity span.
 *
 * @param beginIdx start index of the span, used as the lower bound of {@code sent.subList}
 * @param endIdx end index of the span, used as the (exclusive) upper bound of {@code sent.subList}
 * @param headword head of the mention; its {@code index() - 1} becomes the mention's head index
 * @param mentions list that receives the new mention
 * @param mentionSpanSet spans already covered by a mention; updated when a mention is added
 * @param namedEntitySpanSet spans passed to {@code insideNE}
 * @param sent tokens of the sentence
 * @param basic graph handed to the {@code Mention} constructor
 * @param enhanced graph handed to the {@code Mention} constructor
 */
private void addMention(int beginIdx, int endIdx, IndexedWord headword, List<Mention> mentions,
    Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet, List<CoreLabel> sent,
    SemanticGraph basic, SemanticGraph enhanced) {
  final IntPair span = new IntPair(beginIdx, endIdx);
  if (mentionSpanSet.contains(span) || insideNE(span, namedEntitySpanSet)) {
    return;  // duplicate span, or span nested in a named entity
  }

  final int dummyMentionId = -1;
  final Mention mention = new Mention(dummyMentionId, beginIdx, endIdx, sent, basic, enhanced,
      new ArrayList<>(sent.subList(beginIdx, endIdx)));

  // headword.index() is shifted by one to address the token list
  mention.headIndex = headword.index() - 1;
  mention.headWord = sent.get(mention.headIndex);
  mention.headString = mention.headWord.word().toLowerCase(Locale.ENGLISH);

  mentions.add(mention);
  mentionSpanSet.add(span);
}
/** Returns a chunk annotation based on char offsets. * * @param annotation Annotation from which to extract the text for this chunk * @param charOffsetStart Start character offset * @param charOffsetEnd End (not inclusive) character offset * @return An Annotation representing the new chunk. Or {@code null} if no chunk matches offsets. */ public static CoreMap getAnnotatedChunkUsingCharOffsets(CoreMap annotation, int charOffsetStart, int charOffsetEnd) { // TODO: make more efficient search List<CoreMap> cm = getAnnotatedChunksUsingSortedCharOffsets(annotation, CollectionUtils.makeList(new IntPair(charOffsetStart, charOffsetEnd))); if ( ! cm.isEmpty()) { return cm.get(0); } else { return null; } }
/**
 * Scans every token of the document for runs whose {@code UtteranceAnnotation} is non-zero and
 * calls {@code findQuotationSpeaker} once per run.
 * <p>
 * A run opens at the first token with a non-zero utterance index and closes at the first
 * following token whose utterance index is zero. Positions are {@code (sentence, token)} pairs.
 * A run still open at the end of the document is closed at the last token of the last sentence.
 *
 * @param doc document whose {@code annotation} supplies the sentences
 * @param dict dictionaries forwarded to {@code findQuotationSpeaker}
 */
private static void findSpeakersInArticle(Document doc, Dictionaries dict) {
  List<CoreMap> sentences = doc.annotation.get(CoreAnnotations.SentencesAnnotation.class);
  IntPair quoteStart = null;
  boolean inQuote = false;
  int utterNum = -1;

  for (int sentIdx = 0; sentIdx < sentences.size(); sentIdx++) {
    List<CoreLabel> tokens = sentences.get(sentIdx).get(CoreAnnotations.TokensAnnotation.class);
    for (int tokIdx = 0; tokIdx < tokens.size(); tokIdx++) {
      int utterIndex = tokens.get(tokIdx).get(CoreAnnotations.UtteranceAnnotation.class);
      boolean quoted = utterIndex != 0;
      if (quoted == inQuote) {
        continue;  // no boundary crossed at this token
      }
      if (quoted) {
        // entering a quotation
        utterNum = utterIndex;
        inQuote = true;
        quoteStart = new IntPair(sentIdx, tokIdx);
      } else {
        // leaving a quotation
        inQuote = false;
        IntPair quoteEnd = new IntPair(sentIdx, tokIdx);
        findQuotationSpeaker(doc, utterNum, sentences, quoteStart, quoteEnd, dict);
      }
    }
  }

  if (inQuote) {
    // quotation runs to the end of the document
    int lastSent = sentences.size() - 1;
    int lastTok = sentences.get(lastSent).get(CoreAnnotations.TokensAnnotation.class).size() - 1;
    IntPair quoteEnd = new IntPair(lastSent, lastTok);
    findQuotationSpeaker(doc, utterNum, sentences, quoteStart, quoteEnd, dict);
  }
}
public CorefChain(CorefCluster c, Map<Mention, IntTuple> positions){ chainID = c.clusterID; // Collect mentions mentions = new ArrayList<>(); mentionMap = Generics.newHashMap(); CorefMention represents = null; for (Mention m : c.getCorefMentions()) { CorefMention men = new CorefMention(m, positions.get(m)); mentions.add(men); } Collections.sort(mentions, new CorefMentionComparator()); // Find representative mention for (CorefMention men : mentions) { IntPair position = new IntPair(men.sentNum, men.headIndex); if (!mentionMap.containsKey(position)) mentionMap.put(position, Generics.<CorefMention>newHashSet()); mentionMap.get(position).add(men); if (men.moreRepresentativeThan(represents)) { represents = men; } } representative = represents; }
public CorefChain(CorefCluster c, Map<Mention, IntTuple> positions){ chainID = c.clusterID; // Collect mentions mentions = new ArrayList<>(); mentionMap = Generics.newHashMap(); CorefMention represents = null; for (Mention m : c.getCorefMentions()) { CorefMention men = new CorefMention(m, positions.get(m)); mentions.add(men); } Collections.sort(mentions, new CorefMentionComparator()); // Find representative mention for (CorefMention men : mentions) { IntPair position = new IntPair(men.sentNum, men.headIndex); if (!mentionMap.containsKey(position)) mentionMap.put(position, Generics.<CorefMention>newHashSet()); mentionMap.get(position).add(men); if (men.moreRepresentativeThan(represents)) { represents = men; } } representative = represents; }
/**
 * Adds, sentence by sentence, a new mention for every gold mention whose
 * {@code (startIndex, endIndex)} span is not yet in that sentence's span set.
 * <p>
 * The new mention is built over the sentence tokens with the basic dependencies and, as the
 * second graph, the enhanced dependencies when present, else the basic ones again. It is
 * appended to the sentence's predicted mentions and its span is recorded.
 *
 * @param sentences sentences of the document
 * @param mentionSpanSetList per-sentence sets of spans already covered; updated in place
 * @param predictedMentions per-sentence mention lists; updated in place
 * @param allGoldMentions per-sentence gold mentions
 */
protected static void addGoldMentions(List<CoreMap> sentences, List<Set<IntPair>> mentionSpanSetList,
    List<List<Mention>> predictedMentions, List<List<Mention>> allGoldMentions) {
  for (int i = 0, sz = sentences.size(); i < sz; i++) {
    List<Mention> predicted = predictedMentions.get(i);
    CoreMap sent = sentences.get(i);
    List<CoreLabel> tokens = sent.get(TokensAnnotation.class);
    Set<IntPair> knownSpans = mentionSpanSetList.get(i);
    List<Mention> golds = allGoldMentions.get(i);

    for (Mention gold : golds) {
      IntPair goldSpan = new IntPair(gold.startIndex, gold.endIndex);
      if (knownSpans.contains(goldSpan)) {
        continue;  // span already has a mention
      }
      int dummyMentionId = -1;
      Mention added = new Mention(
          dummyMentionId,
          gold.startIndex,
          gold.endIndex,
          tokens,
          sent.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class),
          // fall back to the basic graph when no enhanced graph is attached
          sent.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class) != null
              ? sent.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class)
              : sent.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class),
          new ArrayList<>(tokens.subList(gold.startIndex, gold.endIndex)));
      predicted.add(added);
      knownSpans.add(goldSpan);
    }
  }
}
/** When mention boundaries are given */ public List<List<Mention>> filterPredictedMentions(List<List<Mention>> allGoldMentions, Annotation doc, Dictionaries dict){ List<List<Mention>> predictedMentions = new ArrayList<>(); for(int i = 0 ; i < allGoldMentions.size(); i++){ CoreMap s = doc.get(CoreAnnotations.SentencesAnnotation.class).get(i); List<Mention> goldMentions = allGoldMentions.get(i); List<Mention> mentions = new ArrayList<>(); predictedMentions.add(mentions); mentions.addAll(goldMentions); findHead(s, mentions); // todo [cdm 2013]: This block seems to do nothing - the two sets are never used Set<IntPair> mentionSpanSet = Generics.newHashSet(); Set<IntPair> namedEntitySpanSet = Generics.newHashSet(); for(Mention m : mentions) { mentionSpanSet.add(new IntPair(m.startIndex, m.endIndex)); if(!m.headWord.get(CoreAnnotations.NamedEntityTagAnnotation.class).equals("O")) { namedEntitySpanSet.add(new IntPair(m.startIndex, m.endIndex)); } } setBarePlural(mentions); removeSpuriousMentions(s, mentions, dict); } return predictedMentions; }
/** When mention boundaries are given */ public List<List<Mention>> filterPredictedMentions(List<List<Mention>> allGoldMentions, Annotation doc, Dictionaries dict, Properties props){ List<List<Mention>> predictedMentions = new ArrayList<>(); for(int i = 0 ; i < allGoldMentions.size(); i++){ CoreMap s = doc.get(CoreAnnotations.SentencesAnnotation.class).get(i); List<Mention> goldMentions = allGoldMentions.get(i); List<Mention> mentions = new ArrayList<>(); predictedMentions.add(mentions); mentions.addAll(goldMentions); findHead(s, mentions); // todo [cdm 2013]: This block seems to do nothing - the two sets are never used Set<IntPair> mentionSpanSet = Generics.newHashSet(); Set<IntPair> namedEntitySpanSet = Generics.newHashSet(); for(Mention m : mentions) { mentionSpanSet.add(new IntPair(m.startIndex, m.endIndex)); if(!m.headWord.get(CoreAnnotations.NamedEntityTagAnnotation.class).equals("O")) { namedEntitySpanSet.add(new IntPair(m.startIndex, m.endIndex)); } } setBarePlural(mentions); } removeSpuriousMentions(doc, predictedMentions, dict, CorefProperties.removeNestedMentions(props), lang); return predictedMentions; }
/**
 * Enumerates every {@code (x, y)} pair with {@code 0 <= x < px.length} and
 * {@code 0 <= y < numY(x)} and adds it to a new index, x-major.
 *
 * @return a new {@code HashIndex} containing the pairs in enumeration order
 */
public Index<IntPair> createIndex() {
  final Index<IntPair> pairs = new HashIndex<>();
  for (int xi = 0; xi < px.length; xi++) {
    final int yCount = numY(xi);
    for (int yi = 0; yi < yCount; yi++) {
      pairs.add(new IntPair(xi, yi));
    }
  }
  return pairs;
}
private IntPair getNPSpanOld(IndexedWord headword, SemanticGraph dep, List<CoreLabel> sent) { IndexedWord cop = dep.getChildWithReln(headword, UniversalEnglishGrammaticalRelations.COPULA); Pair<IndexedWord, IndexedWord> leftRight = SemanticGraphUtils.leftRightMostChildVertices(headword, dep); // headword can be first or last word int beginIdx = Math.min(headword.index()-1, leftRight.first.index()-1); int endIdx = Math.max(headword.index()-1, leftRight.second.index()-1); // no copula relation if(cop==null) return new IntPair(beginIdx, endIdx); // if we have copula relation List<IndexedWord> children = dep.getChildList(headword); int copIdx = children.indexOf(cop); if(copIdx+1 < children.size()) { beginIdx = Math.min(headword.index()-1, SemanticGraphUtils.leftMostChildVertice(children.get(copIdx+1), dep).index()-1); } else { beginIdx = headword.index()-1; } return new IntPair(beginIdx, endIdx); }
protected static void extractNPorPRP(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) { List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class); Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class); tree.indexLeaves(); SemanticGraph dependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class); TregexPattern tgrepPattern = npOrPrpMentionPattern; TregexMatcher matcher = tgrepPattern.matcher(tree); while (matcher.find()) { Tree t = matcher.getMatch(); List<Tree> mLeaves = t.getLeaves(); int beginIdx = ((CoreLabel)mLeaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class)-1; int endIdx = ((CoreLabel)mLeaves.get(mLeaves.size()-1).label()).get(CoreAnnotations.IndexAnnotation.class); if (",".equals(sent.get(endIdx-1).word())) { endIdx--; } // try not to have span that ends with , IntPair mSpan = new IntPair(beginIdx, endIdx); if(!mentionSpanSet.contains(mSpan) && !insideNE(mSpan, namedEntitySpanSet)) { int dummyMentionId = -1; Mention m = new Mention(dummyMentionId, beginIdx, endIdx, dependency, new ArrayList<>(sent.subList(beginIdx, endIdx)), t); mentions.add(m); mentionSpanSet.add(mSpan); } } } /** Extract enumerations (A, B, and C) */
/**
 * Creates a tuple of the requested size, using the specialised class for sizes 1 to 4.
 *
 * @param num requested number of elements
 * @return {@code IntUni}, {@code IntPair}, {@code IntTriple} or {@code IntQuadruple} for
 *         1, 2, 3 or 4 respectively; otherwise {@code new IntTuple(num)}
 */
public static IntTuple getIntTuple(int num) {
  switch (num) {
    case 1:
      return new IntUni();
    case 2:
      return new IntPair();
    case 3:
      return new IntTriple();
    case 4:
      return new IntQuadruple();
    default:
      return new IntTuple(num);
  }
}
/** * Same as int constituents but just puts the span as an IntPair * in the CoreLabel of the nodes. * * @param left The left position to begin labeling from * @return The index of the right frontier of the constituent */ private int constituentsNodes(int left) { if (isLeaf()) { if (label() instanceof CoreLabel) { ((CoreLabel) label()).set(CoreAnnotations.SpanAnnotation.class, new IntPair(left, left)); } else { throw new UnsupportedOperationException("Can only set spans on trees which use CoreLabel"); } return (left + 1); } int position = left; // enumerate through daughter trees Tree[] kids = children(); for (Tree kid : kids) position = kid.constituentsNodes(position); //Parent span if (label() instanceof CoreLabel) { ((CoreLabel) label()).set(CoreAnnotations.SpanAnnotation.class, new IntPair(left, position - 1)); } else { throw new UnsupportedOperationException("Can only set spans on trees which use CoreLabel"); } return position; }