/**
 * Finds the index of the last token of the paragraph that contains {@code sentence}.
 *
 * The scan starts at position {@code SentenceIndexAnnotation} of {@code sentence} within
 * {@code sentences} and moves forward while each visited sentence carries the same
 * {@code ParagraphIndexAnnotation}; it stops at the first sentence from another paragraph.
 *
 * @param sentence the sentence whose paragraph is examined
 * @param sentences the sentence list that is scanned
 * @return {@code TokenEndAnnotation - 1} of the last sentence visited in the same paragraph
 */
private static int getParagraphEndToken(CoreMap sentence, List<CoreMap> sentences) {
  final int paragraphId = sentence.get(CoreAnnotations.ParagraphIndexAnnotation.class);
  // Default to the end of the given sentence in case the scan visits nothing.
  int lastToken = sentence.get(CoreAnnotations.TokenEndAnnotation.class) - 1;
  int idx = sentence.get(CoreAnnotations.SentenceIndexAnnotation.class);
  while (idx < sentences.size()) {
    CoreMap candidate = sentences.get(idx);
    if (candidate.get(CoreAnnotations.ParagraphIndexAnnotation.class) != paragraphId) {
      break;
    }
    lastToken = candidate.get(CoreAnnotations.TokenEndAnnotation.class) - 1;
    idx++;
  }
  return lastToken;
}
/**
 * Groups the document's quotes by paragraph.
 *
 * A quote is assigned to the paragraph ({@code ParagraphIndexAnnotation}) of the sentence
 * found at the quote's {@code SentenceBeginAnnotation} position in the document's sentence list.
 *
 * @param doc annotation providing {@code QuotationsAnnotation} and {@code SentencesAnnotation}
 * @return a map from paragraph index to the quotes beginning in that paragraph, in the
 *         order the quotes appear in {@code QuotationsAnnotation}
 */
private static Map<Integer, List<CoreMap>> getQuotesInParagraph(Annotation doc) {
  List<CoreMap> quotes = doc.get(CoreAnnotations.QuotationsAnnotation.class);
  List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  Map<Integer, List<CoreMap>> paragraphToQuotes = new HashMap<>();
  for (CoreMap quote : quotes) {
    CoreMap firstSentence = sentences.get(quote.get(CoreAnnotations.SentenceBeginAnnotation.class));
    Integer paragraph = firstSentence.get(CoreAnnotations.ParagraphIndexAnnotation.class);
    // computeIfAbsent only allocates a bucket when the paragraph is first seen
    // and avoids the repeated annotation and map lookups.
    paragraphToQuotes.computeIfAbsent(paragraph, k -> new ArrayList<>()).add(quote);
  }
  return paragraphToQuotes;
}
/**
 * Converts {@code tokens} to their string forms and delegates to {@code isAcronymImpl}.
 *
 * A {@code CoreMap} token contributes its {@code TextAnnotation}; any other token
 * (including a plain {@code String}) contributes {@code toString()}.
 *
 * @param str the candidate string passed through to {@code isAcronymImpl}
 * @param tokens the tokens to convert; elements may be strings, CoreMaps or other objects
 * @return the result of {@code isAcronymImpl(str, <converted tokens>)}
 */
public static boolean isAcronym(String str, List<?> tokens) {
  List<String> texts = new ArrayList<>(tokens.size());
  for (Object tok : tokens) {
    String text = (tok instanceof CoreMap)
        ? ((CoreMap) tok).get(CoreAnnotations.TextAnnotation.class)
        : tok.toString();
    texts.add(text);
  }
  return isAcronymImpl(str, texts);
}
/**
 * Extracts the entity that starts at the given position and returns it.
 *
 * The entity's type is {@code sequence[position]}; its words are the
 * {@code TextAnnotation} values of the {@code doc} elements at every consecutive position
 * (starting at {@code position}) whose label equals that type. Once the run ends,
 * {@code otherOccurrences} is filled in from {@code otherOccurrences(entity)}.
 *
 * @param sequence the label sequence
 * @param position index in {@code sequence} at which the entity starts
 * @return the newly built entity (this method does not add it to any list)
 * @throws ArrayIndexOutOfBoundsException if {@code position} is not a valid index of {@code sequence}
 **/
public Entity extractEntity(int[] sequence, int position) {
  Entity entity = new Entity();
  entity.type = sequence[position];
  entity.startPosition = position;
  entity.words = new ArrayList<>();
  // Collect the maximal run of positions sharing the entity's type.
  for (int i = position; i < sequence.length && sequence[i] == entity.type; i++) {
    entity.words.add(doc.get(i).get(CoreAnnotations.TextAnnotation.class));
  }
  // Computed once, after all words are known, whether the run ended at a different
  // label or at the end of the sequence.
  entity.otherOccurrences = otherOccurrences(entity);
  return entity;
}
/**
 * Appends {@code sentences} to the {@code SentencesAnnotation} list of {@code dataset}.
 *
 * If the dataset has no sentence list yet, a new empty list is created and stored on
 * the dataset before the sentences are appended.
 *
 * @param dataset the map whose {@code SentencesAnnotation} is extended
 * @param sentences the sentences to append, in order
 */
public static void addSentences(CoreMap dataset, List<CoreMap> sentences) {
  List<CoreMap> sents = dataset.get(CoreAnnotations.SentencesAnnotation.class);
  if (sents == null) {
    sents = new ArrayList<>();
    dataset.set(CoreAnnotations.SentencesAnnotation.class, sents);
  }
  // Bulk append instead of an element-by-element loop.
  sents.addAll(sentences);
}
/**
 * Returns the sentences of {@code doc} that belong to the given paragraph.
 *
 * @param doc annotation providing {@code SentencesAnnotation}
 * @param paragraph the {@code ParagraphIndexAnnotation} value to match
 * @return a new list holding, in document order, every sentence whose
 *         {@code ParagraphIndexAnnotation} equals {@code paragraph}
 */
public static List<CoreMap> getSentsInParagraph(Annotation doc, int paragraph) {
  List<CoreMap> sents = doc.get(CoreAnnotations.SentencesAnnotation.class);
  List<CoreMap> targets = Generics.newArrayList();
  for (CoreMap sent : sents) {
    if (sent.get(CoreAnnotations.ParagraphIndexAnnotation.class) == paragraph) {
      targets.add(sent);
    }
  }
  // Return the filtered list; returning `sents` here would hand back every sentence.
  return targets;
}
/**
 * Fallback for a sentence that could not be processed normally (per the method name).
 *
 * Builds a tree from the sentence's tokens with {@code ParserUtils.xTree}, assigns the
 * tag {@code "XX"} to every token that has no tag, and passes the single tree to
 * {@code finishSentence}.
 *
 * @param annotation the enclosing annotation (not used by this implementation)
 * @param sentence the sentence whose {@code TokensAnnotation} is read
 */
@Override
public void doOneFailedSentence(Annotation annotation, CoreMap sentence) {
  final List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
  // The tree is built before the tags are filled in, matching the established order.
  final Tree fallbackTree = ParserUtils.xTree(tokens);
  for (CoreLabel token : tokens) {
    if (token.tag() != null) {
      continue;
    }
    token.setTag("XX");
  }
  List<Tree> result = Generics.newArrayList(1);
  result.add(fallbackTree);
  finishSentence(sentence, result);
}
/**
 * Picks the mention nearest to {@code quote}, looking both before and after it.
 *
 * The backward search covers tokens {@code [0, TokenBegin - 1]}; the forward search
 * covers {@code [TokenEnd, lastDocToken]}. The backward candidate wins only when its
 * distance is strictly smaller; ties go to the forward candidate.
 *
 * NOTE(review): both candidates are dereferenced unconditionally. If either search
 * helper can return null this throws a NullPointerException; confirm against the helpers.
 *
 * @param quote the quote, carrying {@code TokenBeginAnnotation} and {@code TokenEndAnnotation}
 * @return the closer of the two candidate mentions
 */
public MentionData getClosestMention(CoreMap quote) {
  final int quoteBegin = quote.get(CoreAnnotations.TokenBeginAnnotation.class);
  MentionData before = findClosestMentionInSpanBackward(new Pair<>(0, quoteBegin - 1));

  final int quoteEnd = quote.get(CoreAnnotations.TokenEndAnnotation.class);
  final int lastDocToken = doc.get(CoreAnnotations.TokensAnnotation.class).size() - 1;
  MentionData after = findClosestMentionInSpanForward(new Pair<>(quoteEnd, lastDocToken));

  int backDistance = quoteBegin - before.end;
  int forwardDistance = after.begin - quoteEnd + 1;
  return backDistance < forwardDistance ? before : after;
}
/**
 * Creates a copy of {@code srcTokens}, deciding on the fly whether character offsets
 * need adjusting.
 *
 * Offsets are adjusted when any of the following holds: there is no source sentence,
 * the sentence has no {@code TextAnnotation}, the token list is empty, or the first
 * token has no {@code OriginalTextAnnotation}.
 *
 * @param srcTokens the tokens to copy
 * @param srcSentence the sentence the tokens come from; may be null
 * @return the result of {@code copyTokens(srcTokens, adjustCharacterOffsets, true)}
 */
public static List<CoreLabel> copyTokens(List<CoreLabel> srcTokens, CoreMap srcSentence) {
  final boolean adjustCharacterOffsets =
      srcSentence == null
          || srcSentence.get(CoreAnnotations.TextAnnotation.class) == null
          || srcTokens.isEmpty()
          || srcTokens.get(0).get(CoreAnnotations.OriginalTextAnnotation.class) == null;
  return copyTokens(srcTokens, adjustCharacterOffsets, true);
}
/**
 * Appends {@code args} to the {@code EntityMentionsAnnotation} list of {@code sentence}.
 *
 * When the sentence has no such list yet, an empty one is created and stored on the
 * sentence first.
 *
 * @param sentence the sentence whose entity-mention list is extended
 * @param args the entity mentions to append
 */
public static void addEntityMentions(CoreMap sentence, Collection<EntityMention> args) {
  List<EntityMention> mentions = sentence.get(MachineReadingAnnotations.EntityMentionsAnnotation.class);
  if (mentions != null) {
    mentions.addAll(args);
    return;
  }
  // No list yet: register a fresh one on the sentence, then fill it.
  List<EntityMention> created = new ArrayList<>();
  sentence.set(MachineReadingAnnotations.EntityMentionsAnnotation.class, created);
  created.addAll(args);
}
/**
 * Reports whether any token in the annotation carries a {@code SpeakerAnnotation}.
 *
 * @param annotation annotation providing {@code SentencesAnnotation}; each sentence
 *        is read for its {@code TokensAnnotation}
 * @return {@code true} as soon as one token with a non-null {@code SpeakerAnnotation}
 *         is found, {@code false} if there is none
 */
private static boolean hasSpeakerAnnotations(Annotation annotation) {
  return annotation.get(CoreAnnotations.SentencesAnnotation.class).stream()
      .anyMatch(sentence -> sentence.get(CoreAnnotations.TokensAnnotation.class).stream()
          .anyMatch(token -> token.get(CoreAnnotations.SpeakerAnnotation.class) != null));
}
/**
 * Numbers every token of the document consecutively.
 *
 * Walks the sentences of {@code doc.annotation} in order and stores a running,
 * zero-based counter as each token's {@code TokenBeginAnnotation}. The counter is not
 * reset between sentences.
 *
 * @param doc the document whose tokens are numbered
 */
private static void setTokenIndices(Document doc) {
  int nextIndex = 0;
  for (CoreMap sentence : doc.annotation.get(SentencesAnnotation.class)) {
    for (CoreLabel tok : sentence.get(TokensAnnotation.class)) {
      tok.set(TokenBeginAnnotation.class, nextIndex);
      nextIndex++;
    }
  }
}
/**
 * Converts a token's sentence-relative index into a document-level token position.
 *
 * Looks up the token's sentence through {@code SentenceIndexAnnotation} and adds the
 * token's {@code IndexAnnotation} to that sentence's {@code TokenBeginAnnotation}.
 * NOTE(review): the {@code - 1} suggests {@code IndexAnnotation} is 1-based; confirm.
 *
 * @param token the token to locate
 * @return sentence {@code TokenBeginAnnotation} + token {@code IndexAnnotation} - 1
 */
public int tokenToLocation(CoreLabel token) {
  List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  int sentenceIndex = token.get(CoreAnnotations.SentenceIndexAnnotation.class);
  CoreMap sentence = sentences.get(sentenceIndex);
  int sentenceStart = sentence.get(CoreAnnotations.TokenBeginAnnotation.class);
  int indexInSentence = token.get(CoreAnnotations.IndexAnnotation.class);
  return sentenceStart + indexInSentence - 1;
}
public EntityCachingAbstractSequencePriorBIO(String backgroundSymbol, Index<String> classIndex, Index<String> tagIndex, List<IN> doc) { this.classIndex = classIndex; this.tagIndex = tagIndex; this.backgroundSymbol = classIndex.indexOf(backgroundSymbol); this.numClasses = classIndex.size(); this.possibleValues = new int[numClasses]; for (int i=0; i<numClasses; i++) { possibleValues[i] = i; } this.wordDoc = new ArrayList<>(doc.size()); for (IN w: doc) { wordDoc.add(w.get(CoreAnnotations.TextAnnotation.class)); } }