/**
 * Fetches the first encountered Number set by SUTime.
 *
 * <p>Scans the list in order and stops at the first element that has a
 * {@code NumericCompositeValueAnnotation} key.
 *
 * @param l the elements to scan
 * @return the value stored under {@code NumericCompositeValueAnnotation} on the first
 *         element that contains that key (which may itself be {@code null}), or
 *         {@code null} if no element contains the key
 */
private static <E extends CoreMap> Number fetchNumberFromSUTime(List<E> l) {
  return l.stream()
      .filter(candidate -> candidate.containsKey(CoreAnnotations.NumericCompositeValueAnnotation.class))
      .findFirst()
      // Optional.map turns a null value into an empty Optional, so a key that is
      // present but mapped to null still yields null below.
      .map(candidate -> candidate.get(CoreAnnotations.NumericCompositeValueAnnotation.class))
      .orElse(null);
}
/**
 * Fetches the first encountered {@code Timex} in the given list.
 *
 * <p>Scans the list in order and stops at the first element that has a
 * {@code TimexAnnotation} key.
 *
 * @param l the elements to scan
 * @return the value stored under {@code TimexAnnotation} on the first element that
 *         contains that key (which may itself be {@code null}), or {@code null} if no
 *         element contains the key
 */
private static <E extends CoreMap> Timex fetchTimexFromSUTime(List<E> l) {
  return l.stream()
      .filter(candidate -> candidate.containsKey(TimeAnnotations.TimexAnnotation.class))
      .findFirst()
      // A present-but-null value collapses to an empty Optional and thus to null.
      .map(candidate -> candidate.get(TimeAnnotations.TimexAnnotation.class))
      .orElse(null);
}
private Optional<String> getPubDate(CoreMap document) { //--Get Date //(error checks) if (!document.containsKey(CoreAnnotations.CalendarAnnotation.class) && !document.containsKey(CoreAnnotations.DocDateAnnotation.class)) { throw new IllegalArgumentException("CoreMap must have either a Calendar or DocDate annotation"); //not strictly necessary, technically... } //(variables) Calendar dateCalendar = document.get(CoreAnnotations.CalendarAnnotation.class); if (dateCalendar != null) { //(case: calendar annotation) return Optional.of(String.format("%TF", dateCalendar)); } else { //(case: docdateannotation) String s = document.get(CoreAnnotations.DocDateAnnotation.class); if (s != null) { return Optional.of(s); } } return Optional.empty(); }
public static String getTokenText(List<? extends CoreMap> tokens, Class tokenTextKey, String delimiter) { StringBuilder sb = new StringBuilder(); int prevEndIndex = -1; for (CoreMap cm:tokens) { Object obj = cm.get(tokenTextKey); boolean includeDelimiter = sb.length() > 0; if (cm.containsKey(CoreAnnotations.CharacterOffsetBeginAnnotation.class) && cm.containsKey(CoreAnnotations.CharacterOffsetEndAnnotation.class)) { int beginIndex = cm.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class); int endIndex = cm.get(CoreAnnotations.CharacterOffsetEndAnnotation.class); if (prevEndIndex == beginIndex) { // No spaces includeDelimiter = false; } prevEndIndex = endIndex; } if (obj != null) { if (includeDelimiter) { sb.append(delimiter); } sb.append(obj); } } return sb.toString(); }
for (int i = 0; i < list.size(); i++) { CoreMap cm = list.get(i); if (cm.containsKey(CoreAnnotations.TokenBeginAnnotation.class) && cm.containsKey(CoreAnnotations.TokenEndAnnotation.class)) { tokenBeginToListIndexMap.put(cm.get(CoreAnnotations.TokenBeginAnnotation.class), i); tokenEndToListIndexMap.put(cm.get(CoreAnnotations.TokenEndAnnotation.class), i+1);
/** Make a new Annotation from a List of tokenized sentences. */ public Annotation(List<CoreMap> sentences) { super(); this.set(CoreAnnotations.SentencesAnnotation.class, sentences); List<CoreLabel> tokens = new ArrayList<>(); StringBuilder text = new StringBuilder(); for (CoreMap sentence : sentences) { List<CoreLabel> sentenceTokens = sentence.get(CoreAnnotations.TokensAnnotation.class); tokens.addAll(sentenceTokens); if (sentence.containsKey(CoreAnnotations.TextAnnotation.class)) { text.append(sentence.get(CoreAnnotations.TextAnnotation.class)); } else { // If there is no text in the sentence, fake it as best as we can if (text.length() > 0) { text.append('\n'); } text.append(SentenceUtils.listToString(sentenceTokens)); } } this.set(CoreAnnotations.TokensAnnotation.class, tokens); this.set(CoreAnnotations.TextAnnotation.class, text.toString()); }
/**
 * Copies every annotation from {@code src} into {@code dest} whose key is not already
 * present in {@code dest}. Keys that {@code dest} already contains are left untouched.
 *
 * @param src  the map to copy annotations from
 * @param dest the map that receives the missing annotations
 */
public static void copyUnsetAnnotations(CoreMap src, CoreMap dest) {
  for (Class key : src.keySet()) {
    if (dest.containsKey(key)) {
      // Already set on the destination: keep the existing value.
      continue;
    }
    dest.set(key, src.get(key));
  }
}
/**
 * Extracts matched expressions from the given annotation.
 *
 * <p>If the annotation has no {@code NumerizedTokensAnnotation} key yet, the result of
 * {@code NumberNormalizer.findAndMergeNumbers} is stored under that key first, so the
 * annotation passed in may be modified.
 *
 * @param annotation the annotation to extract expressions from
 * @return the expressions produced by the underlying extractor
 */
public List<MatchedExpression> extract(CoreMap annotation) {
  boolean alreadyNumerized = annotation.containsKey(CoreAnnotations.NumerizedTokensAnnotation.class);
  if (!alreadyNumerized) {
    annotation.set(CoreAnnotations.NumerizedTokensAnnotation.class,
        NumberNormalizer.findAndMergeNumbers(annotation));
  }
  return extractor.extractExpressions(annotation);
}
if(!document.containsKey(CoreAnnotations.CalendarAnnotation.class) && !document.containsKey(CoreAnnotations.DocDateAnnotation.class)){ throw new IllegalArgumentException("CoreMap must have either a Calendar or DocDate annotation"); //not strictly necessary, technically...
(!document.containsKey(CoreAnnotations.CalendarAnnotation.class) && !document.containsKey(CoreAnnotations.DocDateAnnotation.class));
/**
 * Returns SpanAnnotation of this node, or null if annotation is not assigned.
 * Use {@code setSpans()} to assign SpanAnnotations to a tree.
 *
 * <p>A span is only available when this node's label is a {@code CoreMap} that
 * contains the {@code SpanAnnotation} key.
 *
 * @return an IntPair: the SpanAnnotation of this node, or {@code null} when the label
 *         is not a CoreMap or does not contain the annotation
 */
public IntPair getSpan() {
  // Labels that are not CoreMaps cannot carry annotations at all.
  if (!(label() instanceof CoreMap)) {
    return null;
  }
  if (!((CoreMap) label()).containsKey(CoreAnnotations.SpanAnnotation.class)) {
    return null;
  }
  return ((CoreMap) label()).get(CoreAnnotations.SpanAnnotation.class);
}
/**
 * Convert a CoreMap into a simple Sentence object.
 * Note that this is a copy operation -- the implementing CoreMap will not be updated, and all of its
 * contents are copied over to the protocol buffer format backing the {@link Sentence} object.
 *
 * @param sentence The CoreMap representation of the sentence.
 */
public Sentence(CoreMap sentence) {
  // The wrapping Annotation is built by a static helper rather than with double-brace
  // initialization, so no anonymous Annotation subclass is created. The helper must be
  // static because it runs inside the explicit this(...) invocation.
  this(new Document(singleSentenceAnnotation(sentence)), sentence);
}

/**
 * Builds a document-level Annotation that wraps exactly one sentence.
 *
 * <p>The annotation's text is the sentence's {@code TextAnnotation} value, its
 * {@code SentencesAnnotation} is a singleton list holding the sentence, and the
 * sentence's {@code DocIDAnnotation} is copied over when the sentence contains that key.
 *
 * @param sentence the sentence to wrap
 * @return a new Annotation containing only the given sentence
 */
private static Annotation singleSentenceAnnotation(CoreMap sentence) {
  Annotation wrapper = new Annotation(sentence.get(CoreAnnotations.TextAnnotation.class));
  wrapper.set(CoreAnnotations.SentencesAnnotation.class, Collections.singletonList(sentence));
  if (sentence.containsKey(CoreAnnotations.DocIDAnnotation.class)) {
    wrapper.set(CoreAnnotations.DocIDAnnotation.class, sentence.get(CoreAnnotations.DocIDAnnotation.class));
  }
  return wrapper;
}
if (!other.containsKey(key)) { return false;
if (sentences.size() > 0 && !sentences.get(0).containsKey(TreeCoreAnnotations.TreeAnnotation.class)) { logger.info("Annotating dataset with " + processor); processor.annotate(dataset);
if (token.containsKey(CoreAnnotations.CharacterOffsetBeginAnnotation.class) && token.containsKey(CoreAnnotations.CharacterOffsetEndAnnotation.class)) { setSingleElement(wordInfo, "CharacterOffsetBegin", curNS, Integer.toString(token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class))); setSingleElement(wordInfo, "CharacterOffsetEnd", curNS, Integer.toString(token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class))); if (token.containsKey(CoreAnnotations.PartOfSpeechAnnotation.class)) { setSingleElement(wordInfo, "POS", curNS, token.get(CoreAnnotations.PartOfSpeechAnnotation.class)); if (token.containsKey(CoreAnnotations.NamedEntityTagAnnotation.class)) { setSingleElement(wordInfo, "NER", curNS, token.get(CoreAnnotations.NamedEntityTagAnnotation.class)); if (token.containsKey(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class)) { setSingleElement(wordInfo, "NormalizedNER", curNS, token.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class)); if (token.containsKey(CoreAnnotations.SpeakerAnnotation.class)) { setSingleElement(wordInfo, "Speaker", curNS, token.get(CoreAnnotations.SpeakerAnnotation.class)); if (token.containsKey(TimeAnnotations.TimexAnnotation.class)) { Timex timex = token.get(TimeAnnotations.TimexAnnotation.class); Element timexElem = new Element("Timex", curNS); if (token.containsKey(CoreAnnotations.TrueCaseAnnotation.class)) { Element cur = new Element("TrueCase", curNS); cur.appendChild(token.get(CoreAnnotations.TrueCaseAnnotation.class)); wordInfo.appendChild(cur); if (token.containsKey(CoreAnnotations.TrueCaseTextAnnotation.class)) { Element cur = new Element("TrueCaseText", curNS); cur.appendChild(token.get(CoreAnnotations.TrueCaseTextAnnotation.class));
if (sentence.containsKey(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class)) { for (IndexedWord token : sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class).vertexSet()) { token.set(PolarityAnnotation.class, Polarity.DEFAULT); if (sentence.containsKey(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class)) { for (IndexedWord token : sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class).vertexSet()) { token.set(PolarityAnnotation.class, Polarity.DEFAULT); if (sentence.containsKey(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class)) { for (IndexedWord token : sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class).vertexSet()) { token.set(PolarityAnnotation.class, Polarity.DEFAULT);
public List<TimeExpression> extractTimeExpressions(CoreMap annotation, SUTime.Time refDate, SUTime.TimeIndex timeIndex) { if (!annotation.containsKey(CoreAnnotations.NumerizedTokensAnnotation.class)) { try { List<CoreMap> mergedNumbers = NumberNormalizer.findAndMergeNumbers(annotation);
int end = timeExpression.get(CoreAnnotations.TokenEndAnnotation.class); int offset = 0; if(sentence != null && sentence.containsKey(CoreAnnotations.TokenBeginAnnotation.class)) { offset = sentence.get(CoreAnnotations.TokenBeginAnnotation.class); if(number.containsKey(CoreAnnotations.NumericCompositeValueAnnotation.class)){ int start = number.get(CoreAnnotations.TokenBeginAnnotation.class); int end = number.get(CoreAnnotations.TokenEndAnnotation.class); int offset = 0; if(sentence != null && sentence.containsKey(CoreAnnotations.TokenBeginAnnotation.class)) { offset = sentence.get(CoreAnnotations.TokenBeginAnnotation.class);
if (sentence.containsKey(TokensAnnotation.class)) { for (CoreLabel tok : sentence.get(TokensAnnotation.class)) { builder.addToken(toProto(tok)); } keysToSerialize.remove(TokensAnnotation.class); if (sentence.containsKey(SegmenterCoreAnnotations.CharactersAnnotation.class)) { for (CoreLabel c : sentence.get(SegmenterCoreAnnotations.CharactersAnnotation.class)) { builder.addCharacter(toProto(c));
annotation = aggregator.merge((List<? extends CoreMap>) sourceAnnotation.get(tokensAnnotationKey), chunkOffsets.getBegin(), chunkOffsets.getEnd()); if (sourceAnnotation.containsKey(CoreAnnotations.TextAnnotation.class)) { ChunkAnnotationUtils.annotateChunkText(annotation, sourceAnnotation);