// Refine search
/**
 * Build a Document from parsed input plus externally supplied predicted mentions.
 *
 * @param input parsed input document carrying the annotation, gold mentions,
 *              document info, and (for CoNLL input) the CoNLL document
 * @param mentions predicted mentions, one list per sentence
 */
public Document(InputDoc input, List<List<Mention>> mentions) {
  this();
  this.annotation = input.annotation;
  this.numSentences = input.annotation.get(SentencesAnnotation.class).size();
  this.predictedMentions = mentions;
  this.goldMentions = input.goldMentions;
  this.docInfo = input.docInfo;
  // Stays null when the input did not come from a CoNLL file.
  this.conllDoc = input.conllDoc;
}
/**
 * Collect the indices, within the given token span, of tokens whose word form
 * appears in the animacy word set.
 *
 * @param span inclusive (begin, end) token indices to scan; clipped to the
 *             document's token count
 * @return indices of animate tokens found inside the span (possibly empty)
 */
public List<Integer> scanForAnimates(Pair<Integer, Integer> span) {
  List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class);
  List<Integer> animateIndices = new ArrayList<>();
  int last = Math.min(span.second, tokens.size() - 1);
  for (int idx = span.first; idx <= last; idx++) {
    if (animacySet.contains(tokens.get(idx).word())) {
      animateIndices.add(idx);
    }
  }
  return animateIndices;
}
/**
 * Parse a string into a single Temporal expression, or return null when the
 * text does not yield exactly one time expression.
 *
 * @param str raw text to annotate with the time pipeline
 * @return the parsed Temporal, or null when annotation produces no sentences,
 *         zero or multiple timex annotations, or no TimeExpression on the timex
 */
public static Temporal parseOrNull(String str) {
  Annotation doc = new Annotation(str);
  pipeline.annotate(doc);
  // Cache repeated annotation lookups instead of re-fetching each time.
  List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  if (sentences == null || sentences.isEmpty()) {
    return null;
  }
  List<CoreMap> timexAnnotations = doc.get(TimeAnnotations.TimexAnnotations.class);
  // Guard against a missing timex annotation (previously an NPE on .size())
  // and require exactly one time expression.
  if (timexAnnotations == null || timexAnnotations.size() != 1) {
    return null;
  }
  TimeExpression expression = timexAnnotations.get(0).get(TimeExpression.Annotation.class);
  return expression == null ? null : expression.getTemporal();
}
/**
 * Find the mention closest to the quote, searching both backward (tokens
 * before the quote) and forward (tokens after it).
 *
 * Robustness fix: the span searches can find nothing on one side; previously
 * that produced a NullPointerException on field access. We now fall back to
 * whichever side produced a mention.
 *
 * @param quote quote CoreMap with TokenBegin/TokenEnd annotations set
 * @return the nearest MentionData, or null when neither side has a mention
 */
public MentionData getClosestMention(CoreMap quote) {
  int quoteBegin = quote.get(CoreAnnotations.TokenBeginAnnotation.class);
  int quoteEnd = quote.get(CoreAnnotations.TokenEndAnnotation.class);
  int numTokens = doc.get(CoreAnnotations.TokensAnnotation.class).size();
  MentionData closestBackward =
      findClosestMentionInSpanBackward(new Pair<>(0, quoteBegin - 1));
  MentionData closestForward =
      findClosestMentionInSpanForward(new Pair<>(quoteEnd, numTokens - 1));
  // NOTE(review): assumes findClosestMentionInSpan{Backward,Forward} return
  // null when the span contains no mention — confirm against their code.
  if (closestBackward == null) {
    return closestForward;
  }
  if (closestForward == null) {
    return closestBackward;
  }
  int backDistance = quoteBegin - closestBackward.end;
  int forwardDistance = closestForward.begin - quoteEnd + 1;
  return backDistance < forwardDistance ? closestBackward : closestForward;
}
/**
 * Assign a document-wide running index to every token, stored under
 * TokenBeginAnnotation, walking sentences and their tokens in order.
 *
 * @param doc document whose annotation supplies the sentences to index
 */
private static void setTokenIndices(Document doc) {
  int nextIndex = 0;
  for (CoreMap sentence : doc.annotation.get(SentencesAnnotation.class)) {
    for (CoreLabel token : sentence.get(TokensAnnotation.class)) {
      token.set(TokenBeginAnnotation.class, nextIndex);
      nextIndex++;
    }
  }
}
/**
 * Log every gold mention as a recall error: for each sentence, log the
 * sentence, the tokens covered by each gold mention span, and the parse tree.
 *
 * @param goldMentions gold mentions, one list per sentence
 * @param predictedMentions predicted mentions per sentence (not used here)
 * @param doc annotated document supplying sentences, tokens, and parse trees
 * @throws IOException declared for caller compatibility
 */
private static void recallErrors(List<List<Mention>> goldMentions,
    List<List<Mention>> predictedMentions, Annotation doc) throws IOException {
  List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  for (int sentIdx = 0; sentIdx < goldMentions.size(); sentIdx++) {
    CoreMap sentence = sentences.get(sentIdx);
    List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
    Tree parseTree = sentence.get(TreeCoreAnnotations.TreeAnnotation.class);
    for (Pair<Integer, Integer> span : extractSpans(goldMentions.get(sentIdx))) {
      logger.finer("RECALL ERROR\n");
      logger.finer(sentence + "\n");
      // Span end is exclusive here, matching the original loop bound.
      for (int tok = span.first; tok < span.second; tok++) {
        logger.finer(tokens.get(tok).value() + " ");
      }
      logger.finer("\n" + parseTree + "\n");
    }
  }
}
/**
 * Reorder each sentence's predicted mentions by span, replacing the
 * per-sentence lists in place on the document.
 *
 * @param doc document whose predictedMentions are reordered
 * @param headFinder head finder (unused in this method)
 * @throws Exception propagated for interface compatibility
 */
private static void mentionReordering(Document doc, HeadFinder headFinder) throws Exception {
  List<List<Mention>> mentionsPerSentence = doc.predictedMentions;
  int sentenceCount = doc.annotation.get(SentencesAnnotation.class).size();
  for (int sentIdx = 0; sentIdx < sentenceCount; sentIdx++) {
    List<Mention> sentenceMentions = mentionsPerSentence.get(sentIdx);
    mentionsPerSentence.set(sentIdx, mentionReorderingBySpan(sentenceMentions));
  }
}
public void oneNameSentence(Annotation doc) { List<CoreMap> quotes = doc.get(CoreAnnotations.QuotationsAnnotation.class); for(CoreMap quote : quotes) { if (quote.get(QuoteAttributionAnnotator.MentionAnnotation.class) != null) { continue; } Pair<Integer, Integer> range = QuoteAttributionUtils.getRemainderInSentence(doc, quote); if(range == null) { continue; } Pair<ArrayList<String>, ArrayList<Pair<Integer, Integer>>> namesAndNameIndices = scanForNames(range); ArrayList<String> names = namesAndNameIndices.first; ArrayList<Pair<Integer, Integer>> nameIndices = namesAndNameIndices.second; ArrayList<Integer> pronounsIndices = scanForPronouns(range); if (names.size() == 1) { List<Person> p = characterMap.get(names.get(0)); //guess if exactly one name if (p.size() == 1 && pronounsIndices.size() == 0) { fillInMention(quote, tokenRangeToString(nameIndices.get(0)), nameIndices.get(0).first, nameIndices.get(0).second, sieveName, NAME); } } } } }
/**
 * Test whether a character offset falls within the character extent spanned
 * by a token range (inclusive at both ends).
 *
 * @param tokenRange (first, last) token indices into the document token list
 * @param charIndex character offset to test
 * @return true when charIndex lies between the first token's begin position
 *         and the last token's end position, inclusive
 */
public boolean rangeContainsCharIndex(Pair<Integer, Integer> tokenRange, int charIndex) {
  List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class);
  int rangeCharStart = tokens.get(tokenRange.first()).beginPosition();
  int rangeCharEnd = tokens.get(tokenRange.second()).endPosition();
  return rangeCharStart <= charIndex && charIndex <= rangeCharEnd;
}
/**
 * Number every token in the document with a running index, written into
 * TokenBeginAnnotation, in sentence order.
 *
 * @param doc document whose annotation holds the sentences to index
 */
private static void setTokenIndices(Document doc) {
  int runningIndex = 0;
  for (CoreMap sent : doc.annotation.get(SentencesAnnotation.class)) {
    for (CoreLabel tok : sent.get(TokensAnnotation.class)) {
      tok.set(TokenBeginAnnotation.class, runningIndex);
      runningIndex += 1;
    }
  }
}
public static void addEnhancedSentences(Annotation doc) { //for every sentence that begins a paragraph: append this sentence and the previous one and see if sentence splitter would make a single sentence out of it. If so, add as extra sentence. //for each sieve that potentially uses augmentedSentences in original: List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class); WordToSentenceProcessor wsp = new WordToSentenceProcessor(WordToSentenceProcessor.NewlineIsSentenceBreak.NEVER); //create SentenceSplitter that never splits on newline int prevParagraph = 0; for(int i = 1; i < sentences.size(); i++) { CoreMap sentence = sentences.get(i); CoreMap prevSentence = sentences.get(i-1); List<CoreLabel> tokensConcat = new ArrayList<>(); tokensConcat.addAll(prevSentence.get(CoreAnnotations.TokensAnnotation.class)); tokensConcat.addAll(sentence.get(CoreAnnotations.TokensAnnotation.class)); List<List<CoreLabel>> sentenceTokens = wsp.process(tokensConcat); if(sentenceTokens.size() == 1) { //wsp would have put them into a single sentence --> add enhanced sentence. sentence.set(EnhancedSentenceAnnotation.class, constructSentence(sentenceTokens.get(0), prevSentence, sentence)); } } }
/**
 * Convert a CoreNLP Annotation object to a Document.
 *
 * @param props properties used to (re)build the default annotator pool
 * @param ann the CoreNLP Annotation object to wrap
 */
@SuppressWarnings("Convert2streamapi")
public Document(Properties props, Annotation ann) {
  this.defaultProps = props;
  // Warm/cache the annotator pool for these properties; the returned pool is
  // intentionally discarded (side effect only).
  StanfordCoreNLP.getDefaultAnnotatorPool(props, new AnnotatorImplementations());
  // Serialize the whole annotation into the proto builder held by this document.
  this.impl = new ProtobufAnnotationSerializer(false).toProtoBuilder(ann);
  List<CoreMap> sentences = ann.get(CoreAnnotations.SentencesAnnotation.class);
  this.sentences = new ArrayList<>(sentences.size());
  for (CoreMap sentence : sentences) {
    // NOTE(review): this uses the instance field `serializer` while the line
    // above builds a fresh ProtobufAnnotationSerializer(false) — confirm the
    // two are configured identically.
    this.sentences.add(new Sentence(this, this.serializer.toProtoBuilder(sentence),
        sentence.get(CoreAnnotations.TextAnnotation.class), this.defaultProps));
  }
}
/**
 * Render the raw document for analysis: a START marker, the document ID, a
 * gold/predicted header, one line per sentence (with mentions), and an END
 * marker.
 *
 * @param document document to render
 * @param gold whether to show gold (true) or predicted (false) mentions
 * @param printClusterID whether to include cluster IDs in the output
 * @return the formatted dump as a single string
 * @throws FileNotFoundException declared for caller compatibility
 */
public static String printRawDoc(Document document, boolean gold, boolean printClusterID)
    throws FileNotFoundException {
  List<CoreMap> sentences = document.annotation.get(CoreAnnotations.SentencesAnnotation.class);
  StringBuilder body = new StringBuilder();
  for (int sentIdx = 0; sentIdx < sentences.size(); sentIdx++) {
    body.append(sentenceStringWithMention(sentIdx, document, gold, printClusterID)).append("\n");
  }
  StringBuilder out = new StringBuilder();
  out.append("PRINT RAW DOC START\n");
  out.append(document.annotation.get(CoreAnnotations.DocIDAnnotation.class)).append("\n");
  out.append(gold
      ? "New DOC: (GOLD MENTIONS) ==================================================\n"
      : "New DOC: (Predicted Mentions) ==================================================\n");
  out.append(body.toString()).append("\n");
  out.append("PRINT RAW DOC END").append("\n");
  return out.toString();
}
private static CoreMap constructCoreMap(Annotation doc, Pair<Integer, Integer> run) { List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class); // check if the second part of the run is a *NL* token, adjust accordingly int endTokenIndex = run.second; while (endTokenIndex > 0 && tokens.get(endTokenIndex).get(CoreAnnotations.IsNewlineAnnotation.class)) { endTokenIndex--; } // get the sentence text from the first and last character offsets int begin = tokens.get(run.first).get(CoreAnnotations.CharacterOffsetBeginAnnotation.class); int end = tokens.get(endTokenIndex).get(CoreAnnotations.CharacterOffsetEndAnnotation.class); String sentenceText = doc.get(CoreAnnotations.TextAnnotation.class).substring(begin, end); List<CoreLabel> sentenceTokens = tokens.subList(run.first, endTokenIndex+1); // create a sentence annotation with text and token offsets CoreMap sentence = new Annotation(sentenceText); sentence.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin); sentence.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, end); sentence.set(CoreAnnotations.TokensAnnotation.class, sentenceTokens); return sentence; }