edu.stanford.nlp.ling.CoreLabel.get java code examples

Refine search

/**
 * Scans backwards from {@code start} across the contiguous run of tokens whose
 * named-entity tag equals the tag of the token at {@code start}.
 *
 * @param tokens the tokens to scan
 * @param start index of a token inside the run
 * @return the index of the first token of that run
 */
private static int findStartOfNERAnnotation(List<CoreLabel> tokens, int start) {
  final String nerTag = tokens.get(start).get(CoreAnnotations.NamedEntityTagAnnotation.class);
  int idx = start;
  for (; idx >= 0; idx--) {
    String current = tokens.get(idx).get(CoreAnnotations.NamedEntityTagAnnotation.class);
    if (!nerTag.equals(current)) {
      break;
    }
  }
  // idx now sits one position before the run (or at -1), so step forward once.
  return idx + 1;
}

/**
 * Scans forwards from {@code start} across the contiguous run of tokens whose
 * answer annotation equals the one on the token at {@code start}.
 *
 * @param tokens the tokens to scan
 * @param start index of a token inside the run
 * @return the index just past the last token of that run (exclusive end)
 */
private static int findEndOfAnswerAnnotation(List<CoreLabel> tokens, int start) {
  final String answer = tokens.get(start).get(CoreAnnotations.AnswerAnnotation.class);
  int idx = start;
  for (; idx < tokens.size(); idx++) {
    String current = tokens.get(idx).get(CoreAnnotations.AnswerAnnotation.class);
    if (!answer.equals(current)) {
      break;
    }
  }
  return idx;
}

/**
 * Prints one line per token in the form {@code word<TAB>gold<TAB>answer},
 * then a single empty line after the last token.
 *
 * @param doc the tokens to print
 * @param out the writer that receives the lines
 */
@Override
public void printAnswers(List<CoreLabel> doc, PrintWriter out) {
  for (CoreLabel token : doc) {
    String guessed = token.get(CoreAnnotations.AnswerAnnotation.class);
    String gold = token.get(CoreAnnotations.GoldAnswerAnnotation.class);
    // String.join renders a null element as "null", exactly like string concatenation does.
    String line = String.join("\t", token.word(), gold, guessed);
    out.println(line);
  }
  out.println();
}

 /**
  * Collects the lower-cased words of all tokens in the mention span
  * {@code [m.startIndex, m.endIndex)} whose part-of-speech tag is one of
  * NN, NNS, NNP or NNPS.
  *
  * @param m the mention whose span is inspected
  * @return the matching words, in sentence order
  */
 private static List<String> getContentWords(Mention m) {
   List<String> nouns = new ArrayList<>();
   int position = m.startIndex;
   while (position < m.endIndex) {
     CoreLabel token = m.sentenceWords.get(position);
     String tag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
     switch (tag) {
       case "NN":
       case "NNS":
       case "NNP":
       case "NNPS":
         nouns.add(token.word().toLowerCase());
         break;
       default:
         break;
     }
     position++;
   }
   return nouns;
 }
}

/**
 * Returns the text annotation of every token, in order.
 *
 * @param tokens the tokens to read
 * @return a new list holding one text value per token
 */
private static List<String> getTokenStrs(List<CoreLabel> tokens) {
  final int expectedSize = tokens.size();
  List<String> texts = new ArrayList<>(expectedSize);
  for (CoreLabel current : tokens) {
    texts.add(current.get(CoreAnnotations.TextAnnotation.class));
  }
  return texts;
}

/**
 * Builds the lower-cased token texts of the original span, stopping after the
 * head word so that anything following the head is left out.
 *
 * @return lower-cased texts from the start of the span up to and including the head word
 */
private List<String> getMentionString() {
  List<String> lowered = new ArrayList<>();
  for (CoreLabel token : this.originalSpan) {
    String text = token.get(CoreAnnotations.TextAnnotation.class);
    lowered.add(text.toLowerCase());
    // Identity comparison: stop at the very token object that is the head word.
    if (token == this.headWord) {
      break;
    }
  }
  return lowered;
}

/**
 * Computes a location for {@code token} as the token-begin value of its sentence
 * plus the token's index annotation minus one.
 *
 * @param token the token to locate; its sentence index selects the sentence in {@code doc}
 * @return sentence token-begin + token index - 1
 */
public int tokenToLocation(CoreLabel token) {
  List<CoreMap> allSentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  int sentenceIndex = token.get(CoreAnnotations.SentenceIndexAnnotation.class);
  CoreMap owner = allSentences.get(sentenceIndex);
  int sentenceStart = owner.get(CoreAnnotations.TokenBeginAnnotation.class);
  int indexInSentence = token.get(CoreAnnotations.IndexAnnotation.class);
  return sentenceStart + indexInSentence - 1;
}

/**
 * Walks every token of every sentence and decides whether to add it to the sample.
 * A token is chosen when:
 * <ol>
 *   <li>its {@code answerClass} value equals {@code answerLabel}; or</li>
 *   <li>its {@code answerClass} value is not {@code "O"} or its lower-cased word is in
 *       {@code negativeWords}, and {@code getRandomBoolean(r, perSelectNeg)} is true; or</li>
 *   <li>{@code getRandomBoolean(r, perSelectRand)} is true (this case also increments
 *       {@code numrand}).</li>
 * </ol>
 * Each chosen token is recorded in {@code chosen} as (sentence key, token position) and its
 * datum from {@code getDatum} is added to {@code dataset}.
 *
 * <p>NOTE(review): {@code rneg} is not read anywhere in this method; all random draws use
 * {@code r}. Left as is to keep the sampling sequence unchanged — confirm whether intended.
 *
 * @param sents sentences keyed by id
 * @param r random source used for all draws
 * @param rneg unused in this method
 * @param perSelectNeg value passed to {@code getRandomBoolean} for the negative case
 * @param perSelectRand value passed to {@code getRandomBoolean} for the random case
 * @param numrand running count of randomly chosen tokens on entry
 * @param chosen receives a (sentence key, token position) pair for each chosen token
 * @param dataset receives the datum of each chosen token
 * @return {@code numrand} plus the number of tokens chosen through the random case
 */
private int sample(Map<String, DataInstance> sents, Random r, Random rneg, double perSelectNeg, double perSelectRand, int numrand, List<Pair<String, Integer>> chosen, RVFDataset<String, String> dataset){
 for (Entry<String, DataInstance> en : sents.entrySet()) {
  CoreLabel[] sent = en.getValue().getTokens().toArray(new CoreLabel[0]);
  for (int i = 0; i < sent.length; i++) {
   CoreLabel l = sent[i];
   boolean chooseThis;
   if (l.get(answerClass).equals(answerLabel)) {
    chooseThis = true;
   } else if ((!l.get(answerClass).equals("O") || negativeWords.contains(l
     .word().toLowerCase())) && getRandomBoolean(r, perSelectNeg)) {
    chooseThis = true;
   } else if (getRandomBoolean(r, perSelectRand)) {
    numrand++;
    chooseThis = true;
   } else {
    chooseThis = false;
   }
   if (chooseThis) {
    // Diamond instead of the raw Pair type, so the element type is checked at compile time.
    chosen.add(new Pair<>(en.getKey(), i));
    RVFDatum<String, String> d = getDatum(sent, i);
    dataset.add(d, en.getKey(), Integer.toString(i));
   }
  }
 }
 return numrand;
}

/**
 * Flags as generic every mention that consists of a single token and whose head
 * word is tagged {@code NNS}.
 *
 * @param mentions the mentions to inspect; matching ones get {@code generic = true}
 */
protected static void setBarePlural(List<Mention> mentions) {
  for (Mention mention : mentions) {
    String headTag = mention.headWord.get(CoreAnnotations.PartOfSpeechAnnotation.class);
    boolean singleToken = mention.originalSpan.size() == 1;
    if (singleToken && headTag.equals("NNS")) {
      mention.generic = true;
    }
  }
}

/**
 * Creates a mention for each token of the sentence that carries a
 * {@code MentionTokenAnnotation}.
 *
 * <p>For such a token the span is {@code [index - 1, index)} taken from its
 * {@code IndexAnnotation}, i.e. exactly that one token. The new mention is appended to
 * {@code mentions} and its span to {@code mentionSpanSet}. When the computed begin index
 * is negative, a warning is logged instead.
 *
 * <p>NOTE(review): this excerpt ends inside the warning call; the rest of the method is
 * not visible here. {@code namedEntitySpanSet} is not read in the visible portion.
 *
 * @param s the sentence to scan
 * @param mentions receives the created mentions
 * @param mentionSpanSet receives the span of each created mention
 * @param namedEntitySpanSet not used in the visible portion of the method
 */
protected static void extractPremarkedEntityMentions(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
 List<CoreLabel> sent = s.get(CoreAnnotations.TokensAnnotation.class);
 SemanticGraph dependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
 int beginIndex = -1;
 for(CoreLabel w : sent) {
  MultiTokenTag t = w.get(CoreAnnotations.MentionTokenAnnotation.class);
  // Only tokens that carry a mention tag are considered.
  if (t != null) {
    // Subtracting one from the index annotation gives the position used with sent.subList below.
    beginIndex = w.get(CoreAnnotations.IndexAnnotation.class) - 1;
    int endIndex = w.get(CoreAnnotations.IndexAnnotation.class);
    if (beginIndex >= 0) {
     IntPair mSpan = new IntPair(beginIndex, endIndex);
     // The mention is created with a placeholder id of -1.
     int dummyMentionId = -1;
     Mention m = new Mention(dummyMentionId, beginIndex, endIndex, dependency, new ArrayList<>(sent.subList(beginIndex, endIndex)));
     mentions.add(m);
     mentionSpanSet.add(mSpan);
     // Reset so the next tagged token starts from a clean state.
     beginIndex = -1;
    } else {
     // A negative begin index means no valid start position exists for this tag.
     SieveCoreferenceSystem.logger.warning("Start of marked mention not found in sentence: "
         + t + " at tokenIndex=" + (w.get(CoreAnnotations.IndexAnnotation.class)-1)+ " for "
         + s.get(CoreAnnotations.TextAnnotation.class));

/**
 * Gathers the lower-cased words of all tokens in the mention span
 * {@code [m.startIndex, m.endIndex)} whose part-of-speech tag is contained in
 * {@code PROPERS}.
 *
 * @param m the mention whose span is inspected
 * @return the set of matching lower-cased words
 */
private static Set<String> getPropers(Mention m) {
  Set<String> properWords = new HashSet<>();
  int position = m.startIndex;
  while (position < m.endIndex) {
    CoreLabel token = m.sentenceWords.get(position);
    String tag = token.get(CoreAnnotations.PartOfSpeechAnnotation.class);
    String lowered = token.word().toLowerCase();
    boolean isProper = PROPERS.contains(tag);
    if (isProper) {
      properWords.add(lowered);
    }
    position++;
  }
  return properWords;
}

/**
 * Looks through tokens {@code [startIndex, endIndex)} of sentence {@code sentNum} for a
 * token whose lemma is in {@code dict.reportVerb} and whose tag starts with {@code "V"},
 * and tries to find a subject for it via {@code findSubject}.
 *
 * <p>Tokens whose utterance annotation is non-zero are skipped. For a matching verb the
 * enhanced dependency graph is used, falling back to the basic graph when the enhanced
 * one is absent. If the verb itself yields no subject, its ancestors on the path to the
 * root are tried for as long as their tags start with {@code "V"} or {@code "MD"}.
 *
 * @return {@code true} as soon as {@code findSubject} succeeds for some node, else {@code false}
 */
private static boolean findSpeaker(Document doc, int utterNum, int sentNum, List<CoreMap> sentences,
  int startIndex, int endIndex, Dictionaries dict) {
  final CoreMap sentence = sentences.get(sentNum);
  final List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
  for (int pos = startIndex; pos < endIndex; pos++) {
    CoreLabel token = tokens.get(pos);
    if (token.get(CoreAnnotations.UtteranceAnnotation.class) != 0) {
      continue;
    }
    String lemma = token.lemma();
    String word = token.word();
    boolean reportingVerb = dict.reportVerb.contains(lemma) && token.tag().startsWith("V");
    if (!reportingVerb) {
      continue;
    }

    // Locate the verb in the dependency graph, preferring the enhanced representation.
    SemanticGraph graph = sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    if (graph == null) {
      graph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    }
    IndexedWord verbNode = graph.getNodeByWordPattern(word);
    if (verbNode == null) {
      Redwood.log("debug-preprocessor", "Cannot find node in dependency for word " + word);
      continue;
    }

    if (findSubject(doc, graph, verbNode, sentNum, utterNum)) {
      return true;
    }
    // Climb through verbal/modal ancestors to cover cases such as "was talking" or "can tell".
    for (IndexedWord ancestor : graph.getPathToRoot(verbNode)) {
      String ancestorTag = ancestor.tag();
      if (!(ancestorTag.startsWith("V") || ancestorTag.startsWith("MD"))) {
        break;
      }
      if (findSubject(doc, graph, ancestor, sentNum, utterNum)) {
        return true;
      }
    }
  }
  return false;
}

/**
 * Reports whether any token in any sentence of the annotation has a non-null
 * speaker annotation.
 *
 * @param annotation the annotation whose sentences and tokens are checked
 * @return {@code true} on the first token with a speaker annotation, otherwise {@code false}
 */
private static boolean hasSpeakerAnnotations(Annotation annotation) {
  for (CoreMap currentSentence : annotation.get(CoreAnnotations.SentencesAnnotation.class)) {
    List<CoreLabel> tokens = currentSentence.get(CoreAnnotations.TokensAnnotation.class);
    for (CoreLabel token : tokens) {
      boolean hasSpeaker = token.get(CoreAnnotations.SpeakerAnnotation.class) != null;
      if (hasSpeaker) {
        return true;
      }
    }
  }
  return false;
}

/**
 * Renders a partial CoNLL-U block for one sentence: each token becomes a ten-column,
 * tab-separated row carrying index, word, lemma, coarse tag, POS tag, features and misc,
 * with the three dependency-related columns filled with {@code "_"}.
 * Missing lemma, coarse tag, POS tag and misc values are also written as {@code "_"}.
 *
 * @param sentence the sentence whose tokens are printed
 * @return the rows for all tokens followed by one terminating newline
 */
public String printPOSAnnotations(CoreMap sentence) {
  final String placeholder = "_";
  final String rowFormat = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s%n";
  StringBuilder rows = new StringBuilder();
  List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
  for (CoreLabel token : tokens) {
    String coarseTag = token.getString(CoreAnnotations.CoarseTagAnnotation.class, "_");
    String lemma = token.getString(CoreAnnotations.LemmaAnnotation.class, "_");
    String fineTag = token.getString(CoreAnnotations.PartOfSpeechAnnotation.class, "_");
    String features = CoNLLUUtils.toFeatureString(token.get(CoreAnnotations.CoNLLUFeats.class));
    String misc = token.getString(CoreAnnotations.CoNLLUMisc.class, "_");
    String row = String.format(rowFormat, token.index(), token.word(),
        lemma, coarseTag, fineTag, features, placeholder, placeholder, placeholder, misc);
    rows.append(row);
  }
  rows.append("\n");
  return rows.toString();
}

PrintStream os = null;
for (CoreMap sentence : sentences) {
  String myDocid = sentence.get(CoreAnnotations.DocIDAnnotation.class);
  if(docid == null || ! myDocid.equals(docid)){
    if(os != null){
  String w = word.word().replaceAll("[ \t\n]+", "_");
  String t = word.get(CoreAnnotations.PartOfSpeechAnnotation.class);
  String l = word.get(CoreAnnotations.AnswerAnnotation.class);
  String nl = l;
  if(! alreadyBIO && ! l.equals("O")){

/** Write a standard CoNLL format output file.
 *
 *  <p>Unless {@code flags.entitySubclassification} is {@code "iob1"} (case-insensitive),
 *  the document is first passed through {@code deEndify}. A token whose word is the
 *  {@code BOUNDARY} constant produces an empty line; every other token produces one line
 *  with word, POS tag, chunk, gold answer and guessed answer separated by tabs.
 *
 *  @param doc The document: A List of CoreLabel
 *  @param out Where to send the answers to
 */
@Override
@SuppressWarnings({"StringEquality", "StringConcatenationInLoop"})
public void printAnswers(List<CoreLabel> doc, PrintWriter out) {
 if ( ! "iob1".equalsIgnoreCase(flags.entitySubclassification)) {
  deEndify(doc);
 }
 for (CoreLabel fl : doc) {
  String word = fl.word();
  if (word == BOUNDARY) { // Using == is okay, because it is set to constant
   out.println();
  } else {
   String gold = fl.getString(CoreAnnotations.GoldAnswerAnnotation.class);
   String guess = fl.get(CoreAnnotations.AnswerAnnotation.class);
   String pos = fl.getString(CoreAnnotations.PartOfSpeechAnnotation.class);
   String chunk = fl.getString(CoreAnnotations.ChunkAnnotation.class);
   // Reuse the word fetched above rather than calling fl.word() a second time.
   out.println(word + '\t' + pos + '\t' + chunk + '\t' +
         gold + '\t' + guess);
  }
 }
}

/**
 * Looks up a gender for the mention string in {@code dict.genderNumber}.
 *
 * <p>When the mention has more than one token, the head word starts with an upper-case
 * letter and {@code nerString} starts with {@code "PER"}, the lookup tries, in order:
 * every suffix of {@code mStr} starting at positions 0 through the first-name position,
 * then the pair (first-name token, {@code "!"}), then the first-name token alone.
 * The first-name position is the second-to-last token, moved one further left when that
 * token has length 1, or length 2 ending in a period, and is not among the first two tokens.
 * Finally, in all cases, the last token alone is tried.
 *
 * @param dict dictionaries providing the {@code genderNumber} map
 * @param mStr the mention's token strings
 * @return the first gender found, or {@code null} when no lookup matches
 */
private Gender getGender(Dictionaries dict, List<String> mStr) {
  final int len = mStr.size();
  final char initial = headWord.get(CoreAnnotations.TextAnnotation.class).charAt(0);
  final boolean personLike = len > 1 && Character.isUpperCase(initial) && nerString.startsWith("PER");

  if (personLike) {
    int firstNameIdx = len - 2;
    final String secondToLast = mStr.get(firstNameIdx);
    if (firstNameIdx > 1) {
      final int tokenLength = secondToLast.length();
      if (tokenLength == 1 || (tokenLength == 2 && secondToLast.endsWith("."))) {
        firstNameIdx--;
      }
    }

    // Try progressively shorter suffixes of the mention.
    for (int from = 0; from <= firstNameIdx; from++) {
      List<String> suffix = mStr.subList(from, len);
      if (dict.genderNumber.containsKey(suffix)) {
        return dict.genderNumber.get(suffix);
      }
    }

    // Try the wildcard form with "!" (e.g., "dr. martin luther king jr. boulevard" -> "dr. !").
    List<String> wildcardKey = new ArrayList<>(2);
    wildcardKey.add(mStr.get(firstNameIdx));
    wildcardKey.add("!");
    if (dict.genderNumber.containsKey(wildcardKey)) {
      return dict.genderNumber.get(wildcardKey);
    }

    List<String> firstNameOnly = mStr.subList(firstNameIdx, firstNameIdx + 1);
    if (dict.genderNumber.containsKey(firstNameOnly)) {
      return dict.genderNumber.get(firstNameOnly);
    }
  }

  if (mStr.size() > 0) {
    List<String> lastTokenOnly = mStr.subList(len - 1, len);
    if (dict.genderNumber.containsKey(lastTokenOnly)) {
      return dict.genderNumber.get(lastTokenOnly);
    }
  }
  return null;
}

/**
 * Scans forwards from {@code start} across the contiguous run of tokens whose
 * named-entity tag equals the tag of the token at {@code start}.
 *
 * @param tokens the tokens to scan
 * @param start index of a token inside the run
 * @return the index just past the last token of that run (exclusive end)
 */
private static int findEndOfNERAnnotation(List<CoreLabel> tokens, int start) {
  final String nerTag = tokens.get(start).get(CoreAnnotations.NamedEntityTagAnnotation.class);
  int idx = start;
  for (; idx < tokens.size(); idx++) {
    String current = tokens.get(idx).get(CoreAnnotations.NamedEntityTagAnnotation.class);
    if (!nerTag.equals(current)) {
      break;
    }
  }
  return idx;
}

/**
 * Returns the text of every token that is non-empty and either has at least four
 * characters or begins with an upper-case character.
 *
 * @param tokens the tokens to filter
 * @return the texts of the tokens that pass the filter, in order
 */
private static List<String> getMainTokenStrs(List<CoreLabel> tokens) {
  List<String> kept = new ArrayList<>(tokens.size());
  for (CoreLabel current : tokens) {
    String surface = current.get(CoreAnnotations.TextAnnotation.class);
    if (surface.isEmpty()) {
      continue;
    }
    boolean longEnough = surface.length() >= 4;
    if (longEnough || Character.isUpperCase(surface.charAt(0))) {
      kept.add(surface);
    }
  }
  return kept;
}

/**
 * Builds the space-separated text of the longest run of tokens tagged {@code NNP*}
 * that ends at the head word, walking leftwards from {@code headIndex} down to
 * {@code startIndex}.
 *
 * @return the joined token texts, or an empty string when the head itself is not {@code NNP*}
 */
public String longestNNPEndsWithHead() {
  String phrase = "";
  int position = headIndex;
  while (position >= startIndex) {
    String tag = sentenceWords.get(position).get(CoreAnnotations.PartOfSpeechAnnotation.class);
    if (!tag.startsWith("NNP")) {
      break;
    }
    // Each new token is prepended; a separator is only needed once something was collected.
    String separator = phrase.equals("") ? "" : " ";
    phrase = sentenceWords.get(position).get(CoreAnnotations.TextAnnotation.class) + separator + phrase;
    position--;
  }
  return phrase;
}

Popular methods of CoreLabel

set
word
beginPosition
endPosition
originalText
<init>
This constructor attempts to parse the String keys into Class keys. It's mainly useful for reading f
index
lemma
setWord
Set the word value for the label. Also, clears the lemma, since that may have changed if the word ch
getString
setIndex
setTag

Popular in Java

Parsing JSON documents to java classes using gson
getContentResolver (Context)
getApplicationContext (Context)
getExternalFilesDir (Context)
Socket (java.net)
Provides a client-side TCP socket.
SQLException (java.sql)
An exception that indicates a failed JDBC operation. It provides the following information about pro
BitSet (java.util)
The BitSet class implements a bit array [http://en.wikipedia.org/wiki/Bit_array]. Each element is eit
Calendar (java.util)
Calendar is an abstract base class for converting between a Date object and a set of integer fields
Annotation (javassist.bytecode.annotation)
The annotation structure. An instance of this class is returned by getAnnotations() in AnnotationsAttr
ImageIO (javax.imageio)
Top Sublime Text plugins

How to use the get method in edu.stanford.nlp.ling.CoreLabel

Best Java code snippets using edu.stanford.nlp.ling.CoreLabel.get (Showing top 20 results out of 315)

Refine search

How to use
get
method
in
edu.stanford.nlp.ling.CoreLabel