edu.stanford.nlp.ling.CoreLabel.setTag Java code examples

/**
 * Sets the tag by delegating to {@code label.setTag(tag)}.
 *
 * @param tag the tag value forwarded unchanged to {@code label}
 */
@Override
public void setTag(String tag) {
 label.setTag(tag);
}

/**
 * Builds a list of {@code CoreLabel} tokens from parallel arrays of words and tags.
 * The i-th token receives {@code words[i]} as its word and {@code tags[i]} as its tag.
 *
 * @param words the word strings, one per token
 * @param tags  the tags; expected to have the same length as {@code words}
 *              (verified only by an {@code assert})
 * @return a new list holding one {@code CoreLabel} per word, in input order
 */
public static List<CoreLabel> toCoreLabelList(String[] words, String[] tags) {
 assert tags.length == words.length;
 final int count = words.length;
 final List<CoreLabel> result = new ArrayList<>(count);
 for (int idx = 0; idx < count; idx++) {
  final CoreLabel token = new CoreLabel();
  token.setWord(words[idx]);
  token.setTag(tags[idx]);
  result.add(token);
 }
 return result;
}

/**
 * Tags a sentence of CoreLabels in place. The tags come from
 * {@code tagSentence(sentence, reuseTags)} and are written back onto the
 * corresponding CoreLabels of the input list. If reuseTags is set to
 * true, any tags supplied with the CoreLabels are taken as correct.
 *
 * @param sentence  the tokens to tag; each element has its tag overwritten
 * @param reuseTags passed through to {@code tagSentence}
 * @throws AssertionError if the tagger output and the sentence differ in length
 */
public void tagCoreLabels(List<CoreLabel> sentence, boolean reuseTags) {
 final List<TaggedWord> tagged = tagSentence(sentence, reuseTags);
 final int expected = sentence.size();
 if (tagged.size() != expected) {
  throw new AssertionError("Tagged word list not the same length " +
               "as the original sentence");
 }
 for (int pos = 0; pos < expected; pos++) {
  final String tag = tagged.get(pos).tag();
  sentence.get(pos).setTag(tag);
 }
}

/**
 * Builds a list of {@code CoreLabel} tokens from parallel arrays of words,
 * tags and answers. The i-th token receives {@code words[i]} as its word,
 * {@code tags[i]} as its tag and {@code answers[i]} under
 * {@code CoreAnnotations.AnswerAnnotation}.
 *
 * @param words   the word strings, one per token
 * @param tags    the tags; expected to match {@code words} in length (assert only)
 * @param answers the answer values; expected to match {@code words} in length (assert only)
 * @return a new list holding one {@code CoreLabel} per word, in input order
 */
public static List<CoreLabel> toCoreLabelList(String[] words, String[] tags, String[] answers) {
 assert tags.length == words.length;
 assert answers.length == words.length;
 final int count = words.length;
 final List<CoreLabel> result = new ArrayList<>(count);
 for (int idx = 0; idx < count; idx++) {
  final CoreLabel token = new CoreLabel();
  token.setWord(words[idx]);
  token.setTag(tags[idx]);
  token.set(CoreAnnotations.AnswerAnnotation.class, answers[idx]);
  result.add(token);
 }
 return result;
}

/**
 * Builds a list of {@code CoreLabel} tokens from parallel arrays of words and
 * tags, and additionally records begin/end character offsets on each token.
 * Offsets start at 0 and are computed as if the words were joined by exactly
 * one separator character.
 *
 * @param words the word strings, one per token
 * @param tags  the tags; expected to match {@code words} in length (assert only)
 * @return a new list holding one {@code CoreLabel} per word, in input order
 */
public static List<CoreLabel> toCoreLabelListWithCharacterOffsets(String[] words, String[] tags) {
 assert tags.length == words.length;
 final int count = words.length;
 final List<CoreLabel> result = new ArrayList<>(count);
 int cursor = 0;
 for (int idx = 0; idx < count; idx++) {
  final CoreLabel token = new CoreLabel();
  token.setWord(words[idx]);
  token.setTag(tags[idx]);
  token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, cursor);
  final int end = cursor + words[idx].length();
  token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, end);
  // Skip one position for the assumed single space between words.
  cursor = end + 1;
  result.add(token);
 }
 return result;
}

/**
 * Handles a sentence for which processing failed: builds a tree from the
 * sentence tokens via {@code ParserUtils.xTree}, gives every untagged token
 * the tag {@code "XX"}, and hands the single-tree list to {@code finishSentence}.
 *
 * @param annotation the enclosing annotation (not used here)
 * @param sentence   the sentence whose tokens are read and updated
 */
@Override
public void doOneFailedSentence(Annotation annotation, CoreMap sentence) {
 final List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
 // The tree is built before any missing tags are filled in below.
 final Tree fallbackTree = ParserUtils.xTree(tokens);
 for (CoreLabel token : tokens) {
  if (token.tag() != null) {
   continue;
  }
  token.setTag("XX");
 }
 final List<Tree> result = Generics.newArrayList(1);
 result.add(fallbackTree);
 finishSentence(sentence, result);
}

/**
 * Fills in tags for sentence tokens that have none, taking them from the
 * tree's tagged yield at the same position. The tree leaf at that position
 * is given the same tag when it implements {@code HasTag}. Tokens that
 * already carry a tag are left untouched.
 *
 * @param sentence the sentence whose {@code TokensAnnotation} tokens are updated
 * @param tree     the tree supplying tags and leaves; only queried if some token lacks a tag
 */
private static void setMissingTags(CoreMap sentence, Tree tree) {
 final List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
 List<TaggedWord> treeTags = null;
 List<Label> treeLeaves = null;
 final int count = tokens.size();
 for (int idx = 0; idx < count; idx++) {
  final CoreLabel token = tokens.get(idx);
  if (token.tag() != null) {
   continue;
  }
  // Compute the yields lazily, the first time an untagged token is seen.
  if (treeTags == null) {
   treeTags = tree.taggedYield();
  }
  if (treeLeaves == null) {
   treeLeaves = tree.yield();
  }
  final String tag = treeTags.get(idx).tag();
  token.setTag(tag);
  final Label leaf = treeLeaves.get(idx);
  if (leaf instanceof HasTag) {
   ((HasTag) leaf).setTag(tag);
  }
 }
}

/**
 * Replaces the value and tag of each preterminal label with the alternate
 * tag derived from its morphological analysis, when one is available.
 *
 * <p>For each leaf, the morphology string is the leaf's original text. If
 * that is null or empty, it is built from the preterminal's value followed
 * by {@code "-" + category + "--"} when the leaf has a non-empty category,
 * or by {@code "---"} otherwise. The string is parsed with a
 * {@code FrenchMorphoFeatureSpecification}; a non-empty alternate tag is then
 * written to the preterminal as both value and tag.
 *
 * @param tree the tree whose preterminal labels are updated in place; its
 *             leaf and preterminal labels are cast to {@code CoreLabel}
 */
private static void replacePOSTags(Tree tree) {
 List<Label> leaves = tree.yield();
 List<Label> preTerminals = tree.preTerminalYield();
 assert leaves.size() == preTerminals.size();
 MorphoFeatureSpecification spec = new FrenchMorphoFeatureSpecification();
 for (int i = 0; i < leaves.size(); i++) {
  CoreLabel leaf = (CoreLabel) leaves.get(i);
  // Morphological analysis
  String morphStr = leaf.originalText();
  if (morphStr == null || morphStr.isEmpty()) {
   morphStr = preTerminals.get(i).value();
   // POS subcategory. Compare by content: the previous `subCat != ""` was a
   // reference comparison and treated non-interned empty strings as non-empty.
   String subCat = leaf.category();
   if (subCat != null && !subCat.isEmpty()) {
    morphStr += "-" + subCat + "--";
   } else {
    morphStr += "---";
   }
  }
  MorphoFeatures feats = spec.strToFeatures(morphStr);
  String altTag = feats.getAltTag();
  if (altTag != null && !altTag.isEmpty()) {
   CoreLabel preTerminal = (CoreLabel) preTerminals.get(i);
   preTerminal.setValue(altTag);
   preTerminal.setTag(altTag);
  }
 }
}

/**
 * Runs {@code doOneSentence} on copies of the given tokens and writes the
 * resulting NER label and normalized NER annotation back onto the originals.
 *
 * @param tokens the tokens to annotate; their NER and
 *               {@code NormalizedNamedEntityTagAnnotation} values are overwritten
 */
private <TOKEN extends CoreLabel> void annotateTokens(List<TOKEN> tokens) {
 // Work on copies of the tokens because QuantifiableEntityNormalizer may change the POS too.
 final List<CoreLabel> copies = new ArrayList<>();
 for (CoreLabel original : tokens) {
  final CoreLabel copy = new CoreLabel();
  copy.setWord(original.word());
  copy.setNER(original.ner());
  copy.setTag(original.tag());
  // Carry over fields potentially set by SUTime.
  NumberSequenceClassifier.transferAnnotations(original, copy);
  copies.add(copy);
 }
 doOneSentence(copies);
 // TODO: If collapsed is set, tokens for entities are collapsed into one node; then
 // copies.size() != tokens.size() and the positional write-back below does not work.
 for (int idx = 0; idx < copies.size(); idx++) {
  final CoreLabel annotated = copies.get(idx);
  final TOKEN target = tokens.get(idx);
  target.setNER(annotated.ner());
  target.set(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class,
      annotated.get(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class));
 }
}

/**
 * Converts a list of word-bearing objects into a sentence of
 * {@code CoreLabel} objects. Each result label gets the source's word, plus
 * its value, tag and lemma when the source implements {@code Label},
 * {@code HasTag} and {@code HasLemma} respectively.
 *
 * @param words The words to make it from
 * @return A new list with one {@code CoreLabel} per input element, in order
 */
public static List<CoreLabel> toCoreLabelList(List<? extends HasWord> words) {
 final List<CoreLabel> result = new ArrayList<>(words.size());
 for (HasWord source : words) {
  final CoreLabel target = new CoreLabel();
  // The value is set before the word, as in the original ordering.
  if (source instanceof Label) {
   final Label asLabel = (Label) source;
   target.setValue(asLabel.value());
  }
  target.setWord(source.word());
  if (source instanceof HasTag) {
   final HasTag asTagged = (HasTag) source;
   target.setTag(asTagged.tag());
  }
  if (source instanceof HasLemma) {
   final HasLemma asLemmatized = (HasLemma) source;
   target.setLemma(asLemmatized.lemma());
  }
  result.add(target);
 }
 return result;
}

   "with part-of-speech tag annotations");
label.setTag(((HasTag) wd).tag());

if(feats.getAltTag() != null && !feats.getAltTag().equals("")) {
 label.setValue(feats.getAltTag());
 label.setTag(feats.getAltTag());

/**
 * Creates a mock node, to be added to the dependency tree but which is not part of the original sentence.
 * The node starts as a copy of {@code toCopy}; its word, lemma and value are then set to {@code word},
 * its NER label to {@code "O"}, its tag to {@code POS}, and its index to {@code sentenceLength + 5}.
 *
 * @param toCopy The CoreLabel to copy from initially.
 * @param word   The new word, also used as lemma and value.
 * @param POS    The new part of speech tag.
 *
 * @return A new CoreLabel copied from toCopy with the fields above overwritten.
 */
@SuppressWarnings("UnusedDeclaration")
private CoreLabel mockNode(CoreLabel toCopy, String word, String POS) {
 final CoreLabel node = new CoreLabel(toCopy);
 // Textual fields first; the word is set before the lemma, as in the original ordering.
 node.setWord(word);
 node.setLemma(word);
 node.setValue(word);
 // Then the labels and the index.
 node.setNER("O");
 node.setTag(POS);
 final int mockIndex = sentenceLength + 5;
 node.setIndex(mockIndex);
 return node;
}

/**
 * Tags the given tokens and stems each tagged word with a {@code Morphology}
 * instance. Tags come from the loaded tagger when
 * {@code testOptions.preTag} is set, and from the tagged yield of the parse
 * tree otherwise.
 * Only works on English, as it is hard coded for using the
 * Morphology class, which is English-only.
 *
 * @param tokens the words to tag and stem
 * @return a new list of {@code CoreLabel}s, one per tagged word, each carrying
 *         the word and tag and passed through {@code Morphology.stem}
 */
public List<CoreLabel> lemmatize(List<? extends HasWord> tokens) {
 final List<TaggedWord> taggedWords;
 if (!getOp().testOptions.preTag) {
  final Tree parsed = parse(tokens);
  taggedWords = parsed.taggedYield();
 } else {
  final Function<List<? extends HasWord>, List<TaggedWord>> tagger = loadTagger();
  taggedWords = tagger.apply(tokens);
 }
 final Morphology stemmer = new Morphology();
 final List<CoreLabel> result = Generics.newArrayList();
 for (TaggedWord taggedWord : taggedWords) {
  final CoreLabel entry = new CoreLabel();
  entry.setWord(taggedWord.word());
  entry.setTag(taggedWord.tag());
  stemmer.stem(entry);
  result.add(entry);
 }
 return result;
}

/**
 * Creates a new {@code TreeGraphNode} mirroring the structure of an existing
 * tree. Children are built recursively, each with this node as parent. When
 * {@code t} is a preterminal, each child's label is tagged with the value of
 * {@code t}'s label. This node's own label is created last, from
 * {@code t.label()} via {@code mlf.newLabel}.
 *
 * @param t      the tree to copy
 * @param parent the parent node
 */
protected TreeGraphNode(Tree t, TreeGraphNode parent) {
 this.parent = parent;
 final Tree[] sourceKids = t.children();
 final int kidCount = sourceKids.length;
 children = new TreeGraphNode[kidCount];
 for (int k = 0; k < kidCount; k++) {
  final TreeGraphNode kidCopy = new TreeGraphNode(sourceKids[k], this);
  children[k] = kidCopy;
  if (t.isPreTerminal()) {
   // Add the tag to the leaf below this preterminal.
   kidCopy.label.setTag(t.label().value());
  }
 }
 this.label = (CoreLabel) mlf.newLabel(t.label());
}

/**
 * Appends the tagged terminals under this node to {@code ty}, numbering
 * them from {@code termIdx}. For a preterminal, one {@code CoreLabel} is
 * copied from the first child's label, given the child's value as word if it
 * has none, and assigned this node's value (or {@code ""} if null) as both
 * value and tag. Other nodes recurse into their children in order.
 *
 * @param ty      the list that receives the tagged labels
 * @param termIdx the index to assign to the next terminal
 * @return the index to use for the terminal following this subtree
 */
private int taggedLabeledYield(List<CoreLabel> ty, int termIdx) {
 if (!isPreTerminal()) {
  int nextIdx = termIdx;
  for (Tree kid : getChildrenAsList()) {
   nextIdx = kid.taggedLabeledYield(ty, nextIdx);
  }
  return nextIdx;
 }
 // Copying the leaf label usually fills in all the usual keys for a token ...
 final CoreLabel entry = new CoreLabel(firstChild().label());
 // ... but a tree read from plain text may only carry a value for its words.
 if (entry.word() == null) {
  entry.setWord(firstChild().value());
 }
 String tag = value();
 if (tag == null) {
  tag = "";
 }
 // Both value and tag are set to the tag.
 entry.setValue(tag);
 entry.setTag(tag);
 entry.setIndex(termIdx);
 ty.add(entry);
 return termIdx + 1;
}

 label.setTag(pos);
} else if (aggressiveNormalization && isMultiWordCandidate(t)) {

/**
 * Returns the terminal label for the given position. If an original
 * {@code CoreLabel} is stored for that position it is returned as is, after
 * its value is set to its word when the value is missing and the word is
 * present. Otherwise a new label is built from the indexed word string, the
 * stored begin/end offsets and, if present, the stored original tag.
 *
 * @param labelIndex position of the terminal
 * @return the stored original label, or a newly built one
 */
private CoreLabel getCoreLabel(int labelIndex) {
 final CoreLabel existing = originalCoreLabels[labelIndex];
 if (existing != null) {
  final boolean needsValue = existing.value() == null && existing.word() != null;
  if (needsValue) {
   existing.setValue(existing.word());
  }
  return existing;
 }
 final String text = wordIndex.get(words[labelIndex]);
 final CoreLabel created = new CoreLabel();
 created.setValue(text);
 created.setWord(text);
 created.setBeginPosition(beginOffsets[labelIndex]);
 created.setEndPosition(endOffsets[labelIndex]);
 if (originalTags[labelIndex] != null) {
  created.setTag(originalTags[labelIndex].tag());
 }
 return created;
}

/**
 * Copies only the fields required for numeric entity extraction into a new CoreLabel:
 * the word, the tag, the original text (if present), the character offsets, and
 * whatever {@code transferAnnotations} carries over.
 *
 * @param src         Source CoreLabel to copy.
 * @param startOffset Begin offset for the copy, or {@code null} to reuse the source's begin offset.
 * @param endOffset   End offset for the copy, or {@code null} to reuse the source's end offset.
 * @return The newly created CoreLabel.
 */
private static CoreLabel copyCoreLabel(CoreLabel src, Integer startOffset, Integer endOffset) {
 final CoreLabel copy = new CoreLabel();
 copy.setWord(src.word());
 copy.setTag(src.tag());
 if (src.containsKey(CoreAnnotations.OriginalTextAnnotation.class)) {
  final String originalText = src.get(CoreAnnotations.OriginalTextAnnotation.class);
  copy.set(CoreAnnotations.OriginalTextAnnotation.class, originalText);
 }

 // An explicit offset wins; otherwise fall back to the source's own offset.
 Integer begin = startOffset;
 if (begin == null) {
  begin = src.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
 }
 copy.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, begin);

 Integer end = endOffset;
 if (end == null) {
  end = src.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
 }
 copy.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, end);

 transferAnnotations(src, copy);
 return copy;
}

/**
 * Creates a new {@code CoreLabel} from an existing label. A {@code CoreLabel}
 * input is copied with the copy constructor. Any other label is converted by
 * transferring whichever of word, tag, offsets, category and index it exposes
 * through the corresponding interfaces, followed by its value.
 *
 * @param oldLabel the label to copy or convert
 * @return a new {@code CoreLabel}
 */
@Override
public Label newLabel(Label oldLabel) {
 if (oldLabel instanceof CoreLabel) {
  return new CoreLabel((CoreLabel) oldLabel);
 }
 // Map the old interfaces to the correct key/value pairs.
 final CoreLabel converted = new CoreLabel();
 if (oldLabel instanceof HasWord) {
  converted.setWord(((HasWord) oldLabel).word());
 }
 if (oldLabel instanceof HasTag) {
  converted.setTag(((HasTag) oldLabel).tag());
 }
 if (oldLabel instanceof HasOffset) {
  final HasOffset withOffset = (HasOffset) oldLabel;
  converted.setBeginPosition(withOffset.beginPosition());
  converted.setEndPosition(withOffset.endPosition());
 }
 if (oldLabel instanceof HasCategory) {
  converted.setCategory(((HasCategory) oldLabel).category());
 }
 // HasIndex is copied too when the old label provides it.
 if (oldLabel instanceof HasIndex) {
  converted.setIndex(((HasIndex) oldLabel).index());
 }
 converted.setValue(oldLabel.value());
 return converted;
}

Popular methods of CoreLabel

get
set
word
beginPosition
endPosition
originalText
<init>
This constructor attempts to parse the String keys into Class keys. It's mainly useful for reading f
index
lemma
setWord
Set the word value for the label. Also, clears the lemma, since that may have changed if the word ch
getString
setIndex

Popular in Java

Reactive rest calls using spring rest template
addToBackStack (FragmentTransaction)
getExternalFilesDir (Context)
onRequestPermissionsResult (Fragment)
FileReader (java.io)
A specialized Reader that reads from a file in the file system. All read requests made by calling me
OutputStream (java.io)
A writable sink for bytes. Most clients will use output streams that write data to the file system (
System (java.lang)
Provides access to system-related information and resources including standard input and output. Ena
ReentrantLock (java.util.concurrent.locks)
A reentrant mutual exclusion Lock with the same basic behavior and semantics as the implicit monitor
Manifest (java.util.jar)
The Manifest class is used to obtain attribute information for a JarFile and its entries.
JList (javax.swing)
Top Vim plugins

How to use the setTag method in edu.stanford.nlp.ling.CoreLabel

Best Java code snippets using edu.stanford.nlp.ling.CoreLabel.setTag (Showing top 20 results out of 315)

How to use
setTag
method
in
edu.stanford.nlp.ling.CoreLabel