edu.stanford.nlp.ling.TaggedWord Java code examples

Refine search

/**
 * Produces lemmatized labels for the given tokens.  The tokens are
 * first tagged -- by the tagger obtained from {@code loadTagger()} when
 * the {@code preTag} test option is set, otherwise by parsing them and
 * reading the tagged yield off the resulting tree.  Each tagged word is
 * then copied into a fresh {@code CoreLabel} and stemmed.
 * <p>
 * Only works on English, because the stemming step is hard coded to
 * use the {@code Morphology} class, which is English-only.
 *
 * @param tokens the words to lemmatize
 * @return one newly created {@code CoreLabel} per tagged word, in order,
 *         after it has been passed to {@code Morphology.stem}
 */
public List<CoreLabel> lemmatize(List<? extends HasWord> tokens) {
 final List<TaggedWord> taggedWords;
 if (!getOp().testOptions.preTag) {
  // No pre-tagging requested: take the tags from a full parse.
  Tree parsed = parse(tokens);
  taggedWords = parsed.taggedYield();
 } else {
  Function<List<? extends HasWord>, List<TaggedWord>> preTagger = loadTagger();
  taggedWords = preTagger.apply(tokens);
 }

 final Morphology stemmer = new Morphology();
 final List<CoreLabel> result = Generics.newArrayList();
 for (TaggedWord tw : taggedWords) {
  CoreLabel lemma = new CoreLabel();
  lemma.setWord(tw.word());
  lemma.setTag(tw.tag());
  stemmer.stem(lemma);
  result.add(lemma);
 }
 return result;
}

/**
 * Builds a copy of a tagged-word list in which every tag has been
 * replaced by its basic category according to the language pack.
 * The input list and its elements are not modified.
 *
 * @param twList the tagged words to clean
 * @param tlp    supplies {@code basicCategory} for each tag
 * @return a new list of new {@code TaggedWord}s, same length and order as {@code twList}
 */
private static List<TaggedWord> cleanTags(List<TaggedWord> twList, TreebankLanguagePack tlp) {
 final List<TaggedWord> cleaned = new ArrayList<>(twList.size());
 for (TaggedWord original : twList) {
  String basicTag = tlp.basicCategory(original.tag());
  cleaned.add(new TaggedWord(original.word(), basicTag));
 }
 return cleaned;
}

/**
 * Sets the word and tag of this object from a single string of the
 * form {@code word + divider + tag}.  The string is split at the
 * <i>last</i> occurrence of {@code divider}, so the word part may itself
 * contain the divider.  If the divider does not occur at all, the whole
 * string becomes the word and the tag is set to {@code null}.
 *
 * @param taggedWord the combined word/tag string
 * @param divider    the separator between word and tag; may be longer
 *                   than one character
 */
public void setFromString(String taggedWord, String divider) {
 int where = taggedWord.lastIndexOf(divider);
 if (where >= 0) {
  setWord(taggedWord.substring(0, where));
  // Skip the whole divider, not just one character, so that
  // multi-character dividers do not leak into the tag.
  setTag(taggedWord.substring(where + divider.length()));
 } else {
  setWord(taggedWord);
  setTag(null);
 }
}

/**
 * Converts a {@code TaggedWord} into a {@code WordTag} with the same
 * word and tag.
 *
 * @param tw the tagged word to convert
 * @return a new {@code WordTag} built from {@code tw.word()} and {@code tw.tag()}
 */
private static WordTag toWordTag(TaggedWord tw) {
 final String word = tw.word();
 final String tag = tw.tag();
 return new WordTag(word, tag);
}

outputTree = outputTree.prune(new BobChrisTreeNormalizer.EmptyFilter());
 ArrayList<Label> sentUnstemmed = outputTree.yield();
 pw.println("  <words>");
 int i = 1;
 String sent = SentenceUtils.listToString(outputTree.yield(), false);
 if(ptb2text) {
  pw.println(PTBTokenizer.ptb2Text(sent));
 int i = 1;
 for (TaggedWord tw : sent) {
  pw.println("    <word ind=\"" + i + "\" pos=\"" + XMLUtils.escapeXML(tw.tag()) + "\">" + XMLUtils.escapeXML(tw.word()) + "</word>");
  i++;
 Tree indexedTree = outputTree.deepCopy(outputTree.treeFactory(),
                      CoreLabel.factory());
 indexedTree.indexLeaves();
 Set<Dependency<Label, Label, Object>> depsSet = indexedTree.mapDependencies(dependencyWordFilter, hf);
 Tree it = outputTree.deepCopy(outputTree.treeFactory(), CoreLabel.factory());
 it.indexLeaves();
  String tag = PTBTokenizer.ptbToken2Text(w.tag());
  String word = PTBTokenizer.ptbToken2Text(w.word());

private Tree outsideBinarizeLocalTree(Tree t, String labelStr, String finalCat, int headNum, TaggedWord head, int leftProcessed, String leftStr, int rightProcessed, String rightStr) {
 List<Tree> newChildren = new ArrayList<>(2);
 Label label = new CategoryWordTag(labelStr, head.word(), head.tag());
 if (t.numChildren() - leftProcessed - rightProcessed <= 2) {
  newChildren.add(t.getChild(leftProcessed));
  if (t.numChildren() - leftProcessed - rightProcessed == 2) {
   newChildren.add(t.getChild(leftProcessed + 1));

/**
 * Fills in tags that are missing on the sentence's tokens, using the
 * tags found in the tree.  For every token whose tag is {@code null},
 * the tag at the same position of the tree's tagged yield is copied
 * onto the token and, if the leaf label at that position is a
 * {@code HasTag}, onto that leaf label as well.  Tokens that already
 * have a tag are left alone.
 *
 * @param sentence the sentence whose {@code TokensAnnotation} tokens are updated
 * @param tree     the tree providing the tags and the leaf labels
 */
private static void setMissingTags(CoreMap sentence, Tree tree) {
 final List<CoreLabel> tokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
 final int tokenCount = tokens.size();
 // Both yields are computed lazily, the first time an untagged token is seen.
 List<TaggedWord> treeTags = null;
 List<Label> treeLeaves = null;
 for (int pos = 0; pos < tokenCount; ++pos) {
  CoreLabel current = tokens.get(pos);
  if (current.tag() != null) {
   continue;
  }
  if (treeTags == null) {
   treeTags = tree.taggedYield();
   treeLeaves = tree.yield();
  }
  String tagFromTree = treeTags.get(pos).tag();
  current.setTag(tagFromTree);
  Label leafLabel = treeLeaves.get(pos);
  if (leafLabel instanceof HasTag) {
   ((HasTag) leafLabel).setTag(tagFromTree);
  }
 }
}

/**
 * Adds a part-of-speech annotation to every token of one sentence.
 * Sentences no longer than {@code maxSentenceLength} are handed to the
 * tagger; if the sentence is too long, or tagging runs out of memory,
 * every token receives the placeholder tag {@code "X"} instead.
 *
 * @param sentence the sentence whose {@code TokensAnnotation} tokens are annotated in place
 * @return the same {@code sentence} object that was passed in
 */
private CoreMap doOneSentence(CoreMap sentence) {
 final List<CoreLabel> words = sentence.get(CoreAnnotations.TokensAnnotation.class);

 List<TaggedWord> tagResult = null;
 if (words.size() <= maxSentenceLength) {
  try {
   tagResult = pos.tagSentence(words, this.reuseTags);
  } catch (OutOfMemoryError oom) {
   log.error(oom); // Beware that we can now get an OOM in logging, too.
   log.warn("Tagging of sentence ran out of memory. " +
             "Will ignore and continue: " +
             SentenceUtils.listToString(words));
  }
 }

 if (tagResult == null) {
  // Not tagged (too long, or out of memory): fall back to the placeholder tag.
  for (CoreLabel word : words) {
   word.set(CoreAnnotations.PartOfSpeechAnnotation.class, "X");
  }
  return sentence;
 }

 final int wordCount = words.size();
 for (int idx = 0; idx < wordCount; idx++) {
  CoreLabel word = words.get(idx);
  word.set(CoreAnnotations.PartOfSpeechAnnotation.class, tagResult.get(idx).tag());
 }
 return sentence;
}

/**
 * Collects the tagged yield of a tree as {@code WordTag} objects,
 * copying word and tag unchanged (no stemming is applied here).
 *
 * @param t the tree to read the tagged yield from
 * @return a new list with one {@code WordTag} per tagged word, in yield order
 */
private static List<WordTag> getNonStemmedWordTagsFromTree(Tree t) {
 final List<WordTag> result = Generics.newArrayList();
 for (TaggedWord tagged : t.taggedYield()) {
  result.add(new WordTag(tagged.word(), tagged.tag()));
 }
 return result;
}

/**
 * Tags a sentence of CoreLabels in place: the sentence is run through
 * {@code tagSentence} and each resulting tag is written back onto the
 * CoreLabel at the same position.  If reuseTags is set to true, any
 * tags supplied with the CoreLabels are taken as correct.
 *
 * @param sentence  the CoreLabels to tag; modified by this call
 * @param reuseTags passed through to {@code tagSentence}
 * @throws AssertionError if the tagger returns a list whose length
 *         differs from that of {@code sentence}
 */
public void tagCoreLabels(List<CoreLabel> sentence,
             boolean reuseTags) {
 final List<TaggedWord> tagged = tagSentence(sentence, reuseTags);
 final boolean sameLength = tagged.size() == sentence.size();
 if (!sameLength) {
  throw new AssertionError("Tagged word list not the same length " +
               "as the original sentence");
 }
 final int length = sentence.size();
 for (int pos = 0; pos < length; ++pos) {
  CoreLabel target = sentence.get(pos);
  target.setTag(tagged.get(pos).tag());
 }
}

String cat = t.label().value();
if (t.isLeaf()) {
 Label label = new Word(cat);//new CategoryWordTag(cat,cat,"");
 return tf.newLeaf(label);
if (t.isPreTerminal()) {
 Tree childResult = transformTree(t.getChild(0));
 String word = childResult.value();  // would be nicer if Word/CWT ??
 TaggedWord head = new TaggedWord(word, tag);
 result = binarizeLocalTree(result, headNum, head);

   TaggedWord tw = new TaggedWord(word.word(), ((HasTag) word).tag());
   sentenceB.add(tw);
  } else {
 CoreLabel boundary = new CoreLabel();
 boundary.setWord(Lexicon.BOUNDARY);
 boundary.setValue(Lexicon.BOUNDARY);
 boundary.setTag(Lexicon.BOUNDARY_TAG);
 boundary.setIndex(sentence.size()+1);//1-based indexing used in the parser
 sentenceB.add(boundary);
} else {
 sentenceB.add(new TaggedWord(Lexicon.BOUNDARY, Lexicon.BOUNDARY_TAG));

private int initialBetasAndLexicon(Tree tree, int position, double weight) {
 if (tree.isLeaf()) {
 if (tree.isPreTerminal()) {
  String tag = tree.label().value();
  String word = tree.children()[0].label().value();
  TaggedWord tw = new TaggedWord(word, state(tag, 0));
  lex.train(tw, position, weight);
  return (position + 1);

/**
 * Appends the tagged yield of this tree to the supplied list.  For
 * every preterminal node, a {@code TaggedWord} built from the label of
 * its first child (the word) and its own label (the tag) is added; all
 * other nodes simply recurse into their children in left to right
 * order.  The same list is threaded through the whole recursion, so
 * only one List is used.
 * <p>
 * <i>Implementation note:</i> when we summon up enough courage, this
 * method will be changed to take and return a {@code List<W extends TaggedWord>}.
 *
 * @param ty The list that receives the tagged yield. Normally this is
 *           empty when the routine is called; if it is not, the new
 *           entries are added to the end of the list.
 * @return the same list {@code ty} that was passed in
 */
public <X extends List<TaggedWord>> X taggedYield(X ty) {
 if (!isPreTerminal()) {
  // Not a preterminal: collect from each subtree into the shared list.
  for (Tree child : children()) {
   child.taggedYield(ty);
  }
  return ty;
 }
 TaggedWord entry = new TaggedWord(firstChild().label(), label());
 ty.add(entry);
 return ty;
}

/**
 * Binarizes one local tree, dispatching on the {@code markovFactor}
 * and {@code insideFactor} settings.
 * <p>
 * Without markov factoring the work is delegated to
 * {@code insideBinarizeLocalTree} or {@code outsideBinarizeLocalTree}.
 * With markov factoring the label of {@code t} is first replaced by a
 * {@code CategoryWordTag} carrying the category plus the head's word
 * and tag (so {@code t} itself is modified), and the matching markov
 * binarizer is called.  When {@code DEBUG} is on, the old and new local
 * trees are printed to standard output.
 *
 * @param t       the local tree to binarize
 * @param headNum passed through to the binarizer as the head child position
 * @param head    the head word and tag of the local tree
 * @return the binarized tree produced by the selected binarizer
 */
Tree binarizeLocalTree(Tree t, int headNum, TaggedWord head) {
 if (!markovFactor) {
  if (insideFactor) {
   return insideBinarizeLocalTree(t, headNum, head, 0, 0);
  }
  final String category = t.label().value();
  return outsideBinarizeLocalTree(t, category, category, headNum, head, 0, "", 0, "");
 }

 final String topCat = t.label().value();
 t.setLabel(new CategoryWordTag(topCat, head.word(), head.tag()));

 final Tree binarized;
 if (insideFactor) {
  binarized = markovInsideBinarizeLocalTreeNew(t, headNum, 0, t.numChildren() - 1, true);
 } else {
  binarized = markovOutsideBinarizeLocalTree(t, head, headNum, topCat, new LinkedList<>(), false);
 }

 if (DEBUG) {
  // Word/tag printing is switched off while the trees are rendered, then switched on.
  CategoryWordTag.printWordTag = false;
  StringBuilder before = new StringBuilder();
  localTreeString(t, before, 0);
  StringBuilder after = new StringBuilder();
  localTreeString(binarized, after, 0);
  System.out.println("Old Local Tree: " + before);
  System.out.println("New Local Tree: " + after);
  CategoryWordTag.printWordTag = true;
 }
 return binarized;
}

/**
 * Sets the lemma on every leaf label of the tree, in place.  The tag
 * used for lemmatization is the leaf label's own tag when it has one;
 * otherwise it is looked up by position in the tree's tagged yield,
 * which is computed at most once.  Leaves whose label is {@code null}
 * are skipped.
 *
 * @param t the tree whose leaf labels receive lemmas
 * @return the same tree {@code t}
 * @throws IllegalArgumentException if a leaf label does not implement {@code HasLemma}
 */
@Override
public Tree transformTree(Tree t) {
 final Morphology lemmatizer = new Morphology();
 List<TaggedWord> taggedYield = null;
 int position = 0;
 for (Tree leaf : t.getLeaves()) {
  final Label leafLabel = leaf.label();
  if (leafLabel != null) {
   String ownTag = (leafLabel instanceof HasTag) ? ((HasTag) leafLabel).tag() : null;
   final String tag;
   if (ownTag != null) {
    tag = ownTag;
   } else {
    if (taggedYield == null) {
     taggedYield = t.taggedYield();
    }
    tag = taggedYield.get(position).tag();
   }
   if (!(leafLabel instanceof HasLemma)) {
    throw new IllegalArgumentException("Got a tree with labels which do not support lemma");
   }
   ((HasLemma) leafLabel).setLemma(lemmatizer.lemma(leafLabel.value(), tag, true));
   // The position only advances for leaves that have a label.
   // NOTE(review): if a null-label leaf still contributes an entry to
   // taggedYield(), later lookups would be shifted by one -- confirm.
   ++position;
  }
 }
 return t;
}

/**
 * Copies part-of-speech tags from a tree onto the tokens.  The tags are
 * read from the tagged yield of the tree's first child and assigned to
 * the tokens by position.  If anything goes wrong while doing so, the
 * single-argument {@code tagPOS(tokens)} is used as a fallback and a
 * warning containing the tree is logged.
 *
 * @param tokens the tokens to tag; modified by this call
 * @param tree   the tree whose first child supplies the tags
 */
public void tagPOS(List<CoreLabel> tokens, Tree tree) {
  try {
    final List<TaggedWord> fromTree = tree.getChild(0).taggedYield();
    int idx = 0;
    while (idx < tokens.size()) {
      String tag = fromTree.get(idx).tag();
      tokens.get(idx).setTag(tag);
      idx++;
    }
  } catch (Exception e) {
    // At least gives you something.
    tagPOS(tokens);
    LOG.warn("POS Failed:\n" + tree.pennString());
  }
}

List<TaggedWord> sentence3 = new ArrayList<>();
for (int i = 0; i < sent3.length; i++) {
 sentence3.add(new TaggedWord(sent3[i], tag3[i]));
parse.pennPrint();
parse.pennPrint();
System.out.println();
GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
for (Label lab : parse.yield()) {
 if (lab instanceof CoreLabel) {
  System.out.println(((CoreLabel) lab).toString(CoreLabel.OutputFormat.VALUE_MAP));
 } else {
  System.out.println(lab);

/**
 * Re-tags a tree in place: the tree's words are run through the tagger
 * and the value of each preterminal label is overwritten with the
 * newly assigned tag at the same position.
 *
 * @param tree   the tree whose preterminal labels are updated
 * @param tagger the tagger applied to the tree's words
 * @throws AssertionError if the tagger output and the tree's
 *         preterminal yield differ in length
 */
public static void redoTags(Tree tree, Tagger tagger) {
 final List<Word> yieldWords = tree.yieldWords();
 final List<TaggedWord> retagged = tagger.apply(yieldWords);
 final List<Label> preTerminals = tree.preTerminalYield();
 final int count = preTerminals.size();
 if (count != retagged.size()) {
  throw new AssertionError("Tags are not the same size");
 }
 for (int pos = 0; pos < count; ++pos) {
  Label preTerminal = preTerminals.get(pos);
  preTerminal.setValue(retagged.get(pos).tag());
 }
}

Set<Constituent> goldConstituents = goldTree.constituents(LabeledConstituent.factory());
Set<Constituent> guessConstituents = guessTree.constituents(LabeledConstituent.factory());
List<TaggedWord> goldWords = goldTree.taggedYield();
List<TaggedWord> guessWords = guessTree.taggedYield();
int len = Math.min(goldWords.size(), guessWords.size());
for (int i = 0; i < len; ++i) {
 String goldTag = tlp.basicCategory(goldWords.get(i).tag());
 String guessTag = tlp.basicCategory(guessWords.get(i).tag());
 if (!goldTag.equals(guessTag)) {

Javadoc

A TaggedWord object contains a word and its tag. The value() of a TaggedWord is the Word. The tag is secondary.

Most used methods

Popular in Java

Start an intent from android
findViewById (Activity)
getApplicationContext (Context)
orElseThrow (Optional)
Return the contained value, if present, otherwise throw an exception to be created by the provided s
IOException (java.io)
Signals a general, I/O-related error. Error details may be specified when calling the constructor, a
System (java.lang)
Provides access to system-related information and resources including standard input and output. Ena
Timer (java.util)
Timers schedule one-shot or recurring TimerTask for execution. Prefer java.util.concurrent.Scheduled
IOUtils (org.apache.commons.io)
General IO stream manipulation utilities. This class provides static utility methods for input/outpu
LoggerFactory (org.slf4j)
The LoggerFactory is a utility class producing Loggers for various logging APIs, most notably for lo
ImageIO (javax.imageio)
Top plugins for WebStorm

How to use TaggedWord in edu.stanford.nlp.ling

Best Java code snippets using edu.stanford.nlp.ling.TaggedWord (Showing top 20 results out of 315)

Refine search

How to use
TaggedWord
in
edu.stanford.nlp.ling