edu.stanford.nlp.ling.TaggedWord.tag java code examples

/**
 * Records each word of an already-tagged line under its tag in
 * {@code tagWordMap}.
 *
 * <p>A word is recorded only when {@code closedTags} is {@code null} or
 * contains the word's tag; all other words are skipped.
 *
 * @param line the tagged words to record
 * @param tagWordMap map from tag to the set of words seen with that tag;
 *        updated in place, with a new {@link TreeSet} created the first
 *        time a tag is encountered
 */
void addTaggedWords(List<TaggedWord> line,
          Map<String, Set<String>> tagWordMap) {
 for (TaggedWord taggedWord : line) {
  String word = taggedWord.word();
  String tag = taggedWord.tag();
  if (closedTags != null && !closedTags.contains(tag)) {
   continue;
  }
  // One lookup instead of containsKey/put/get; also replaces a null
  // mapping with a fresh set instead of failing on it.
  tagWordMap.computeIfAbsent(tag, unused -> new TreeSet<>()).add(word);
 }
}

 /**
  * Counts the positions at which the tree's tagged yield disagrees with
  * {@code tagged}, and scales that count by {@code weight}.
  *
  * <p>Each yield tag is reduced with the language pack's
  * {@code basicCategory} before comparison. Only positions present in
  * both lists are compared.
  *
  * @param tree the tree whose tagged yield is checked
  * @return the number of mismatching tags multiplied by {@code weight}
  */
 public double score(Tree tree) {
  List<TaggedWord> treeYield = tree.taggedYield();
  int compared = Math.min(treeYield.size(), tagged.size());
  int mismatches = 0;
  for (int pos = 0; pos < compared; pos++) {
   String basicTag = op.langpack().basicCategory(treeYield.get(pos).tag());
   String expectedTag = tagged.get(pos).tag();
   if (!basicTag.equals(expectedTag)) {
    mismatches++;
   }
  }
  return mismatches * weight;
 }
}

/**
 * Converts tagged words into {@code IntTaggedWord}s built against
 * {@code wordIndex} and {@code tagIndex}.
 *
 * @param taggedWords the tagged words to convert
 * @return a new list holding one {@code IntTaggedWord} per input word,
 *         in the same order
 */
protected List<IntTaggedWord> listToEvents(List<TaggedWord> taggedWords) {
 List<IntTaggedWord> events = new ArrayList<>();
 taggedWords.forEach(tagged ->
   events.add(new IntTaggedWord(tagged.word(), tagged.tag(), wordIndex, tagIndex)));
 return events;
}

/**
 * Tags a sentence of CoreLabels and writes each resulting tag back onto
 * the corresponding CoreLabel, modifying the input sentence.
 *
 * @param sentence the CoreLabels to tag; each receives a tag via
 *        {@code setTag}
 * @param reuseTags passed through to {@code tagSentence}; if true, any
 *        tags supplied with the CoreLabels are taken as correct
 * @throws AssertionError if the tagged result does not have the same
 *         length as {@code sentence}
 */
public void tagCoreLabels(List<CoreLabel> sentence,
             boolean reuseTags) {
 List<TaggedWord> tagged = tagSentence(sentence, reuseTags);
 int expected = sentence.size();
 if (tagged.size() != expected) {
  throw new AssertionError("Tagged word list not the same length as the original sentence");
 }
 for (int idx = 0; idx < expected; idx++) {
  CoreLabel label = sentence.get(idx);
  label.setTag(tagged.get(idx).tag());
 }
}

/**
 * Re-tags the words of a tree and overwrites the value of each
 * preterminal label with the newly assigned tag.
 *
 * @param tree the tree whose preterminal labels are rewritten in place
 * @param tagger the tagger applied to the tree's words
 * @throws AssertionError if the tagger's output and the tree's
 *         preterminal yield differ in length
 */
public static void redoTags(Tree tree, Tagger tagger) {
 List<Word> leaves = tree.yieldWords();
 List<TaggedWord> retagged = tagger.apply(leaves);
 List<Label> preTerminals = tree.preTerminalYield();
 if (preTerminals.size() != retagged.size()) {
  throw new AssertionError("Tags are not the same size");
 }
 int idx = 0;
 while (idx < preTerminals.size()) {
  Label preTerminal = preTerminals.get(idx);
  preTerminal.setValue(retagged.get(idx).tag());
  idx++;
 }
}

/**
 * Copies a list of tagged words, replacing each tag with its
 * {@code basicCategory} according to the given language pack.
 *
 * @param twList the tagged words to copy; not modified
 * @param tlp the language pack used to reduce each tag
 * @return a new list of new {@code TaggedWord}s, in the same order
 */
private static List<TaggedWord> cleanTags(List<TaggedWord> twList, TreebankLanguagePack tlp) {
 List<TaggedWord> cleaned = new ArrayList<>(twList.size());
 for (TaggedWord original : twList) {
  String word = original.word();
  String basicTag = tlp.basicCategory(original.tag());
  cleaned.add(new TaggedWord(word, basicTag));
 }
 return cleaned;
}

/**
 * Builds a {@code WordTag} from the word and tag of a {@code TaggedWord}.
 *
 * @param tw the tagged word to convert
 * @return a new {@code WordTag} with the same word and tag
 */
private static WordTag toWordTag(TaggedWord tw) {
 String word = tw.word();
 String tag = tw.tag();
 return new WordTag(word, tag);
}

/**
 * Prints every tagged word in the treebank whose tag is accepted by the
 * language pack's punctuation tag filter, one per line.
 *
 * <p>If no language pack is given, logs a message and prints nothing.
 *
 * @param treebank the trees to scan
 * @param tlp the language pack supplying the punctuation tag filter;
 *        may be {@code null}
 * @param pw where matching tagged words are printed
 */
private static void printPunct(Treebank treebank, TreebankLanguagePack tlp, PrintWriter pw) {
 if (tlp == null) {
  log.info("The -punct option requires you to specify -tlp");
  return;
 }
 Predicate<String> isPunctTag = tlp.punctuationTagAcceptFilter();
 for (Tree tree : treebank) {
  for (TaggedWord tagged : tree.taggedYield()) {
   if (isPunctTag.test(tagged.tag())) {
    pw.println(tagged);
   }
  }
 }
}

/**
 * Compares the gold tag of each test word with the basic category of
 * the tag returned by {@code getTag} for that word.
 *
 * @param testWords the gold-tagged words to evaluate
 * @return a two-element array: index 0 is the number of words examined,
 *         index 1 is the number whose guessed tag equals the gold tag
 */
private int[] testOnTreebank(Collection<TaggedWord> testWords) {
 int total = 0;
 int correct = 0;
 for (TaggedWord gold : testWords) {
  String goldTag = gold.tag();
  String guessTag = ctlp.basicCategory(getTag(gold.word()));
  total++;
  if (goldTag.equals(guessTag)) {
   correct++;
  }
 }
 return new int[] {total, correct};
}

/**
 * Collects the tagged yield of a tree as {@code WordTag}s, without
 * stemming.
 *
 * @param t a tree
 * @return one {@code WordTag} per element of the tree's tagged yield,
 *         in yield order
 */
private static List<WordTag> getNonStemmedWordTagsFromTree(Tree t) {
 List<WordTag> result = Generics.newArrayList();
 for (TaggedWord tagged : t.taggedYield()) {
  result.add(new WordTag(tagged.word(), tagged.tag()));
 }
 return result;
}

/**
 * Adds one weighted observation of a tagged word to the running counts.
 *
 * <p>Updates the token total, the (word, tag) count, the tag count and
 * the set of seen words.
 *
 * @param tw the observed tagged word
 * @param weight the weight added to each affected count
 */
public void train(TaggedWord tw, double weight) {
 tokens += weight;
 String observedWord = tw.word();
 String observedTag = tw.tag();
 // Count under a (word, tag) Pair rather than the TaggedWord itself,
 // because TaggedWord's equality conditions are not suitable as a key.
 Pair<String,String> key = new Pair<>(observedWord, observedTag);
 wtCount.incrementCount(key, weight);
 tagCount.incrementCount(observedTag, weight);
 seenWords.add(observedWord);
}

/**
 * Collects the tagged yield of a tree as stemmed {@code WordTag}s.
 *
 * @param t a tree
 * @return the WordTags corresponding to the leaves of the tree, each
 *         produced by {@code Morphology.stemStatic} from the leaf's word
 *         and its POS tag in the tree
 */
private static List<WordTag> getStemmedWordTagsFromTree(Tree t) {
 List<WordTag> stemmed = Generics.newArrayList();
 for (TaggedWord tagged : t.taggedYield()) {
  stemmed.add(Morphology.stemStatic(tagged.word(), tagged.tag()));
 }
 return stemmed;
}

/**
 * Adds the given sentence to the counted statistics. May be called
 * repeatedly with different sentences.
 *
 * <p>The sentence is first passed to the feature extractor; then each
 * tagged word is counted and its tag is registered for its word.
 *
 * @param sentence the tagged words to count
 * @param weight the weight given to each counted word
 */
@Override
public void train(List<TaggedWord> sentence, double weight) {
 featExtractor.train(sentence, weight);
 sentence.forEach(tagged -> {
  datumCounter.incrementCount(tagged, weight);
  tagsForWord.add(tagged.word(), tagged.tag());
 });
}

/**
 * Trains on one tagged sentence: the lexicon with weight 1.0, then the
 * tag index, the initial-tag counts and the tag-to-tag transition counts.
 *
 * <p>A tag counts as initial when no non-null tag precedes it in the
 * sentence; otherwise the transition from the preceding tag is counted.
 *
 * @param sentence the tagged words of one sentence, in order
 */
@Override
public void train(List<TaggedWord> sentence) {
 lex.train(sentence, 1.0);
 String previousTag = null;
 for (TaggedWord tagged : sentence) {
  String currentTag = tagged.tag();
  tagIndex.add(currentTag);
  if (previousTag != null) {
   ruleCounter.incrementCount2D(previousTag, currentTag);
  } else {
   initial.incrementCount(currentTag);
  }
  previousTag = currentTag;
 }
}

/**
 * Counts how often each word occurs with each tag across a treebank and
 * prints one line per word.
 *
 * <p>Each line is the word, a tab, and then for every tag seen with that
 * word: the tag, a tab, the count, and a tab.
 *
 * @param tb the treebank to count over
 * @param pw where the counts are printed
 */
private static void countTaggings(Treebank tb, final PrintWriter pw) {
 final TwoDimensionalCounter<String,String> wordTagCounts = new TwoDimensionalCounter<>();
 tb.apply(tree -> {
  for (TaggedWord tagged : tree.taggedYield()) {
   wordTagCounts.incrementCount(tagged.word(), tagged.tag());
  }
 });
 for (String word : wordTagCounts.firstKeySet()) {
  pw.print(word);
  pw.print('\t');
  Counter<String> tagCounts = wordTagCounts.getCounter(word);
  for (String tag : tagCounts.keySet()) {
   pw.print(tag);
   pw.print('\t');
   pw.print(tagCounts.getCount(tag));
   pw.print('\t');
  }
  pw.println();
 }
}

/** Builds a flat, one-level phrasal tree from a sentence.
 *  The structure is S -&gt; tag*, and each tag node has a single word leaf.
 *  For a TaggedWord the tag node is a StringLabel of its tag; for any
 *  other word the tag node is labelled "WD" via the LabelFactory.
 *  Word leaves are labelled via the LabelFactory and the root "S" node
 *  has a StringLabel.
 *
 *  @param s The Sentence to make the Tree from
 *  @param lf The LabelFactory with which to create the new Tree labels
 *  @return The one phrasal level Tree
 */
public static Tree toFlatTree(List<? extends HasWord> s, LabelFactory lf) {
 List<Tree> preTerminals = new ArrayList<>(s.size());
 for (HasWord token : s) {
  Tree leaf = new LabeledScoredTreeNode(lf.newLabel(token.word()));
  Tree preTerminal;
  if (token instanceof TaggedWord) {
   String tag = ((TaggedWord) token).tag();
   preTerminal = new LabeledScoredTreeNode(new StringLabel(tag), Collections.singletonList(leaf));
  } else {
   preTerminal = new LabeledScoredTreeNode(lf.newLabel("WD"), Collections.singletonList(leaf));
  }
  preTerminals.add(preTerminal);
 }
 return new LabeledScoredTreeNode(new StringLabel("S"), preTerminals);
}

/**
 * Trains on a single weighted tagged word.
 *
 * <p>Forwards the observation to the unknown-word model trainer, then
 * increments the seen counts for the (word, tag), (null word, tag),
 * (word, null tag) and (null word, null tag) combinations. It also
 * registers the tag-only and word-only entries, and counts the full tag
 * under its basic category.
 *
 * @param tw the observed tagged word
 * @param loc position value forwarded to the unknown-word model trainer
 * @param weight the weight added to each affected count
 */
@Override
public void train(TaggedWord tw, int loc, double weight) {
 uwModelTrainer.train(tw, loc, weight);

 IntTaggedWord wordAndTag =
  new IntTaggedWord(tw.word(), tw.tag(), wordIndex, tagIndex);
 seenCounter.incrementCount(wordAndTag, weight);

 IntTaggedWord tagOnly = new IntTaggedWord(nullWord, wordAndTag.tag);
 seenCounter.incrementCount(tagOnly, weight);

 IntTaggedWord wordOnly = new IntTaggedWord(wordAndTag.word, nullTag);
 seenCounter.incrementCount(wordOnly, weight);

 IntTaggedWord neither = new IntTaggedWord(nullWord, nullTag);
 seenCounter.incrementCount(neither, weight);

 tags.add(tagOnly);
 words.add(wordOnly);

 // Track how often each full tag occurs under its basic category.
 String fullTag = tw.tag();
 String baseTag = op.langpack().basicCategory(fullTag);
 Counter<String> perBaseTag = baseTagCounts.get(baseTag);
 if (perBaseTag == null) {
  perBaseTag = new ClassicCounter<>();
  baseTagCounts.put(baseTag, perBaseTag);
 }
 perBaseTag.incrementCount(fullTag, weight);
}

/**
 * Tags the given tokens and returns one stemmed CoreLabel per tagged
 * word.
 *
 * <p>Tags come from the loaded tagger when the {@code preTag} test
 * option is set, and from the tagged yield of a parse otherwise.
 *
 * <p>Only works on English, as it is hard coded for using the
 * Morphology class, which is English-only.
 *
 * @param tokens the words to lemmatize
 * @return a new list of CoreLabels, each carrying the word and tag of a
 *         tagged token and then passed through {@code Morphology.stem}
 */
public List<CoreLabel> lemmatize(List<? extends HasWord> tokens) {
 List<TaggedWord> tagged;
 if (!getOp().testOptions.preTag) {
  Tree parsed = parse(tokens);
  tagged = parsed.taggedYield();
 } else {
  Function<List<? extends HasWord>, List<TaggedWord>> tagger = loadTagger();
  tagged = tagger.apply(tokens);
 }

 Morphology morphology = new Morphology();
 List<CoreLabel> lemmatized = Generics.newArrayList();
 for (TaggedWord tw : tagged) {
  CoreLabel label = new CoreLabel();
  label.setWord(tw.word());
  label.setTag(tw.tag());
  morphology.stem(label);
  lemmatized.add(label);
 }
 return lemmatized;
}

/**
 * Trains on a sentence whose tags are treated as basic categories,
 * spreading each word's weight over the full tags previously counted
 * under that basic category.
 *
 * <p>For each word, every full tag recorded under the word's basic
 * category is trained with a share of {@code weight} proportional to
 * that tag's count. Words whose basic category has no recorded counts,
 * or a total count of zero, are skipped but still advance the position.
 *
 * @param sentence the tagged words of one sentence, in order
 * @param weight the total weight distributed per word; also passed to
 *        the unknown-word model trainer's {@code incrementTreesRead}
 */
@Override
public final void trainUnannotated(List<TaggedWord> sentence,
                  double weight) {
 uwModelTrainer.incrementTreesRead(weight);
 int position = 0;
 for (TaggedWord observed : sentence) {
  String baseTag = op.langpack().basicCategory(observed.tag());
  Counter<String> tagCounts = baseTagCounts.get(baseTag);
  if (tagCounts != null) {
   // Total is read once, before any training call in the inner loop.
   double total = tagCounts.totalCount();
   if (total != 0) {
    for (String fullTag : tagCounts.keySet()) {
     TaggedWord candidate = new TaggedWord(observed.word(), fullTag);
     train(candidate, position, weight * tagCounts.getCount(fullTag) / total);
    }
   }
  }
  position++;
 }
}

/**
 * Trains the unknown-word counts on a single weighted tagged word.
 *
 * <p>The word's seen count is always incremented. Once
 * {@code treesRead} exceeds {@code indexToStartUnkCounting}, a word
 * whose seen count is still below 2 also contributes to the unseen
 * counts, under its signature from the model.
 *
 * @param tw the observed tagged word
 * @param loc position value passed to the model's
 *        {@code getSignatureIndex}
 * @param weight the weight added to each affected count
 */
public void train(TaggedWord tw, int loc, double weight) {
 IntTaggedWord wordAndTag =
  new IntTaggedWord(tw.word(), tw.tag(), wordIndex, tagIndex);
 IntTaggedWord tagOnly = new IntTaggedWord(nullWord, wordAndTag.tag);
 IntTaggedWord wordOnly = new IntTaggedWord(wordAndTag.word, nullTag);
 seenCounter.incrementCount(wordOnly, weight);
 IntTaggedWord neither = NULL_ITW;

 // Unknown-word counting starts only some way through the trees
 // (treesRead is 1-based); a seen count below 2 marks the word as
 // entirely unknown.
 if (treesRead > indexToStartUnkCounting && seenCounter.getCount(wordOnly) < 2) {
  int signature = model.getSignatureIndex(wordAndTag.word, loc,
                  wordIndex.get(wordAndTag.word));
  IntTaggedWord signatureAndTag = new IntTaggedWord(signature, wordAndTag.tag);
  IntTaggedWord signatureOnly = new IntTaggedWord(signature, nullTag);
  unSeenCounter.incrementCount(signatureAndTag, weight);
  unSeenCounter.incrementCount(tagOnly, weight);
  unSeenCounter.incrementCount(signatureOnly, weight);
  unSeenCounter.incrementCount(neither, weight);
 }
}

Popular methods of TaggedWord

Popular in Java

Start an intent from android
scheduleAtFixedRate (ScheduledExecutorService)
setContentView (Activity)
getSharedPreferences (Context)
HashMap (java.util)
HashMap is an implementation of Map. All optional operations are supported. All elements are permitted…
LinkedHashMap (java.util)
LinkedHashMap is an implementation of Map that guarantees iteration order. All optional operations a
ConcurrentHashMap (java.util.concurrent)
A plug-in replacement for JDK1.5 java.util.concurrent.ConcurrentHashMap. This version is based on or
Component (java.awt)
A component is an object having a graphical representation that can be displayed on the screen and t
Response (javax.ws.rs.core)
Defines the contract between a returned instance and the runtime when an application needs to provid
Runner (org.openjdk.jmh.runner)
Top plugins for Android Studio

How to use the tag method in edu.stanford.nlp.ling.TaggedWord

Best Java code snippets using edu.stanford.nlp.ling.TaggedWord.tag (Showing top 20 results out of 315)

How to use the tag method in edu.stanford.nlp.ling.TaggedWord