/**
 * Returns this item's word and tag joined by the given divider string.
 *
 * @param divider the separator placed between word and tag
 * @return word + divider + tag
 */
public String toString(String divider) {
  final String w = word();
  return w + divider + tag;
}
/**
 * Merges the words of the tree's tagged yield into one underscore-joined
 * string (e.g. "New_York_City").
 *
 * @param t the tree whose leaf words are merged
 * @return the underscore-joined words, or the empty string for an empty yield
 *     (the old {@code sb.substring(0, sb.length() - 1)} threw
 *     StringIndexOutOfBoundsException on an empty yield)
 */
private static String mergeLeavesIntoCollocatedString(Tree t) {
  StringBuilder sb = new StringBuilder(160);
  for (TaggedWord tw : t.taggedYield()) {
    if (sb.length() > 0) {
      sb.append('_');
    }
    sb.append(tw.word());
  }
  return sb.toString();
}
/**
 * Counts every word of the sentence into {@code wordCounter} with the
 * given weight; tags are ignored here.
 *
 * @param sentence tagged words to tally
 * @param weight count increment applied to each word
 */
public void train(List<TaggedWord> sentence, double weight) {
  for (TaggedWord tw : sentence) {
    wordCounter.incrementCount(tw.word(), weight);
  }
}
/**
 * Merges the words of all trees' tagged yields into one underscore-joined
 * string, in tree order.
 *
 * @param trees the trees whose leaf words are merged
 * @return the underscore-joined words, or the empty string when no tree
 *     yields any word (the old {@code sb.substring(0, sb.length() - 1)}
 *     threw StringIndexOutOfBoundsException in that case)
 */
private static String mergeLeavesIntoCollocatedString(Tree[] trees) {
  StringBuilder sb = new StringBuilder(160);
  for (Tree t : trees) {
    for (TaggedWord tw : t.taggedYield()) {
      if (sb.length() > 0) {
        sb.append('_');
      }
      sb.append(tw.word());
    }
  }
  return sb.toString();
}
/**
 * Given a line, split it into tagged words and add each word to
 * the given tagWordMap. Only tags present in {@code closedTags} are
 * recorded; a null {@code closedTags} means every tag is accepted.
 *
 * @param line the tagged words to add
 * @param tagWordMap map from tag to the sorted set of words seen with it
 */
void addTaggedWords(List<TaggedWord> line, Map<String, Set<String>> tagWordMap) {
  for (TaggedWord taggedWord : line) {
    String word = taggedWord.word();
    String tag = taggedWord.tag();
    if (closedTags == null || closedTags.contains(tag)) {
      // computeIfAbsent replaces the containsKey/put/get triple
      tagWordMap.computeIfAbsent(tag, k -> new TreeSet<>()).add(word);
    }
  }
}
/**
 * Converts a list of tagged words into IntTaggedWord events using the
 * lexicon's word and tag indices.
 *
 * @param taggedWords words to convert
 * @return one IntTaggedWord per input word, in order
 */
protected List<IntTaggedWord> listToEvents(List<TaggedWord> taggedWords) {
  // Presize to avoid resizing; matches the sizing convention used elsewhere
  List<IntTaggedWord> itwList = new ArrayList<>(taggedWords.size());
  for (TaggedWord tw : taggedWords) {
    itwList.add(new IntTaggedWord(tw.word(), tw.tag(), wordIndex, tagIndex));
  }
  return itwList;
}
/**
 * Replaces each tag in the list with its basic category, leaving the
 * words unchanged.
 *
 * @param twList tagged words to clean
 * @param tlp language pack supplying {@code basicCategory}
 * @return a new list of tagged words with simplified tags
 */
private static List<TaggedWord> cleanTags(List<TaggedWord> twList, TreebankLanguagePack tlp) {
  List<TaggedWord> cleaned = new ArrayList<>(twList.size());
  for (TaggedWord tw : twList) {
    cleaned.add(new TaggedWord(tw.word(), tlp.basicCategory(tw.tag())));
  }
  return cleaned;
}
/** Converts a TaggedWord into the equivalent WordTag. */
private static WordTag toWordTag(TaggedWord tw) {
  String word = tw.word();
  String tag = tw.tag();
  return new WordTag(word, tag);
}
/**
 * Scores the tagger against a collection of gold-tagged words.
 *
 * @param testWords gold-standard tagged words to evaluate on
 * @return a two-element array: index 0 holds the total number of words,
 *     index 1 the number tagged correctly (after mapping the guess to its
 *     basic category)
 */
private int[] testOnTreebank(Collection<TaggedWord> testWords) {
  // new int[2] is already zero-initialized in Java; explicit zeroing removed
  int[] totalAndCorrect = new int[2];
  for (TaggedWord word : testWords) {
    String goldTag = word.tag();
    String guessTag = ctlp.basicCategory(getTag(word.word()));
    totalAndCorrect[0]++;
    if (goldTag.equals(guessTag)) {
      totalAndCorrect[1]++;
    }
  }
  return totalAndCorrect;
}
/**
 * Extracts the (unstemmed) word/tag pairs from the leaves of the tree.
 *
 * @param t the tree to read
 * @return a WordTag for each leaf, in yield order
 */
private static List<WordTag> getNonStemmedWordTagsFromTree(Tree t) {
  List<WordTag> wordTags = Generics.newArrayList();
  for (TaggedWord leaf : t.taggedYield()) {
    wordTags.add(new WordTag(leaf.word(), leaf.tag()));
  }
  return wordTags;
}
public void train(TaggedWord tw, double weight) { tokens = tokens + weight; String word = tw.word(); String tag = tw.tag(); // TaggedWord has crummy equality conditions Pair<String,String> wt = new Pair<>(word, tag); wtCount.incrementCount(wt, weight); tagCount.incrementCount(tag, weight); seenWords.add(word); }
/**
 * @param t a tree
 * @return the WordTags corresponding to the leaves of the tree,
 *     stemmed according to their POS tags in the tree
 */
private static List<WordTag> getStemmedWordTagsFromTree(Tree t) {
  List<WordTag> stemmedWordTags = Generics.newArrayList();
  for (TaggedWord leaf : t.taggedYield()) {
    stemmedWordTags.add(Morphology.stemStatic(leaf.word(), leaf.tag()));
  }
  return stemmedWordTags;
}
/**
 * Add the given sentence to the statistics counted. Can
 * be called multiple times with different sentences.
 *
 * @param sentence tagged words to record
 * @param weight count increment for each datum
 */
@Override
public void train(List<TaggedWord> sentence, double weight) {
  featExtractor.train(sentence, weight);
  for (TaggedWord tw : sentence) {
    datumCounter.incrementCount(tw, weight);
    tagsForWord.add(tw.word(), tw.tag());
  }
}
/**
 * Tallies word/tag co-occurrence counts over the treebank and prints one
 * tab-separated line per word: the word, then alternating tag and count
 * fields for every tag seen with it.
 *
 * @param tb the treebank to count taggings over
 * @param pw destination for the tab-separated report
 */
private static void countTaggings(Treebank tb, final PrintWriter pw) {
  final TwoDimensionalCounter<String,String> wtc = new TwoDimensionalCounter<>();
  tb.apply(tree -> {
    for (TaggedWord tw : tree.taggedYield()) {
      wtc.incrementCount(tw.word(), tw.tag());
    }
  });
  for (String word : wtc.firstKeySet()) {
    pw.print(word);
    pw.print('\t');
    Counter<String> tagCounts = wtc.getCounter(word);
    for (String tag : tagCounts.keySet()) {
      pw.print(tag + '\t' + tagCounts.getCount(tag) + '\t');
    }
    pw.println();
  }
}
@Override public void train(TaggedWord tw, int loc, double weight) { uwModelTrainer.train(tw, loc, weight); IntTaggedWord iTW = new IntTaggedWord(tw.word(), tw.tag(), wordIndex, tagIndex); seenCounter.incrementCount(iTW, weight); IntTaggedWord iT = new IntTaggedWord(nullWord, iTW.tag); seenCounter.incrementCount(iT, weight); IntTaggedWord iW = new IntTaggedWord(iTW.word, nullTag); seenCounter.incrementCount(iW, weight); IntTaggedWord i = new IntTaggedWord(nullWord, nullTag); seenCounter.incrementCount(i, weight); // rules.add(iTW); tags.add(iT); words.add(iW); String tag = tw.tag(); String baseTag = op.langpack().basicCategory(tag); Counter<String> counts = baseTagCounts.get(baseTag); if (counts == null) { counts = new ClassicCounter<>(); baseTagCounts.put(baseTag, counts); } counts.incrementCount(tag, weight); }
/**
 * Only works on English, as it is hard coded for using the
 * Morphology class, which is English-only.
 *
 * @param tokens the words to lemmatize
 * @return CoreLabels carrying each token's word, POS tag, and lemma
 */
public List<CoreLabel> lemmatize(List<? extends HasWord> tokens) {
  // Obtain POS tags either from the standalone tagger or by parsing
  final List<TaggedWord> tagged;
  if (getOp().testOptions.preTag) {
    Function<List<? extends HasWord>, List<TaggedWord>> tagger = loadTagger();
    tagged = tagger.apply(tokens);
  } else {
    tagged = parse(tokens).taggedYield();
  }

  Morphology morpha = new Morphology();
  List<CoreLabel> lemmas = Generics.newArrayList();
  for (TaggedWord tw : tagged) {
    CoreLabel label = new CoreLabel();
    label.setWord(tw.word());
    label.setTag(tw.tag());
    morpha.stem(label);  // sets the lemma on the label in place
    lemmas.add(label);
  }
  return lemmas;
}
/**
 * Trains on an unannotated (tag-unreliable) sentence by redistributing each
 * word's weight over all full tags previously seen for the word's basic
 * category, proportionally to their observed counts. Words whose basic
 * category has no recorded counts are skipped; the sentence position
 * {@code loc} advances for every word regardless.
 *
 * @param sentence the tagged sentence to train from
 * @param weight total weight to distribute per word
 */
@Override
public final void trainUnannotated(List<TaggedWord> sentence, double weight) {
  uwModelTrainer.incrementTreesRead(weight);
  int loc = 0;
  for (TaggedWord tw : sentence) {
    String baseTag = op.langpack().basicCategory(tw.tag());
    Counter<String> counts = baseTagCounts.get(baseTag);
    // Skip words with no usable distribution, but still advance loc
    if (counts != null && counts.totalCount() != 0) {
      double totalCount = counts.totalCount();
      for (String tag : counts.keySet()) {
        double share = counts.getCount(tag) / totalCount;
        train(new TaggedWord(tw.word(), tag), loc, weight * share);
      }
    }
    ++loc;
  }
}
/**
 * Trains this lexicon on a single tagged word at sentence position
 * {@code loc}. Increments the seen count for the word marginal, and —
 * once enough trees have been read — also accumulates unknown-word
 * signature counts for words seen fewer than twice.
 *
 * NOTE(review): the original javadoc said "Collection of trees", but this
 * method handles one TaggedWord; presumably the tree-level train() calls it
 * per leaf — confirm against the caller.
 */
public void train(TaggedWord tw, int loc, double weight) { IntTaggedWord iTW = new IntTaggedWord(tw.word(), tw.tag(), wordIndex, tagIndex); IntTaggedWord iT = new IntTaggedWord(nullWord, iTW.tag); IntTaggedWord iW = new IntTaggedWord(iTW.word, nullTag); seenCounter.incrementCount(iW, weight); // word marginal counted unconditionally; iTW/iT themselves are not added to seenCounter here
 IntTaggedWord i = NULL_ITW; if (treesRead > indexToStartUnkCounting) { // start doing this once some way through trees; // treesRead is 1 based counting
 if (seenCounter.getCount(iW) < 2) { // it's an entirely unknown word (count includes the increment just made above)
 int s = model.getSignatureIndex(iTW.word, loc, wordIndex.get(iTW.word)); // map the unknown word to its signature class
 IntTaggedWord iTS = new IntTaggedWord(s, iTW.tag); IntTaggedWord iS = new IntTaggedWord(s, nullTag); // count signature+tag, signature marginal, tag marginal, and the overall total
 unSeenCounter.incrementCount(iTS, weight); unSeenCounter.incrementCount(iT, weight); unSeenCounter.incrementCount(iS, weight); unSeenCounter.incrementCount(i, weight); } } }