/**
 * Returns a "pretty" version of the words in this Document suitable for
 * display. The default implementation returns each of the words in this
 * Document separated by spaces. Specifically, each element that implements
 * {@link HasWord} has its {@link HasWord#word} printed, and other elements
 * are skipped.
 *
 * <p>Subclasses that maintain additional information may wish to override
 * this method.
 *
 * @return the words of this Document joined by single spaces
 */
public String presentableText() {
  StringBuilder sb = new StringBuilder();
  for (Word cur : this) {
    if (sb.length() > 0) {
      sb.append(' ');  // separator only between words, not before the first
    }
    sb.append(cur.word());
  }
  return sb.toString();
}
/**
 * Formats an array of tokens for debugging: each token's text followed by
 * its character offsets as {@code word{begin, end}}, separated by spaces.
 *
 * @param tokens the tokens to render
 * @return a space-separated rendering of all tokens with their offsets
 */
public static String tokensToString(Word[] tokens) {
  StringBuilder sb = new StringBuilder(512);
  for (int i = 0; i < tokens.length; i++) {
    if (i > 0) {
      sb.append(' ');
    }
    Word token = tokens[i];
    // Append pieces individually; concatenating with '+' inside append()
    // builds a throwaway intermediate String on every iteration, which
    // defeats the point of using a StringBuilder.
    sb.append(token.word())
      .append('{')
      .append(token.beginPosition())
      .append(", ")
      .append(token.endPosition())
      .append('}');
  }
  return sb.toString();
}
/** Stems {@code w} and returns the stemmed {@code Word}. */
public Word stem(Word w) {
  String stemmed = stem(w.word());
  return new Word(stemmed);
}
// NOTE(review): fragment — the enclosing method is not visible in this view,
// and the braces opened here are closed elsewhere.
boolean justInsertedNewline = false; // to prevent contiguous newlines
// Scan the input tokens; a token of the form "<...>" is treated as a
// markup tag (presumably SGML/XML — confirm against the full method) and,
// when markLineBreaks is set, may trigger insertion of a line break.
for (Word w : in) {
  String ws = w.word();
  if (ws.startsWith("<") && ws.endsWith(">")) {
    if (markLineBreaks && !justInsertedNewline) {
/** Return the tokens using PTB tokenizer. * * @param str String to tokenize * @return List of tokens */ private String[] ptbTokenize(String str) { // todo [cdm 2017]: Someday should generalize this to allow use of other tokenizers if (ptbFactory==null) { ptbFactory = PTBTokenizer.factory(); } Tokenizer<Word> tokenizer = ptbFactory.getTokenizer(new StringReader(str)); List<Word> words = tokenizer.tokenize(); String[] res = new String[words.size()]; for (int i = 0, sz = words.size(); i < sz; i++) { res[i] = words.get(i).word(); } return res; }
/**
 * Test program demonstrating the Stemmer. With {@code -file <name>} the
 * named file is tokenized with a PTB tokenizer and each token is stemmed;
 * otherwise each command-line argument is stemmed directly. Results are
 * written to standard output, space-separated. Note that the word stemmed
 * is expected to be in lower case: forcing lower case must be done outside
 * the Stemmer class.
 *
 * Usage: Stemmer file-name file-name ...
 *
 * @param args either {@code -file <filename>} or a list of words to stem
 * @throws IOException if the input file cannot be opened or read
 */
public static void main(String[] args) throws IOException {
  Stemmer s = new Stemmer();
  if (args[0].equals("-file")) {
    // try-with-resources: the original leaked the FileInputStream/Reader.
    try (InputStreamReader reader =
             new InputStreamReader(new FileInputStream(args[1]), "utf-8")) {
      Iterator<Word> it = PTBTokenizer.newPTBTokenizer(reader);
      while (it.hasNext()) {
        Word token = it.next();
        System.out.print(s.stem(token.word()));
        System.out.print(' ');
      }
    }
  } else {
    for (String arg : args) {
      System.out.print(s.stem(arg));
      System.out.print(' ');
    }
  }
  System.out.println();
}
// NOTE(review): fragment — the enclosing method and loop body continuation
// are not visible in this view.
int numAdded = 0;
// Drain the tokenizer; each token's surface text is processed below.
while (tok.hasNext()) {
  String s = tok.next().word();
// NOTE(review): fragment — enclosing loop/method not visible. Appears to walk
// a transition backwards through a DFSA: prepend the transition's input word
// (skipping bare single-space tokens) and step to the transition's source state.
DFSAState<Word, Integer> fromState = tr.getSource();
Word word = tr.getInput();
if (!word.word().equals(" ")) segmentedWords.add(0, word);
i = fromState.stateID();
// NOTE(review): fragment — the loop's exit (presumably a break when the
// iterator is exhausted) lies outside the visible span.
// Prints the sentence's words separated by single spaces.
for (; ;) {
  Word word = (Word) sentIter.next();
  pw.print(word.word());
  if (sentIter.hasNext()) {
    pw.print(" ");
import edu.stanford.nlp.ling.Word;

// NOTE(review): example snippet, not compilable as-is — "List<Word> words = ..."
// is a placeholder, and "args(1)" looks like Scala call syntax; in Java this
// would presumably be args[1]. Confirm against the original context before use.
List<Word> words = ...
for (Word word : words) {
  if (word.word().equals(args(1))) {
    System.err.println("Yes!");
  }
}
/**
 * Tokenizes the given string and returns the surface text of each token.
 *
 * @param string the text to tokenize
 * @return a list containing each token's text, in order
 */
public List<String> tokenizeString(String string) {
  List<String> result = new ArrayList<String>();
  for (Word token : tokenize(string)) {
    result.add(token.word());
  }
  return result;
}
/**
 * Returns a "pretty" version of the words in this Document suitable for
 * display. The default implementation returns each of the words in this
 * Document separated by spaces. Specifically, each element that implements
 * {@link HasWord} has its {@link HasWord#word} printed, and other elements
 * are skipped.
 *
 * <p>Subclasses that maintain additional information may wish to override
 * this method.
 *
 * @return the words of this Document joined by single spaces
 */
public String presentableText() {
  StringBuilder sb = new StringBuilder();
  for (Word cur : this) {
    if (sb.length() > 0) {
      sb.append(' ');  // separator only between words, not before the first
    }
    sb.append(cur.word());
  }
  return sb.toString();
}
/**
 * Returns a "pretty" version of the words in this Document suitable for
 * display. The default implementation returns each of the words in this
 * Document separated by spaces. Specifically, each element that implements
 * {@link HasWord} has its {@link HasWord#word} printed, and other elements
 * are skipped.
 *
 * <p>Subclasses that maintain additional information may wish to override
 * this method.
 *
 * @return the words of this Document joined by single spaces
 */
public String presentableText() {
  StringBuilder sb = new StringBuilder();
  for (Word cur : this) {
    if (sb.length() > 0) {
      sb.append(' ');  // separator only between words, not before the first
    }
    sb.append(cur.word());
  }
  return sb.toString();
}
/**
 * Tokenizes a sentence with the PTB tokenizer, prepending the CoNLL-09 ROOT
 * marker so the result aligns with 1-based token indexing.
 *
 * @param sentence the sentence to tokenize
 * @return an array whose element 0 is the ROOT marker, followed by the tokens
 */
@Override
public String[] tokenize(String sentence) {
  Reader reader = new StringReader(sentence);
  PTBTokenizer<Word> tokenizer = PTBTokenizer.newPTBTokenizer(reader);
  List<String> pieces = new ArrayList<String>();
  while (tokenizer.hasNext()) {
    pieces.add(tokenizer.next().word());
  }
  String[] result = new String[pieces.size() + 1];
  result[0] = is2.io.CONLLReader09.ROOT;
  int pos = 1;
  for (String piece : pieces) {
    result[pos++] = piece;
  }
  return result;
}
/**
 * Tokenizes a sentence with the PTB tokenizer; element 0 of the result is
 * the CoNLL-09 ROOT marker, followed by the token texts in order.
 *
 * @param sentence the sentence to tokenize
 * @return the ROOT marker plus the tokens
 */
@Override
public String[] tokenize(String sentence) {
  PTBTokenizer<Word> tokenizer =
      PTBTokenizer.newPTBTokenizer(new StringReader(sentence));
  List<String> texts = new ArrayList<>();
  while (tokenizer.hasNext()) {
    texts.add(tokenizer.next().word());
  }
  String[] out = new String[texts.size() + 1];
  out[0] = is2.io.CONLLReader09.ROOT;
  for (int i = 0; i < texts.size(); i++) {
    out[i + 1] = texts.get(i);
  }
  return out;
}
/** Return the tokens using PTB tokenizer. * * @param str String to tokenize * @return List of tokens */ private String[] ptbTokenize(String str) { // todo [cdm 2017]: Someday should generalize this to allow use of other tokenizers if (ptbFactory==null) { ptbFactory = PTBTokenizer.factory(); } Tokenizer<Word> tokenizer = ptbFactory.getTokenizer(new StringReader(str)); List<Word> words = tokenizer.tokenize(); String[] res = new String[words.size()]; for (int i = 0, sz = words.size(); i < sz; i++) { res[i] = words.get(i).word(); } return res; }
/**
 * Returns the next segmented word, pulling and segmenting whole tokens from
 * the underlying tokenizer on demand; returns null once input is exhausted
 * or a null token text is encountered.
 */
@Override
protected Word getNext() {
  // Refill the per-token word iterator until it has something to yield.
  while (wordIter == null || !wordIter.hasNext()) {
    if (!tok.hasNext()) {
      return null;
    }
    String text = tok.next().word();
    if (text == null) {
      return null;
    }
    wordIter = segmentWords(text).iterator();
  }
  return wordIter.next();
}
@Override public StringInText[] tokenizeplus(String sentence) { Reader r = new StringReader(sentence); List<StringInText> l = new ArrayList<>(); for (String s : tokenize(sentence)) { Word w = new Word(s); l.add(new StringInText(w.word(), w.beginPosition() + startpos, w .endPosition() + startpos)); } StringInText[] tok = new StringInText[l.size()]; // tok[0]=new StringInText(is2.io.CONLLReader09.ROOT,0,0); int i = 0; for (StringInText s : l) tok[i++] = s; startpos += (1 + sentence.length()); return tok; } }
/**
 * Tokenizes a sentence with the PTB tokenizer, returning each token together
 * with its character span shifted by the running offset {@code startpos};
 * element 0 is the CoNLL-09 ROOT marker with a zero-length span. As a side
 * effect, advances {@code startpos} past this sentence.
 *
 * @param sentence the sentence to tokenize
 * @return the ROOT marker followed by the offset-shifted tokens
 */
public StringInText[] tokenizeplus(String sentence) {
  Reader reader = new StringReader(sentence);
  PTBTokenizer<Word> tokenizer = PTBTokenizer.newPTBTokenizer(reader);
  List<StringInText> spans = new ArrayList<>();
  while (tokenizer.hasNext()) {
    Word w = tokenizer.next();
    spans.add(new StringInText(w.word(),
        w.beginPosition() + startpos, w.endPosition() + startpos));
  }
  StringInText[] result = new StringInText[spans.size() + 1];
  result[0] = new StringInText(is2.io.CONLLReader09.ROOT, 0, 0);
  int pos = 1;
  for (StringInText s : spans) {
    result[pos++] = s;
  }
  startpos += (1 + sentence.length());
  return result;
}