edu.stanford.nlp.process Java code examples

/**
 * Builds a {@link WhitespaceTokenizer} over the given reader whose tokens
 * are created by a {@link WordTokenFactory}.
 *
 * @param r The Reader whose contents will be tokenized
 * @param eolIsSignificant Handed unchanged to the WhitespaceTokenizer
 *                         constructor as its third argument
 * @return A WhitespaceTokenizer typed to produce {@link Word} tokens
 */
public static WhitespaceTokenizer<Word> newWordWhitespaceTokenizer(Reader r, boolean eolIsSignificant) {
 WordTokenFactory wordFactory = new WordTokenFactory();
 return new WhitespaceTokenizer<>(wordFactory, r, eolIsSignificant);
}

/**
 * Creates the default TokenizerFactory: a factory of Word objects obtained
 * from {@code newPTBTokenizerFactory} with a {@link WordTokenFactory} and
 * an empty options String. Carriage returns are treated as normal
 * whitespace.
 * <p>
 * THIS METHOD IS INVOKED BY REFLECTION BY SOME OF THE JAVANLP
 * CODE TO LOAD A TOKENIZER FACTORY.  IT SHOULD BE PRESENT IN A
 * TokenizerFactory.
 *
 * @return A TokenizerFactory that returns Word objects
 */
public static TokenizerFactory<Word> newTokenizerFactory() {
 WordTokenFactory wordFactory = new WordTokenFactory();
 String emptyOptions = "";
 return newPTBTokenizerFactory(wordFactory, emptyOptions);
}

/**
 * Creates a PTBTokenizerFactory typed to CoreLabel, built from a fresh
 * {@link CoreLabelTokenFactory} and the options passed in.
 *
 * @param options A String of options. Pass an empty String for the
 *                default, recommended options for PTB-style tokenization
 *                compatibility.
 * @return A PTBTokenizerFactory that returns CoreLabel objects
 */
public static PTBTokenizerFactory<CoreLabel> newCoreLabelTokenizerFactory(String options) {
 CoreLabelTokenFactory labelFactory = new CoreLabelTokenFactory();
 return new PTBTokenizerFactory<>(labelFactory, options);
}

/**
 * Obtains a tokenizer factory for CoreLabel tokens by delegating to
 * {@code PTBTokenizerFactory.newPTBTokenizerFactory} with a fresh
 * {@link CoreLabelTokenFactory}.
 *
 * @param options Options String handed unchanged to the delegate
 * @return A TokenizerFactory that vends CoreLabel tokens
 */
public static TokenizerFactory<CoreLabel> coreLabelFactory(String options) {
 TokenizerFactory<CoreLabel> labelTokenizerFactory =
     PTBTokenizerFactory.newPTBTokenizerFactory(new CoreLabelTokenFactory(), options);
 return labelTokenizerFactory;
}

/**
 * Creates a PTBTokenizer over the given reader that yields Word tokens,
 * using an empty options String. Carriage returns are treated as normal
 * whitespace.
 *
 * @param r The Reader whose contents will be tokenized
 * @return A PTBTokenizer that tokenizes a stream to objects of type
 *          {@link Word}
 */
public static PTBTokenizer<Word> newPTBTokenizer(Reader r) {
 final String emptyOptions = "";
 WordTokenFactory wordFactory = new WordTokenFactory();
 return new PTBTokenizer<>(r, wordFactory, emptyOptions);
}

/**
 * Creates a WhitespaceTokenizerFactory of Word objects, passing
 * {@code false} as the factory's second constructor argument so that
 * carriage returns are treated as normal whitespace.
 * <p>
 * THIS METHOD IS INVOKED BY REFLECTION BY SOME OF THE JAVANLP
 * CODE TO LOAD A TOKENIZER FACTORY.  IT SHOULD BE PRESENT IN A
 * TokenizerFactory.
 *
 * @return A TokenizerFactory that returns Word objects
 */
public static TokenizerFactory<Word> newTokenizerFactory() {
 WordTokenFactory wordFactory = new WordTokenFactory();
 return new WhitespaceTokenizerFactory<>(wordFactory, false);
}

/**
 * Creates a PTBTokenizerFactory typed to Word, built from a fresh
 * {@link WordTokenFactory} and the options passed in.
 * <p>
 * THIS METHOD IS INVOKED BY REFLECTION BY SOME OF THE JAVANLP
 * CODE TO LOAD A TOKENIZER FACTORY.  IT SHOULD BE PRESENT IN A
 * TokenizerFactory.
 *
 * @param options A String of options
 * @return A PTBTokenizerFactory that returns Word objects
 */
public static PTBTokenizerFactory<Word> newWordTokenizerFactory(String options) {
 WordTokenFactory wordFactory = new WordTokenFactory();
 return new PTBTokenizerFactory<>(wordFactory, options);
}

/**
 * Lemmatizes a word by delegating to {@code lemmatize}, supplying the
 * {@code lexer} in scope and the value of {@code lexer.option(1)}.
 *
 * @param word The word to lemmatize
 * @param tag The tag forwarded with the word (presumably its part of
 *            speech; confirm against {@code lemmatize})
 * @return The String produced by {@code lemmatize} for these arguments
 */
public String lemma(String word, String tag) {
 final String lemmatized = lemmatize(word, tag, lexer, lexer.option(1));
 return lemmatized;
}

/**
 * Computes the word shape of a String with the chosen word shaper.
 * Delegates to the three-argument overload, passing {@code null} as the
 * third argument.
 *
 * @param inStr String to calculate word shape of
 * @param wordShaper Constant for which shaping formula to use
 * @return The wordshape String
 */
public static String wordShape(String inStr, int wordShaper) {
 final String shape = wordShape(inStr, wordShaper, null);
 return shape;
}

/** Lemmatize the word, taking its tag into account.
 *  Words other than proper nouns will be changed to all lowercase.
 *  Delegates to the three-argument overload with {@code true} as the
 *  final argument.
 *
 *  @param word The word to lemmatize
 *  @param tag What part of speech to assume for it.
 *  @return The lemma for the word
 */
public static synchronized String lemmaStatic(String word, String tag) {
 final String lemmatized = lemmaStatic(word, tag, true);
 return lemmatized;
}

ExtractorWordShapeConjunction(int left, int right, String wsc) {
 super();
 this.left = left;
 this.right = right;
 wordShaper = WordShapeClassifier.lookupShaper(wsc);
 name = "ExtractorWordShapeConjunction(" + left + ',' + right + ',' + wsc + ')';
}

/**
 * Allocates a 6-element int array and hands it, together with
 * {@code ZZ_TRANS_PACKED_0} and a starting offset of 0, to the
 * three-argument {@code zzUnpackTrans} overload.
 *
 * @return The array after the overload has processed it
 */
private static int [] zzUnpackTrans() {
 final int[] unpacked = new int[6];
 // The offset returned by the overload is not needed here.
 zzUnpackTrans(ZZ_TRANS_PACKED_0, 0, unpacked);
 return unpacked;
}

/**
 * Allocates a 3-element int array and hands it, together with
 * {@code ZZ_ATTRIBUTE_PACKED_0} and a starting offset of 0, to the
 * three-argument {@code zzUnpackAttribute} overload.
 *
 * @return The array after the overload has processed it
 */
private static int [] zzUnpackAttribute() {
 final int[] unpacked = new int[3];
 // The offset returned by the overload is not needed here.
 zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, 0, unpacked);
 return unpacked;
}

/**
 * Splits document text into sentence strings by delegating to the
 * two-argument overload with {@code false} as the second argument.
 *
 * @param contentString Chinese document text
 * @return a List of sentence strings
 * @throws IOException propagated from the two-argument overload
 */
public static List<String> fromPlainText(String contentString) throws IOException {
 final List<String> sentences = fromPlainText(contentString, false);
 return sentences;
}

/**
 * Scans positions 0 through {@code j} inclusive and reports whether
 * {@code cons} is false for at least one of them (presumably meaning the
 * span contains a vowel; confirm against {@code cons}).
 *
 * @return true as soon as a position fails {@code cons}; false if every
 *         position in the range passes
 */
private final boolean vowelinstem() {
 int pos = 0;
 while (pos <= j) {
  if (!cons(pos)) {
   return true;
  }
  pos++;
 }
 return false;
}

/**
 * Create a new StoplistFilter backed by a default-constructed
 * {@link StopList}, by delegating to the constructor that accepts a
 * StopList.
 */
public StoplistFilter() {
 this(new StopList());
}

/**
 * Creates a PTBTokenizerFactory typed to CoreLabel from two boolean
 * settings and a fresh {@link CoreLabelTokenFactory}. The constructor's
 * third argument is fixed to {@code false}.
 *
 * @param tokenizeNLs Forwarded as the constructor's first argument
 * @param invertible Forwarded as the constructor's second argument
 * @return A PTBTokenizerFactory that returns CoreLabel objects
 */
public static PTBTokenizerFactory<CoreLabel> newPTBTokenizerFactory(boolean tokenizeNLs, boolean invertible) {
 CoreLabelTokenFactory labelFactory = new CoreLabelTokenFactory();
 return new PTBTokenizerFactory<>(tokenizeNLs, invertible, false, labelFactory);
}

/**
 * Creates a WhitespaceTokenizerFactory of Word objects from a fresh
 * {@link WordTokenFactory}.
 *
 * @param eolIsSignificant Handed unchanged to the
 *                         WhitespaceTokenizerFactory constructor
 * @return A TokenizerFactory that returns Word objects
 */
public static TokenizerFactory<Word> factory(boolean eolIsSignificant) {
 WordTokenFactory wordFactory = new WordTokenFactory();
 return new WhitespaceTokenizerFactory<>(wordFactory, eolIsSignificant);
}

/**
 * Checks the three positions ending at {@code i}: requires
 * {@code cons(i)} true, {@code cons(i - 1)} false and {@code cons(i - 2)}
 * true, and additionally that {@code b[i]} is none of 'w', 'x' or 'y'.
 *
 * @param i Index of the last position to examine; values below 2 always
 *          yield false
 * @return true only when every condition above holds
 */
private final boolean cvc(int i) {
 if (i < 2) {
  return false;
 }
 // Same short-circuit order as the original: i, then i - 1, then i - 2.
 final boolean patternMatches = cons(i) && !cons(i - 1) && cons(i - 2);
 if (!patternMatches) {
  return false;
 }
 final int last = b[i];
 return last != 'w' && last != 'x' && last != 'y';
}

/**
 * Creates a WhitespaceTokenizerFactory of Word objects from a fresh
 * {@link WordTokenFactory}, passing {@code false} as the second
 * constructor argument.
 *
 * @return A TokenizerFactory that returns Word objects
 */
public static TokenizerFactory<Word> factory() {
 WordTokenFactory wordFactory = new WordTokenFactory();
 return new WhitespaceTokenizerFactory<>(wordFactory, false);
}

Most used classes

CoreLabelTokenFactory
Constructs CoreLabels from Strings, optionally with beginning and ending (character after the end) offset positions in the original text.
PTBTokenizer
A fast, rule-based tokenizer implementation, which produces Penn Treebank style tokenization of English text.
TokenizerFactory
A TokenizerFactory is used to convert a java.io.Reader into a Tokenizer (an extension of Iterator) over the objects extracted from that Reader.
Tokenizer
Tokenizers break up text into individual Objects. These objects may be Strings, Words, or other Objects.
DocumentPreprocessor
Produces a list of sentences from either a plain text or XML document. This class acts like a Reader

How to use edu.stanford.nlp.process

Best Java code snippets using edu.stanford.nlp.process (Showing top 20 results out of 315)