/**
 * Creates a WhitespaceTokenizer over the given reader whose tokens are
 * built by a fresh {@code WordTokenFactory}.
 *
 * @param r The Reader whose contents will be tokenized
 * @param eolIsSignificant Passed through unchanged to the
 *                         WhitespaceTokenizer constructor
 * @return A new WhitespaceTokenizer producing {@code Word} tokens
 */
public static WhitespaceTokenizer<Word> newWordWhitespaceTokenizer(Reader r, boolean eolIsSignificant) {
  final WordTokenFactory wordFactory = new WordTokenFactory();
  return new WhitespaceTokenizer<>(wordFactory, r, eolIsSignificant);
}
/**
 * Builds a TokenizerFactory producing Word objects by delegating to
 * {@code newPTBTokenizerFactory} with a fresh {@code WordTokenFactory}
 * and an empty options string. With these options, carriage returns are
 * treated as normal whitespace.
 *
 * <p>THIS METHOD IS INVOKED BY REFLECTION BY SOME OF THE JAVANLP CODE TO
 * LOAD A TOKENIZER FACTORY. IT SHOULD BE PRESENT IN A TokenizerFactory.
 * Do not rename it or change its signature.
 *
 * @return A TokenizerFactory that returns Word objects
 */
public static TokenizerFactory<Word> newTokenizerFactory() {
  final WordTokenFactory wordFactory = new WordTokenFactory();
  final String noOptions = "";
  return newPTBTokenizerFactory(wordFactory, noOptions);
}
/**
 * Constructs a new PTBTokenizerFactory whose tokens are built by a
 * {@code CoreLabelTokenFactory} and which uses the options passed in.
 *
 * @param options A String of options. For the default, recommended
 *                options for PTB-style tokenization compatibility, pass
 *                in an empty String.
 * @return A PTBTokenizerFactory that returns CoreLabel objects
 */
public static PTBTokenizerFactory<CoreLabel> newCoreLabelTokenizerFactory(String options) {
  final CoreLabelTokenFactory labelFactory = new CoreLabelTokenFactory();
  return new PTBTokenizerFactory<>(labelFactory, options);
}
/**
 * Obtains a factory that vends CoreLabel tokens, configured with the
 * supplied options. Delegates to
 * {@code PTBTokenizerFactory.newPTBTokenizerFactory} with a fresh
 * {@code CoreLabelTokenFactory}.
 *
 * @param options A String of tokenizer options, forwarded unchanged
 * @return A TokenizerFactory that vends CoreLabel tokens using the given options
 */
public static TokenizerFactory<CoreLabel> coreLabelFactory(String options) {
  final CoreLabelTokenFactory labelFactory = new CoreLabelTokenFactory();
  return PTBTokenizerFactory.newPTBTokenizerFactory(labelFactory, options);
}
/**
 * Constructs a new PTBTokenizer over the given reader, using a fresh
 * {@code WordTokenFactory} and an empty options string. With these
 * options, carriage returns are treated as normal whitespace.
 *
 * @param r The Reader whose contents will be tokenized
 * @return A PTBTokenizer that tokenizes a stream to objects of type
 *         {@link Word}
 */
public static PTBTokenizer<Word> newPTBTokenizer(Reader r) {
  final WordTokenFactory wordFactory = new WordTokenFactory();
  final String noOptions = "";
  return new PTBTokenizer<>(r, wordFactory, noOptions);
}
/**
 * Builds a {@code WhitespaceTokenizerFactory} producing Word objects,
 * with the factory's boolean constructor argument fixed to {@code false},
 * so that carriage returns are treated as normal whitespace.
 *
 * <p>THIS METHOD IS INVOKED BY REFLECTION BY SOME OF THE JAVANLP CODE TO
 * LOAD A TOKENIZER FACTORY. IT SHOULD BE PRESENT IN A TokenizerFactory.
 * Do not rename it or change its signature.
 *
 * @return A TokenizerFactory that returns Word objects
 */
public static TokenizerFactory<Word> newTokenizerFactory() {
  final WordTokenFactory wordFactory = new WordTokenFactory();
  return new WhitespaceTokenizerFactory<>(wordFactory, false);
}
/**
 * Constructs a new PTBTokenizerFactory whose tokens are built by a
 * {@code WordTokenFactory} and which uses the options passed in.
 *
 * <p>THIS METHOD IS INVOKED BY REFLECTION BY SOME OF THE JAVANLP CODE TO
 * LOAD A TOKENIZER FACTORY. IT SHOULD BE PRESENT IN A TokenizerFactory.
 * Do not rename it or change its signature.
 *
 * @param options A String of options, forwarded unchanged
 * @return A PTBTokenizerFactory that returns Word objects
 */
public static PTBTokenizerFactory<Word> newWordTokenizerFactory(String options) {
  final WordTokenFactory wordFactory = new WordTokenFactory();
  return new PTBTokenizerFactory<>(wordFactory, options);
}
/**
 * Computes the word shape of a String using the shaper identified by the
 * given int constant. This is a convenience overload: it forwards to the
 * three-argument {@code wordShape}, passing {@code null} as the third
 * argument.
 *
 * @param inStr String to calculate word shape of
 * @param wordShaper Constant for which shaping formula to use
 * @return The wordshape String
 */
public static String wordShape(String inStr, int wordShaper) {
  final String shape = wordShape(inStr, wordShaper, null);
  return shape;
}
/**
 * Lemmatizes a word, taking its part-of-speech tag into account.
 * Words other than proper nouns will be changed to all lowercase.
 * This overload forwards to the three-argument {@code lemmaStatic},
 * passing {@code true} as the third argument.
 *
 * @param word The word to lemmatize
 * @param tag What part of speech to assume for it.
 * @return The lemma for the word
 */
public static synchronized String lemmaStatic(String word, String tag) {
  final String lemma = lemmaStatic(word, tag, true);
  return lemma;
}
ExtractorWordShapeConjunction(int left, int right, String wsc) { super(); this.left = left; this.right = right; wordShaper = WordShapeClassifier.lookupShaper(wsc); name = "ExtractorWordShapeConjunction(" + left + ',' + right + ',' + wsc + ')'; }
/**
 * Allocates the 6-element transition table and fills it by calling the
 * three-argument {@code zzUnpackTrans} on {@code ZZ_TRANS_PACKED_0},
 * starting at offset 0.
 *
 * @return The filled table
 */
private static int [] zzUnpackTrans() {
  final int [] table = new int[6];
  // The offset returned by the unpacker is not needed: there is only one packed chunk.
  zzUnpackTrans(ZZ_TRANS_PACKED_0, 0, table);
  return table;
}
/**
 * Allocates the 3-element attribute table and fills it by calling the
 * three-argument {@code zzUnpackAttribute} on
 * {@code ZZ_ATTRIBUTE_PACKED_0}, starting at offset 0.
 *
 * @return The filled table
 */
private static int [] zzUnpackAttribute() {
  final int [] table = new int[3];
  // The offset returned by the unpacker is not needed: there is only one packed chunk.
  zzUnpackAttribute(ZZ_ATTRIBUTE_PACKED_0, 0, table);
  return table;
}
/**
 * Splits document text into sentences. This overload forwards to the
 * two-argument {@code fromPlainText}, passing {@code false} as the
 * second argument.
 *
 * @param contentString Chinese document text
 * @return a List of sentence strings
 * @throws IOException if the two-argument overload throws it
 */
public static List<String> fromPlainText(String contentString) throws IOException {
  final List<String> sentences = fromPlainText(contentString, false);
  return sentences;
}
/**
 * Reports whether any index in the range {@code 0..j} (inclusive) is
 * not a consonant according to {@code cons}.
 *
 * @return {@code true} as soon as {@code cons(i)} is false for some
 *         {@code i} in {@code 0..j}; {@code false} if no such index exists
 */
private final boolean vowelinstem() {
  int pos = 0;
  while (pos <= j) {
    if (!cons(pos)) {
      return true;
    }
    pos++;
  }
  return false;
}
/**
 * Creates a new StoplistFilter backed by a default-constructed
 * {@code StopList}. Delegates to the constructor that takes a
 * {@code StopList}.
 */
public StoplistFilter() { this(new StopList()); }
/**
 * Creates a PTBTokenizerFactory whose tokens are built by a fresh
 * {@code CoreLabelTokenFactory}, configured by two boolean flags.
 *
 * @param tokenizeNLs Forwarded as the first constructor argument
 * @param invertible Forwarded as the second constructor argument
 * @return A PTBTokenizerFactory that returns CoreLabel objects
 */
public static PTBTokenizerFactory<CoreLabel> newPTBTokenizerFactory(boolean tokenizeNLs, boolean invertible) {
  final CoreLabelTokenFactory labelFactory = new CoreLabelTokenFactory();
  // NOTE(review): the third constructor flag is fixed to false; its meaning is not visible here.
  return new PTBTokenizerFactory<>(tokenizeNLs, invertible, false, labelFactory);
}
/**
 * Creates a {@code WhitespaceTokenizerFactory} whose tokens are built by
 * a fresh {@code WordTokenFactory}.
 *
 * @param eolIsSignificant Passed through unchanged to the
 *                         WhitespaceTokenizerFactory constructor
 * @return A TokenizerFactory that returns Word objects
 */
public static TokenizerFactory<Word> factory(boolean eolIsSignificant) {
  final WordTokenFactory wordFactory = new WordTokenFactory();
  return new WhitespaceTokenizerFactory<>(wordFactory, eolIsSignificant);
}
/**
 * Creates a {@code WhitespaceTokenizerFactory} whose tokens are built by
 * a fresh {@code WordTokenFactory}, with the factory's boolean
 * constructor argument fixed to {@code false}.
 *
 * @return A TokenizerFactory that returns Word objects
 */
public static TokenizerFactory<Word> factory() {
  final WordTokenFactory wordFactory = new WordTokenFactory();
  return new WhitespaceTokenizerFactory<>(wordFactory, false);
}