/**
 * Builds a French tokenizer factory whose tokens are created by a
 * {@link WordTokenFactory}.
 * <p>
 * todo [cdm 2013]: This should be changed into a method that can return any
 * kind of Label, with CoreLabel being returned here.
 *
 * @param options A String of options, handed unchanged to the factory
 * @return A TokenizerFactory that returns Word objects
 */
public static TokenizerFactory<Word> newWordTokenizerFactory(String options) {
  WordTokenFactory wordFactory = new WordTokenFactory();
  return new FrenchTokenizerFactory<>(wordFactory, options);
}
/**
 * Wraps a reader in a WhitespaceTokenizer whose tokens are created by a
 * {@link WordTokenFactory}.
 *
 * @param r The Reader whose contents will be tokenized
 * @param eolIsSignificant Forwarded as-is to the WhitespaceTokenizer
 *     constructor; presumably selects whether line ends are reported as
 *     tokens (confirm against WhitespaceTokenizer)
 * @return A WhitespaceTokenizer over {@code r} that produces Word objects
 */
public static WhitespaceTokenizer<Word> newWordWhitespaceTokenizer(Reader r, boolean eolIsSignificant) {
  WordTokenFactory wordFactory = new WordTokenFactory();
  WhitespaceTokenizer<Word> tokenizer =
      new WhitespaceTokenizer<>(wordFactory, r, eolIsSignificant);
  return tokenizer;
}
/**
 * Creates the default whitespace tokenizer factory. It produces Word
 * objects and is built with the end-of-line flag switched off, so carriage
 * returns are treated as normal whitespace.
 * <p>
 * NOTE: some of the JavaNLP code invokes this method by reflection in order
 * to load a tokenizer factory, so it should be present in a
 * TokenizerFactory.
 *
 * @return A TokenizerFactory that returns Word objects
 */
public static TokenizerFactory<Word> newTokenizerFactory() {
  final boolean eolIsSignificant = false;
  return new WhitespaceTokenizerFactory<>(new WordTokenFactory(), eolIsSignificant);
}
/**
 * Creates a whitespace tokenizer factory whose tokens are created by a
 * {@link WordTokenFactory}.
 *
 * @param eolIsSignificant Forwarded as-is to the WhitespaceTokenizerFactory
 *     constructor; presumably selects whether line ends are reported as
 *     tokens (confirm against WhitespaceTokenizerFactory)
 * @return A TokenizerFactory that returns Word objects
 */
public static TokenizerFactory<Word> factory(boolean eolIsSignificant) {
  WordTokenFactory wordFactory = new WordTokenFactory();
  TokenizerFactory<Word> result =
      new WhitespaceTokenizerFactory<>(wordFactory, eolIsSignificant);
  return result;
}
/**
 * Creates a PTBTokenizer over the given reader that returns Word tokens
 * and treats carriage returns as normal whitespace. The tokenizer is built
 * with a {@link WordTokenFactory} and an empty string as its final
 * constructor argument.
 *
 * @param r The Reader whose contents will be tokenized
 * @return A PTBTokenizer that tokenizes a stream to objects of type
 *     {@link Word}
 */
public static PTBTokenizer<Word> newPTBTokenizer(Reader r) {
  WordTokenFactory wordFactory = new WordTokenFactory();
  return new PTBTokenizer<>(r, wordFactory, "");
}
/**
 * Creates a whitespace tokenizer factory whose tokens are created by a
 * {@link WordTokenFactory}, with the factory's boolean constructor flag
 * fixed to {@code false}.
 *
 * @return A TokenizerFactory that returns Word objects
 */
public static TokenizerFactory<Word> factory() {
  WordTokenFactory wordFactory = new WordTokenFactory();
  TokenizerFactory<Word> result = new WhitespaceTokenizerFactory<>(wordFactory, false);
  return result;
}
/**
 * Creates a PTBTokenizerFactory that returns Word objects and is
 * configured with the options passed in.
 * <p>
 * NOTE: some of the JavaNLP code invokes this method by reflection in order
 * to load a tokenizer factory, so it should be present in a
 * TokenizerFactory.
 *
 * @param options A String of options, handed unchanged to the factory
 * @return A TokenizerFactory that returns Word objects
 */
public static PTBTokenizerFactory<Word> newWordTokenizerFactory(String options) {
  WordTokenFactory wordFactory = new WordTokenFactory();
  return new PTBTokenizerFactory<>(wordFactory, options);
}
/**
 * Creates the default tokenizer factory by delegating to
 * {@code newPTBTokenizerFactory} with a {@link WordTokenFactory} and an
 * empty options string. The result returns Word objects and treats
 * carriage returns as normal whitespace.
 * <p>
 * NOTE: some of the JavaNLP code invokes this method by reflection in order
 * to load a tokenizer factory, so it should be present in a
 * TokenizerFactory.
 *
 * @return A TokenizerFactory that returns Word objects
 */
public static TokenizerFactory<Word> newTokenizerFactory() {
  WordTokenFactory wordFactory = new WordTokenFactory();
  return newPTBTokenizerFactory(wordFactory, "");
}
new InputStreamReader(System.in, "UTF-8")); WhitespaceTokenizer<Word> tokenizer = new WhitespaceTokenizer<>(new WordTokenFactory(), reader, eolIsSignificant); PrintWriter pw =
/**
 * Builds a Word-producing PTBTokenizer for the supplied reader. Carriage
 * returns are treated as normal whitespace.
 *
 * @param r The Reader whose contents will be tokenized
 * @return A PTBTokenizer that tokenizes a stream to objects of type
 *     {@link Word}
 */
public static PTBTokenizer<Word> newPTBTokenizer(Reader r) {
  PTBTokenizer<Word> tokenizer = new PTBTokenizer<>(r, new WordTokenFactory(), "");
  return tokenizer;
}
/**
 * Builds a WhitespaceTokenizer over the supplied reader, using a
 * {@link WordTokenFactory} to create its tokens.
 *
 * @param r The Reader whose contents will be tokenized
 * @param eolIsSignificant Passed straight through to the
 *     WhitespaceTokenizer constructor; presumably controls whether line
 *     ends come back as tokens (confirm against WhitespaceTokenizer)
 * @return A WhitespaceTokenizer over {@code r} that produces Word objects
 */
public static WhitespaceTokenizer<Word> newWordWhitespaceTokenizer(Reader r, boolean eolIsSignificant) {
  WordTokenFactory tokenFactory = new WordTokenFactory();
  return new WhitespaceTokenizer<>(tokenFactory, r, eolIsSignificant);
}
/**
 * Returns a new PTBTokenizer that reads from {@code r}, emits Word tokens,
 * and treats carriage returns as normal whitespace.
 *
 * @param r The Reader whose contents will be tokenized
 * @return A PTBTokenizer that tokenizes a stream to objects of type
 *     {@link Word}
 */
public static PTBTokenizer<Word> newPTBTokenizer(Reader r) {
  WordTokenFactory tokenFactory = new WordTokenFactory();
  PTBTokenizer<Word> result = new PTBTokenizer<>(r, tokenFactory, "");
  return result;
}
/**
 * Creates a WhitespaceTokenizer for the given reader whose tokens are
 * created by a {@link WordTokenFactory}.
 *
 * @param r The Reader whose contents will be tokenized
 * @param eolIsSignificant Handed unchanged to the WhitespaceTokenizer
 *     constructor; presumably decides whether line ends are returned as
 *     tokens (confirm against WhitespaceTokenizer)
 * @return A WhitespaceTokenizer over {@code r} that produces Word objects
 */
public static WhitespaceTokenizer<Word> newWordWhitespaceTokenizer(Reader r, boolean eolIsSignificant) {
  WordTokenFactory wordFactory = new WordTokenFactory();
  WhitespaceTokenizer<Word> tokenizer =
      new WhitespaceTokenizer<Word>(wordFactory, r, eolIsSignificant);
  return tokenizer;
}
/**
 * Creates a PTBTokenizerFactory whose tokenizers optionally return
 * carriage returns as their own token. The two remaining boolean
 * constructor flags are fixed to {@code false}, and tokens are created by
 * a {@link WordTokenFactory}.
 *
 * @param tokenizeNLs If true, newlines come back as Words whose text is
 *     the value of {@code PTBLexer.NEWLINE_TOKEN}.
 * @return A TokenizerFactory that returns Word objects
 */
public static PTBTokenizerFactory<Word> newPTBTokenizerFactory(boolean tokenizeNLs) {
  WordTokenFactory wordFactory = new WordTokenFactory();
  PTBTokenizerFactory<Word> factory =
      new PTBTokenizerFactory<Word>(tokenizeNLs, false, false, wordFactory);
  return factory;
}
/**
 * Returns a new whitespace tokenizer factory that produces Word objects
 * and treats carriage returns as normal whitespace (the factory's boolean
 * flag is passed as {@code false}).
 * <p>
 * NOTE: some of the JavaNLP code invokes this method by reflection in order
 * to load a tokenizer factory, so it should be present in a
 * TokenizerFactory.
 *
 * @return A TokenizerFactory that returns Word objects
 */
public static TokenizerFactory<Word> newTokenizerFactory() {
  WordTokenFactory wordFactory = new WordTokenFactory();
  TokenizerFactory<Word> factory = new WhitespaceTokenizerFactory<>(wordFactory, false);
  return factory;
}
/**
 * Builds a PTBTokenizerFactory that returns Word objects and uses the
 * options passed in.
 * <p>
 * NOTE: some of the JavaNLP code invokes this method by reflection in order
 * to load a tokenizer factory, so it should be present in a
 * TokenizerFactory.
 *
 * @param options A String of options, handed unchanged to the factory
 * @return A TokenizerFactory that returns Word objects
 */
public static PTBTokenizerFactory<Word> newWordTokenizerFactory(String options) {
  WordTokenFactory wordFactory = new WordTokenFactory();
  PTBTokenizerFactory<Word> factory = new PTBTokenizerFactory<Word>(wordFactory, options);
  return factory;
}
/**
 * Builds a whitespace tokenizer factory that uses a
 * {@link WordTokenFactory} to create its tokens.
 *
 * @param eolIsSignificant Passed straight through to the
 *     WhitespaceTokenizerFactory constructor; presumably controls whether
 *     line ends come back as tokens (confirm against
 *     WhitespaceTokenizerFactory)
 * @return A TokenizerFactory that returns Word objects
 */
public static TokenizerFactory<Word> factory(boolean eolIsSignificant) {
  WordTokenFactory tokenFactory = new WordTokenFactory();
  return new WhitespaceTokenizerFactory<Word>(tokenFactory, eolIsSignificant);
}
/**
 * Returns a new PTBTokenizerFactory that produces Word objects, configured
 * by the supplied options string.
 * <p>
 * NOTE: some of the JavaNLP code invokes this method by reflection in order
 * to load a tokenizer factory, so it should be present in a
 * TokenizerFactory.
 *
 * @param options A String of options, handed unchanged to the factory
 * @return A TokenizerFactory that returns Word objects
 */
public static PTBTokenizerFactory<Word> newWordTokenizerFactory(String options) {
  PTBTokenizerFactory<Word> factory =
      new PTBTokenizerFactory<>(new WordTokenFactory(), options);
  return factory;
}
/**
 * Returns a new tokenizer factory that produces Word objects and treats
 * carriage returns as normal whitespace. It is obtained from
 * {@code newPTBTokenizerFactory} using a {@link WordTokenFactory} and an
 * empty options string.
 * <p>
 * NOTE: some of the JavaNLP code invokes this method by reflection in order
 * to load a tokenizer factory, so it should be present in a
 * TokenizerFactory.
 *
 * @return A TokenizerFactory that returns Word objects
 */
public static TokenizerFactory<Word> newTokenizerFactory() {
  TokenizerFactory<Word> factory = newPTBTokenizerFactory(new WordTokenFactory(), "");
  return factory;
}
/**
 * Builds the default Word-producing tokenizer factory, which treats
 * carriage returns as normal whitespace. The work is delegated to
 * {@code newPTBTokenizerFactory}, called with a {@link WordTokenFactory}
 * and an empty options string.
 * <p>
 * NOTE: some of the JavaNLP code invokes this method by reflection in order
 * to load a tokenizer factory, so it should be present in a
 * TokenizerFactory.
 *
 * @return A TokenizerFactory that returns Word objects
 */
public static TokenizerFactory<Word> newTokenizerFactory() {
  WordTokenFactory tokenFactory = new WordTokenFactory();
  TokenizerFactory<Word> result = newPTBTokenizerFactory(tokenFactory, "");
  return result;
}