/** Wraps {@code input} in a {@link PorterStemFilter}. */
@Override
public PorterStemFilter create(TokenStream input) {
  final PorterStemFilter stemmed = new PorterStemFilter(input);
  return stemmed;
}
}
/** Applies Porter stemming on top of the given stream. */
@Override
public TokenStream create(TokenStream tokenStream) {
  return new PorterStemFilter(tokenStream);
}
}
/** Produces a {@link PorterStemFilter} over {@code input}. */
@Override
public PorterStemFilter create(TokenStream input) {
  return new PorterStemFilter(input);
}
}
/** Builds a {@link PorterStemFilter}; {@code version} is accepted but not consulted. */
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
  // version intentionally unused — the Porter stemmer needs no version handling here
  return new PorterStemFilter(tokenStream);
}
},
/** Wraps {@code tokenStream} with Porter stemming (the {@code version} argument is ignored). */
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
  final TokenStream stemmed = new PorterStemFilter(tokenStream);
  return stemmed;
}
},
/** Returns the input stream filtered through a {@link PorterStemFilter}. */
@Override
public TokenStream create(TokenStream tokenStream) {
  final PorterStemFilter result = new PorterStemFilter(tokenStream);
  return result;
}
}
/**
 * Creates the {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 * used to tokenize all the text in the provided {@link Reader}.
 *
 * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 *         built from a {@link StandardTokenizer} filtered with
 *         {@link EnglishPossessiveFilter}, {@link LowerCaseFilter},
 *         {@link StopFilter}, a {@link SetKeywordMarkerFilter} when a stem
 *         exclusion set is provided, and finally {@link PorterStemFilter}.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  final Tokenizer source = new StandardTokenizer();
  TokenStream chain = new EnglishPossessiveFilter(source);
  chain = new LowerCaseFilter(chain);
  chain = new StopFilter(chain, stopwords);
  if (!stemExclusionSet.isEmpty()) {
    // tokens marked as keywords are left untouched by the Porter stemmer
    chain = new SetKeywordMarkerFilter(chain, stemExclusionSet);
  }
  chain = new PorterStemFilter(chain);
  return new TokenStreamComponents(source, chain);
}
/** Tokenizes with {@link LowerCaseTokenizer} and applies Porter stemming. */
@Override
protected TokenStreamComponents createComponents(String s) {
  final Tokenizer tokenizer = new LowerCaseTokenizer();
  final TokenStream stemmed = new PorterStemFilter(tokenizer);
  return new TokenStreamComponents(tokenizer, stemmed);
}
/**
 * Applies the standard English filter chain to {@code result}:
 * possessive stripping, lowercasing, then Porter stemming.
 */
public static TokenStream english(TokenStream result) {
  return new PorterStemFilter(new LowerCaseFilter(new EnglishPossessiveFilter(result)));
}
@Override protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { Tokenizer source = new WhitespaceTokenizer(matchVersion, reader); TokenStream filter = new LowerCaseEntityPreservingFilter(source); if (stemming) { // Porter stemmer ignores words which are marked as keywords filter = new PorterStemFilter(filter); } return new TokenStreamComponents(source, filter); }
@Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer source = new WhitespaceTokenizer(); TokenStream filter = new TweetLowerCaseEntityPreservingFilter(source); if (stemming) { // Porter stemmer ignores words which are marked as keywords filter = new PorterStemFilter(filter); } return new TokenStreamComponents(source, filter); }
/**
 * Letter-tokenizes the input, lowercases it, removes English stop words,
 * and Porter-stems the remainder. Uses the Version-parameterized constructors
 * required by {@code BonnieConstants.LUCENE_VERSION}.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  final LetterTokenizer source = new LetterTokenizer(BonnieConstants.LUCENE_VERSION, reader);
  TokenStream stream = new LowerCaseFilter(BonnieConstants.LUCENE_VERSION, source);
  stream = new StopFilter(BonnieConstants.LUCENE_VERSION, stream, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
  stream = new PorterStemFilter(stream);
  return new TokenStreamComponents(source, stream);
}
@Override public TokenStreamComponents createComponents(String fieldName) { final Tokenizer tokenizer = new HMMChineseTokenizer(); TokenStream result = tokenizer; // result = new LowerCaseFilter(result); // LowerCaseFilter is not needed, as SegTokenFilter lowercases Basic Latin text. // The porter stemming is too strict, this is not a bug, this is a feature:) result = new PorterStemFilter(result); if (!stopWords.isEmpty()) { result = new StopFilter(result, stopWords); } return new TokenStreamComponents(tokenizer, result); }
/**
 * English chain: standard tokenization/filtering, ASCII folding, lowercasing,
 * possessive stripping, stop removal, alpha-only word delimiting, Porter stemming.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  final Tokenizer source = new StandardTokenizer();
  TokenStream stream = new StandardFilter(source);
  stream = new ASCIIFoldingFilter(stream);
  stream = new LowerCaseFilter(stream);
  stream = new EnglishPossessiveFilter(stream);
  stream = new StopFilter(stream, stopwords);
  stream = new WordDelimiterFilter(stream, WordDelimiterFilter.ALPHA, null);
  stream = new PorterStemFilter(stream);
  return new TokenStreamComponents(source, stream);
}
}
/**
 * Version-parameterized English chain: standard tokenization/filtering, ASCII
 * folding, lowercasing, possessive stripping, stop removal, alpha-only word
 * delimiting, and Porter stemming.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  final Tokenizer source = new StandardTokenizer(matchVersion, reader);
  TokenStream stream = new StandardFilter(matchVersion, source);
  stream = new ASCIIFoldingFilter(stream);
  stream = new LowerCaseFilter(matchVersion, stream);
  stream = new EnglishPossessiveFilter(matchVersion, stream);
  stream = new StopFilter(matchVersion, stream, stopwords);
  stream = new WordDelimiterFilter(stream, WordDelimiterFilter.ALPHA, null);
  stream = new PorterStemFilter(stream);
  return new TokenStreamComponents(source, stream);
}
}
/**
 * Builds the per-field analysis chain: standard tokenizer and filter, ASCII
 * folding, lowercasing, possessive stripping, stop-word removal, alpha-only
 * word delimiting, then Porter stemming.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  final Tokenizer tokenizer = new StandardTokenizer();
  TokenStream chain = new StandardFilter(tokenizer);
  chain = new ASCIIFoldingFilter(chain);
  chain = new LowerCaseFilter(chain);
  chain = new EnglishPossessiveFilter(chain);
  chain = new StopFilter(chain, stopwords);
  chain = new WordDelimiterFilter(chain, WordDelimiterFilter.ALPHA, null);
  chain = new PorterStemFilter(chain);
  return new TokenStreamComponents(tokenizer, chain);
}
}
/**
 * English chain using the default stop set: standard tokenization/filtering,
 * ASCII folding, possessive stripping, alpha-only word delimiting, lowercasing,
 * stop removal, Porter stemming.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  final Tokenizer source = new StandardTokenizer();
  TokenStream stream = new StandardFilter(source);
  stream = new ASCIIFoldingFilter(stream);
  stream = new EnglishPossessiveFilter(stream);
  stream = new WordDelimiterFilter(stream, WordDelimiterFilter.ALPHA, null);
  stream = new LowerCaseFilter(stream);
  stream = new StopFilter(stream, EnglishAnalyzer.getDefaultStopSet());
  stream = new PorterStemFilter(stream);
  return new TokenStreamComponents(source, stream);
}
/**
 * Version-parameterized English chain with the default stop set: standard
 * tokenization/filtering, ASCII folding, possessive stripping, alpha-only word
 * delimiting, lowercasing, stop removal, Porter stemming.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  final Tokenizer source = new StandardTokenizer(matchVersion, reader);
  TokenStream stream = new StandardFilter(matchVersion, source);
  stream = new ASCIIFoldingFilter(stream);
  stream = new EnglishPossessiveFilter(matchVersion, stream);
  stream = new WordDelimiterFilter(stream, WordDelimiterFilter.ALPHA, null);
  stream = new LowerCaseFilter(matchVersion, stream);
  stream = new StopFilter(matchVersion, stream, EnglishAnalyzer.getDefaultStopSet());
  stream = new PorterStemFilter(stream);
  return new TokenStreamComponents(source, stream);
}
/**
 * Assembles the analysis chain for a field: standard tokenizer/filter, ASCII
 * folding, possessive stripping, alpha-only word delimiting, lowercasing,
 * default English stop removal, and Porter stemming.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  final Tokenizer tokenizer = new StandardTokenizer();
  TokenStream chain = new StandardFilter(tokenizer);
  chain = new ASCIIFoldingFilter(chain);
  chain = new EnglishPossessiveFilter(chain);
  chain = new WordDelimiterFilter(chain, WordDelimiterFilter.ALPHA, null);
  chain = new LowerCaseFilter(chain);
  chain = new StopFilter(chain, EnglishAnalyzer.getDefaultStopSet());
  chain = new PorterStemFilter(chain);
  return new TokenStreamComponents(tokenizer, chain);
}
/**
 * Builds the analysis chain for a field: standard tokenization/filtering,
 * possessive stripping, lowercasing, stop-word removal, ASCII folding, an
 * optional keyword marker for the stem exclusion set, then Porter stemming.
 *
 * <p>Rewritten to use {@link TokenStream}-typed locals instead of the original
 * {@code Object} variable with repeated casts (a decompiler artifact).
 *
 * @param fieldName the field being analyzed (not consulted by this chain)
 * @return the tokenizer/filter pair for the field
 */
protected TokenStreamComponents createComponents(String fieldName) {
  final StandardTokenizer source = new StandardTokenizer();
  TokenStream stream = new StandardFilter(source);
  stream = new EnglishPossessiveFilter(stream);
  stream = new LowerCaseFilter(stream);
  stream = new StopFilter(stream, this.stopwords);
  stream = new ASCIIFoldingFilter(stream);
  if (!this.stemExclusionSet.isEmpty()) {
    // tokens marked as keywords are left untouched by the Porter stemmer
    stream = new SetKeywordMarkerFilter(stream, this.stemExclusionSet);
  }
  stream = new PorterStemFilter(stream);
  return new TokenStreamComponents(source, stream);
}