/**
 * Builds the {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 * used to tokenize all the text in the provided {@link Reader}.
 *
 * @return {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 *         built from a {@link StandardTokenizer} filtered with
 *         {@link ElisionFilter}, {@link LowerCaseFilter}, {@link StopFilter},
 *         {@link SetKeywordMarkerFilter} (only when a stem exclusion set is
 *         provided) and {@link SnowballFilter}.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  final Tokenizer tokenizer = new StandardTokenizer();
  // Remove elided articles (e.g. l', d') before case folding and stop-word removal.
  TokenStream stream = new ElisionFilter(tokenizer, DEFAULT_ARTICLES);
  stream = new LowerCaseFilter(stream);
  stream = new StopFilter(stream, stopwords);
  if (!stemExclusionSet.isEmpty()) {
    // Mark excluded terms as keywords so the stemmer below leaves them untouched.
    stream = new SetKeywordMarkerFilter(stream, stemExclusionSet);
  }
  stream = new SnowballFilter(stream, new CatalanStemmer());
  return new TokenStreamComponents(tokenizer, stream);
}
// Wraps the incoming stream with Catalan Snowball stemming.
// NOTE(review): the enclosing anonymous class/interface is outside this chunk —
// the trailing `};` below closes it; confirm the functional interface it implements.
@Override
public TokenStream apply(final TokenStream input) {
  return new SnowballFilter(input, new CatalanStemmer());
}
};
// Assembles a configurable analysis chain on top of an externally supplied tokenizer.
// Each stage is gated by an instance flag (caseInsensitive / useStopWords / useStem),
// presumably set by the enclosing class's configuration — not visible in this chunk.
@Override
public TokenStream getTokenStream(Tokenizer tokenizer, CharArraySet stemExclusionSet) {
  TokenStream stream = new StandardFilter(matchVersion, tokenizer);
  if (caseInsensitive)
    stream = new LowerCaseFilter(matchVersion, stream);
  if (useStopWords) {
    // Strip elided articles before stop-word filtering.
    stream = new ElisionFilter(stream, DEFAULT_ARTICLES);
    // NOTE(review): the stop set is Danish but the stemmer below is Catalan —
    // this looks like a copy/paste mismatch; confirm the intended language.
    stream = new StopFilter(matchVersion, stream, DanishAnalyzer.getDefaultStopSet());
  }
  if (useStem) {
    if (!stemExclusionSet.isEmpty())
      // Protect excluded terms from stemming.
      stream = new SetKeywordMarkerFilter(stream, stemExclusionSet);
    stream = new SnowballFilter(stream, new CatalanStemmer());
  }
  return stream;
}
}
/**
 * Builds the
 * {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
 * used to tokenize all the text in the provided {@link Reader}.
 *
 * @return {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
 *         built from a {@link StandardTokenizer} filtered with
 *         {@link StandardFilter}, {@link LowerCaseFilter}, {@link StopFilter},
 *         {@link KeywordMarkerFilter} (only when a stem exclusion set is
 *         provided) and {@link SnowballFilter}.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  final Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
  TokenStream stream = new StandardFilter(matchVersion, tokenizer);
  // Elision filtering was only added to this chain as of Lucene 3.6; keep the
  // version gate so older-index behavior is preserved.
  if (matchVersion.onOrAfter(Version.LUCENE_36)) {
    stream = new ElisionFilter(matchVersion, stream, DEFAULT_ARTICLES);
  }
  stream = new LowerCaseFilter(matchVersion, stream);
  stream = new StopFilter(matchVersion, stream, stopwords);
  if (!stemExclusionSet.isEmpty()) {
    // Mark excluded terms as keywords so the stemmer below leaves them untouched.
    stream = new KeywordMarkerFilter(stream, stemExclusionSet);
  }
  stream = new SnowballFilter(stream, new CatalanStemmer());
  return new TokenStreamComponents(tokenizer, stream);
}
}
  return new BulgarianStemFilter(tokenStream);
// Fragment of a larger language-name dispatch chain (start/end not visible here).
// Language names are matched case-insensitively; Catalan stemming is provided via
// the generic SnowballFilter rather than a dedicated *StemFilter.
} else if ("catalan".equalsIgnoreCase(language)) {
  return new SnowballFilter(tokenStream, new CatalanStemmer());
} else if ("czech".equalsIgnoreCase(language)) {
  return new CzechStemFilter(tokenStream);
  return new BulgarianStemFilter(tokenStream);
// Fragment of a larger language-name dispatch chain (start/end not visible here);
// byte-identical to the fragment above — possibly the same code from another file
// or revision. Language names are matched case-insensitively.
} else if ("catalan".equalsIgnoreCase(language)) {
  return new SnowballFilter(tokenStream, new CatalanStemmer());
} else if ("czech".equalsIgnoreCase(language)) {
  return new CzechStemFilter(tokenStream);