/**
 * Wraps the incoming stream in a {@link TurkishLowerCaseFilter}.
 *
 * @param fieldName name of the field; not consulted by this implementation
 * @param in the stream to wrap
 * @return a {@link TurkishLowerCaseFilter} reading from {@code in}
 */
@Override
protected TokenStream normalize(String fieldName, TokenStream in) {
  final TokenStream lowered = new TurkishLowerCaseFilter(in);
  return lowered;
}
}
/**
 * Wraps the given stream in a {@link TurkishLowerCaseFilter}.
 *
 * @param input the stream to wrap
 * @return a new {@link TurkishLowerCaseFilter} reading from {@code input}
 */
@Override
public TokenStream create(TokenStream input) {
  final TokenStream lowered = new TurkishLowerCaseFilter(input);
  return lowered;
}
/**
 * Wraps the given stream in a {@link TurkishLowerCaseFilter}.
 *
 * @param input the stream to wrap
 * @return a new {@link TurkishLowerCaseFilter} reading from {@code input}
 */
@Override
public TokenStream create(TokenStream input) {
  final TokenStream lowered = new TurkishLowerCaseFilter(input);
  return lowered;
}
/**
 * Builds the analysis chain for a field.
 *
 * <p>The source is a {@link StandardTokenizer}. Its output is wrapped, in order, by
 * {@link ApostropheFilter}, {@link TurkishLowerCaseFilter} and {@link StopFilter}
 * (using {@code stopwords}); then by {@link SetKeywordMarkerFilter} when the stem
 * exclusion set is non-empty; and finally by a {@link SnowballFilter} backed by a
 * {@link TurkishStemmer}.
 *
 * @param fieldName name of the field; not consulted by this implementation
 * @return a {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents}
 *         pairing the tokenizer with the last filter of the chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  final Tokenizer tokenizer = new StandardTokenizer();

  TokenStream chain =
      new StopFilter(new TurkishLowerCaseFilter(new ApostropheFilter(tokenizer)), stopwords);

  // Only insert the keyword marker when there is something to exclude from stemming.
  final boolean hasExclusions = !stemExclusionSet.isEmpty();
  if (hasExclusions) {
    chain = new SetKeywordMarkerFilter(chain, stemExclusionSet);
  }

  return new TokenStreamComponents(tokenizer, new SnowballFilter(chain, new TurkishStemmer()));
}
/**
 * Selects the lower-casing filter for the configured {@code lang}.
 *
 * <p>A {@code null} language yields the plain {@link LowerCaseFilter}. The names
 * "greek", "irish" and "turkish" are matched case-insensitively and yield
 * {@link GreekLowerCaseFilter}, {@link IrishLowerCaseFilter} and
 * {@link TurkishLowerCaseFilter} respectively.
 *
 * @param tokenStream the stream to wrap
 * @return the lower-case filter wrapping {@code tokenStream}
 * @throws IllegalArgumentException if {@code lang} is non-null and not one of the
 *         recognized names
 */
@Override
public TokenStream create(TokenStream tokenStream) {
  if (lang == null) {
    return new LowerCaseFilter(tokenStream);
  }
  if (lang.equalsIgnoreCase("greek")) {
    return new GreekLowerCaseFilter(tokenStream);
  }
  if (lang.equalsIgnoreCase("irish")) {
    return new IrishLowerCaseFilter(tokenStream);
  }
  if (lang.equalsIgnoreCase("turkish")) {
    return new TurkishLowerCaseFilter(tokenStream);
  }
  throw new IllegalArgumentException("language [" + lang + "] not support for lower case");
}
/**
 * Selects the lower-casing filter for the configured {@code lang}.
 *
 * <p>A {@code null} language yields the plain {@link LowerCaseFilter}. The names
 * "greek", "irish" and "turkish" are matched case-insensitively and yield
 * {@link GreekLowerCaseFilter}, {@link IrishLowerCaseFilter} and
 * {@link TurkishLowerCaseFilter} respectively.
 *
 * @param tokenStream the stream to wrap
 * @return the lower-case filter wrapping {@code tokenStream}
 * @throws IllegalArgumentException if {@code lang} is non-null and not one of the
 *         recognized names
 */
@Override
public TokenStream create(TokenStream tokenStream) {
  if (lang == null) {
    return new LowerCaseFilter(tokenStream);
  }
  if (lang.equalsIgnoreCase("greek")) {
    return new GreekLowerCaseFilter(tokenStream);
  }
  if (lang.equalsIgnoreCase("irish")) {
    return new IrishLowerCaseFilter(tokenStream);
  }
  if (lang.equalsIgnoreCase("turkish")) {
    return new TurkishLowerCaseFilter(tokenStream);
  }
  throw new IllegalArgumentException("language [" + lang + "] not support for lower case");
}
/** Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter}, and a {@link SnowballFilter} */ @Override public TokenStreamComponents createComponents(String fieldName) { final Tokenizer tokenizer = new StandardTokenizer(); TokenStream result = tokenizer; // remove the possessive 's for english stemmers if (name.equals("English") || name.equals("Porter") || name.equals("Lovins")) result = new EnglishPossessiveFilter(result); // Use a special lowercase filter for turkish, the stemmer expects it. if (name.equals("Turkish")) result = new TurkishLowerCaseFilter(result); else result = new LowerCaseFilter(result); if (stopSet != null) result = new StopFilter(result, stopSet); result = new SnowballFilter(result, name); return new TokenStreamComponents(tokenizer, result); } }
/** Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter}, and a {@link SnowballFilter} */ @Override public TokenStreamComponents createComponents(String fieldName) { final Tokenizer tokenizer = new StandardTokenizer(); TokenStream result = tokenizer; // remove the possessive 's for english stemmers if (name.equals("English") || name.equals("Porter") || name.equals("Lovins")) result = new EnglishPossessiveFilter(result); // Use a special lowercase filter for turkish, the stemmer expects it. if (name.equals("Turkish")) result = new TurkishLowerCaseFilter(result); else result = new LowerCaseFilter(result); if (stopSet != null) result = new StopFilter(result, stopSet); result = new SnowballFilter(result, name); return new TokenStreamComponents(tokenizer, result); } }
/** Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter}, and a {@link SnowballFilter} */ @Override public TokenStreamComponents createComponents(String fieldName) { final Tokenizer tokenizer = new StandardTokenizer(); TokenStream result = tokenizer; // remove the possessive 's for english stemmers if (name.equals("English") || name.equals("Porter") || name.equals("Lovins")) result = new EnglishPossessiveFilter(result); // Use a special lowercase filter for turkish, the stemmer expects it. if (name.equals("Turkish")) result = new TurkishLowerCaseFilter(result); else result = new LowerCaseFilter(result); if (stopSet != null) result = new StopFilter(result, stopSet); result = new SnowballFilter(result, name); return new TokenStreamComponents(tokenizer, result); } }
/**
 * Wraps the given stream, in order, in an {@link ApostropheFilter}, a
 * {@link TurkishLowerCaseFilter} and a {@link SnowballFilter} backed by a
 * {@link TurkishStemmer}.
 *
 * @param result the stream to wrap
 * @return the outermost filter of the chain
 */
static public TokenStream turkish(TokenStream result) {
  final TokenStream lowered = new TurkishLowerCaseFilter(new ApostropheFilter(result));
  return new SnowballFilter(lowered, new TurkishStemmer());
}
/** Constructs a {StandardTokenizer} filtered by a {@link StandardFilter}, a {LowerCaseFilter}, a {StopFilter}, and a {SnowballFilter} */ @Override public TokenStreamComponents createComponents(String fieldName) { final Tokenizer tokenizer = new StandardTokenizer(); TokenStream result = tokenizer; // remove the possessive 's for english stemmers if (name.equals("English") || name.equals("Porter") || name.equals("Lovins")) { result = new EnglishPossessiveFilter(result); } // Use a special lowercase filter for turkish, the stemmer expects it. if (name.equals("Turkish")) { result = new TurkishLowerCaseFilter(result); } else { result = new LowerCaseFilter(result); } if (stopSet != null) { result = new StopFilter(result, stopSet); } result = new SnowballFilter(result, name); return new TokenStreamComponents(tokenizer, result); } }
/** Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter}, and a {@link SnowballFilter} */ @Override public TokenStream tokenStream(String fieldName, Reader reader) { TokenStream result = new StandardTokenizer(matchVersion, reader); result = new StandardFilter(matchVersion, result); // remove the possessive 's for english stemmers if (matchVersion.onOrAfter(Version.LUCENE_31) && (name.equals("English") || name.equals("Porter") || name.equals("Lovins"))) result = new EnglishPossessiveFilter(result); // Use a special lowercase filter for turkish, the stemmer expects it. if (matchVersion.onOrAfter(Version.LUCENE_31) && name.equals("Turkish")) result = new TurkishLowerCaseFilter(result); else result = new LowerCaseFilter(matchVersion, result); if (stopSet != null) result = new StopFilter(matchVersion, result, stopSet); result = new SnowballFilter(result, name); return result; }
/**
 * Builds the analysis chain over the supplied {@link Reader}.
 *
 * <p>The source is a {@link StandardTokenizer}. Its output is wrapped, in order, by
 * {@link StandardFilter}, {@link TurkishLowerCaseFilter} and {@link StopFilter}
 * (using {@code stopwords}); then by {@link KeywordMarkerFilter} when the stem
 * exclusion set is non-empty; and finally by a {@link SnowballFilter} backed by a
 * {@link TurkishStemmer}.
 *
 * @param fieldName name of the field; not consulted by this implementation
 * @param reader the text source handed to the tokenizer
 * @return a
 *         {@link org.apache.lucene.analysis.ReusableAnalyzerBase.TokenStreamComponents}
 *         pairing the tokenizer with the last filter of the chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  final Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);

  final TokenStream lowered =
      new TurkishLowerCaseFilter(new StandardFilter(matchVersion, tokenizer));
  TokenStream chain = new StopFilter(matchVersion, lowered, stopwords);

  // Only insert the keyword marker when there is something to exclude from stemming.
  if (!stemExclusionSet.isEmpty()) {
    chain = new KeywordMarkerFilter(chain, stemExclusionSet);
  }

  return new TokenStreamComponents(tokenizer, new SnowballFilter(chain, new TurkishStemmer()));
}
}
/**
 * Assembles a filter chain over the given tokenizer according to this
 * instance's flags.
 *
 * <p>A {@link StandardFilter} is always applied. A {@link TurkishLowerCaseFilter}
 * is added when {@code caseInsensitive} is set, and a {@link StopFilter} using
 * {@code TurkishAnalyzer.getDefaultStopSet()} when {@code useStopWords} is set.
 * When {@code useStem} is set, a {@link SetKeywordMarkerFilter} is added for a
 * non-empty exclusion set, followed by a {@link SnowballFilter} backed by a
 * {@link TurkishStemmer}.
 *
 * @param tokenizer the token source to wrap
 * @param stemExclusionSet terms to mark as keywords before stemming; only
 *        consulted when {@code useStem} is set
 * @return the outermost stream of the chain
 */
@Override
public TokenStream getTokenStream(Tokenizer tokenizer, CharArraySet stemExclusionSet) {
  TokenStream chain = new StandardFilter(matchVersion, tokenizer);

  if (caseInsensitive) {
    chain = new TurkishLowerCaseFilter(chain);
  }
  if (useStopWords) {
    chain = new StopFilter(matchVersion, chain, TurkishAnalyzer.getDefaultStopSet());
  }

  // Without stemming the exclusion set is irrelevant, so the chain ends here.
  if (!useStem) {
    return chain;
  }

  if (!stemExclusionSet.isEmpty()) {
    chain = new SetKeywordMarkerFilter(chain, stemExclusionSet);
  }
  return new SnowballFilter(chain, new TurkishStemmer());
}
}
/** Returns a (possibly reused) {@link StandardTokenizer} filtered by a * {@link StandardFilter}, a {@link LowerCaseFilter}, * a {@link StopFilter}, and a {@link SnowballFilter} */ @Override public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { SavedStreams streams = (SavedStreams) getPreviousTokenStream(); if (streams == null) { streams = new SavedStreams(); streams.source = new StandardTokenizer(matchVersion, reader); streams.result = new StandardFilter(matchVersion, streams.source); // Use a special lowercase filter for turkish, the stemmer expects it. if (matchVersion.onOrAfter(Version.LUCENE_31) && name.equals("Turkish")) streams.result = new TurkishLowerCaseFilter(streams.result); else streams.result = new LowerCaseFilter(matchVersion, streams.result); if (stopSet != null) streams.result = new StopFilter(matchVersion, streams.result, stopSet); streams.result = new SnowballFilter(streams.result, name); setPreviousTokenStream(streams); } else { streams.source.reset(reader); } return streams.result; } }
result = new ApostropheFilter(result); result = new TurkishLowerCaseFilter(result); result = new StopFilter(result, stopwords); if (!stemExclusionSet.isEmpty()) {