// TokenFilterFactory hook: wraps the incoming stream with an EnglishPossessiveFilter
// (per comments elsewhere in this file, this strips the trailing possessive 's from tokens).
// The final '}' closes the enclosing factory class, whose header is above this view.
@Override public TokenStream create(TokenStream input) { return new EnglishPossessiveFilter(input); } }
// Duplicate factory hook: produces an EnglishPossessiveFilter around the given stream.
// NOTE(review): identical to the factory on the previous line — confirm both classes are needed.
// The final '}' closes the enclosing factory class, whose header is above this view.
@Override public TokenStream create(TokenStream input) { return new EnglishPossessiveFilter(input); } }
/**
 * Creates {@link org.apache.lucene.analysis.Analyzer.TokenStreamComponents} which tokenizes all
 * the text in the provided {@link Reader}.
 *
 * @return components built from a {@link StandardTokenizer} filtered with
 *     {@link EnglishPossessiveFilter}, {@link LowerCaseFilter}, {@link StopFilter},
 *     {@link SetKeywordMarkerFilter} (only when a stem exclusion set is provided) and
 *     {@link PorterStemFilter}.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  final Tokenizer source = new StandardTokenizer();
  TokenStream result = new EnglishPossessiveFilter(source);
  result = new LowerCaseFilter(result);
  result = new StopFilter(result, stopwords);
  // Mark excluded terms as keywords so the stemmer leaves them untouched; skip when empty.
  if (!stemExclusionSet.isEmpty()) {
    result = new SetKeywordMarkerFilter(result, stemExclusionSet);
  }
  result = new PorterStemFilter(result);
  return new TokenStreamComponents(source, result);
}
/**
 * Applies the standard English filter chain to the given stream: possessive stripping,
 * lower-casing, then Porter stemming.
 */
public static TokenStream english(TokenStream input) {
  return new PorterStemFilter(new LowerCaseFilter(new EnglishPossessiveFilter(input)));
}
/** Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter}, and a {@link SnowballFilter} */ @Override public TokenStreamComponents createComponents(String fieldName) { final Tokenizer tokenizer = new StandardTokenizer(); TokenStream result = tokenizer; // remove the possessive 's for english stemmers if (name.equals("English") || name.equals("Porter") || name.equals("Lovins")) result = new EnglishPossessiveFilter(result); // Use a special lowercase filter for turkish, the stemmer expects it. if (name.equals("Turkish")) result = new TurkishLowerCaseFilter(result); else result = new LowerCaseFilter(result); if (stopSet != null) result = new StopFilter(result, stopSet); result = new SnowballFilter(result, name); return new TokenStreamComponents(tokenizer, result); } }
/** Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter}, and a {@link SnowballFilter} */ @Override public TokenStreamComponents createComponents(String fieldName) { final Tokenizer tokenizer = new StandardTokenizer(); TokenStream result = tokenizer; // remove the possessive 's for english stemmers if (name.equals("English") || name.equals("Porter") || name.equals("Lovins")) result = new EnglishPossessiveFilter(result); // Use a special lowercase filter for turkish, the stemmer expects it. if (name.equals("Turkish")) result = new TurkishLowerCaseFilter(result); else result = new LowerCaseFilter(result); if (stopSet != null) result = new StopFilter(result, stopSet); result = new SnowballFilter(result, name); return new TokenStreamComponents(tokenizer, result); } }
// NOTE(review): this definition is byte-identical to two other createComponents variants in this
// file — confirm whether the duplication is intentional. Also, the Javadoc mentions a
// StandardFilter that the chain never applies.
/** Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter}, and a {@link SnowballFilter} */ @Override public TokenStreamComponents createComponents(String fieldName) { final Tokenizer tokenizer = new StandardTokenizer(); TokenStream result = tokenizer; // remove the possessive 's for english stemmers if (name.equals("English") || name.equals("Porter") || name.equals("Lovins")) result = new EnglishPossessiveFilter(result); // Use a special lowercase filter for turkish, the stemmer expects it. if (name.equals("Turkish")) result = new TurkishLowerCaseFilter(result); else result = new LowerCaseFilter(result); if (stopSet != null) result = new StopFilter(result, stopSet); result = new SnowballFilter(result, name); return new TokenStreamComponents(tokenizer, result); } }
/** Constructs a {@link StandardTokenizer} filtered by a {@link StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter}, and a {@link SnowballFilter} */ @Override public TokenStream tokenStream(String fieldName, Reader reader) { TokenStream result = new StandardTokenizer(matchVersion, reader); result = new StandardFilter(matchVersion, result); // remove the possessive 's for english stemmers if (matchVersion.onOrAfter(Version.LUCENE_31) && (name.equals("English") || name.equals("Porter") || name.equals("Lovins"))) result = new EnglishPossessiveFilter(result); // Use a special lowercase filter for turkish, the stemmer expects it. if (matchVersion.onOrAfter(Version.LUCENE_31) && name.equals("Turkish")) result = new TurkishLowerCaseFilter(result); else result = new LowerCaseFilter(matchVersion, result); if (stopSet != null) result = new StopFilter(matchVersion, result, stopSet); result = new SnowballFilter(result, name); return result; }
/**
 * Builds the per-field analysis chain: standard tokenization and filtering, possessive
 * stripping, lower-casing, stop-word removal, optional keyword protection for the stem
 * exclusion set, and a configurable stemmer.
 *
 * <p>{@code stemmer} selects "porter"/"p" or "krovetz"/"k" (case-insensitive); any other
 * value disables stemming entirely.
 */
protected TokenStreamComponents createComponents(String fieldName) {
  final Tokenizer source = new StandardTokenizer();
  TokenStream result = new StandardFilter(source);
  result = new EnglishPossessiveFilter(result);
  result = new LowerCaseFilter(result);
  result = new StopFilter(result, this.stopwords);
  if (!this.stemExclusionSet.isEmpty()) {
    result = new SetKeywordMarkerFilter(result, this.stemExclusionSet);
  }
  // equalsIgnoreCase replaces the original compareToIgnoreCase(...) == 0 — same semantics.
  if (this.stemmer.equalsIgnoreCase("porter") || this.stemmer.equalsIgnoreCase("p")) {
    result = new PorterStemFilter(result);
  } else if (this.stemmer.equalsIgnoreCase("krovetz") || this.stemmer.equalsIgnoreCase("k")) {
    result = new KStemFilter(result);
  }
  return new TokenStreamComponents(source, result);
}
/**
 * Assembles the field analysis chain: standard tokenization and filtering, ASCII folding,
 * lower-casing, possessive stripping, stop-word removal, word delimiting (alpha parts only)
 * and Porter stemming.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  final Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
  TokenStream chain = new StandardFilter(matchVersion, tokenizer);
  chain = new ASCIIFoldingFilter(chain);
  chain = new LowerCaseFilter(matchVersion, chain);
  chain = new EnglishPossessiveFilter(matchVersion, chain);
  chain = new StopFilter(matchVersion, chain, stopwords);
  chain = new WordDelimiterFilter(chain, WordDelimiterFilter.ALPHA, null);
  chain = new PorterStemFilter(chain);
  return new TokenStreamComponents(tokenizer, chain);
}
}
/**
 * Builds a fixed Lucene 4.5 analysis chain: standard tokenizer and filter, possessive
 * stripping, lower-casing, default stop words, ASCII folding and KStem stemming.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  // Version is pinned locally rather than taken from a field.
  final Version version = Version.LUCENE_45;
  final Tokenizer src = new StandardTokenizer(version, reader);
  TokenStream stream = new StandardFilter(version, src);
  stream = new EnglishPossessiveFilter(version, stream);
  stream = new LowerCaseFilter(version, stream);
  stream = new StopFilter(version, stream, DefaultSetHolder.DEFAULT_STOP_SET);
  stream = new ASCIIFoldingFilter(stream);
  stream = new KStemFilter(stream);
  return new TokenStreamComponents(src, stream);
}
@Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { final Tokenizer source = new StandardTokenizer(matchVersion, reader); TokenStream result = new StandardFilter(matchVersion, source); result = new ASCIIFoldingFilter(result); result = new EnglishPossessiveFilter(matchVersion, result); result = new WordDelimiterFilter(result,WordDelimiterFilter.ALPHA,null); result = new StopFilter(matchVersion, result, EnglishAnalyzer.getDefaultStopSet()); result = new LowerCaseFilter(matchVersion, result); // result = new PorterStemFilter(result); return new TokenStreamComponents(source, result); } }
/**
 * Field analysis chain: standard tokenization and filtering, ASCII folding, lower-casing,
 * possessive stripping, stop-word removal, word delimiting (alpha parts only) and Porter
 * stemming.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  final Tokenizer tok = new StandardTokenizer();
  TokenStream ts = new StandardFilter(tok);
  ts = new ASCIIFoldingFilter(ts);
  ts = new LowerCaseFilter(ts);
  ts = new EnglishPossessiveFilter(ts);
  ts = new StopFilter(ts, stopwords);
  ts = new WordDelimiterFilter(ts, WordDelimiterFilter.ALPHA, null);
  ts = new PorterStemFilter(ts);
  return new TokenStreamComponents(tok, ts);
}
}
// Chain: StandardTokenizer -> StandardFilter -> ASCII folding -> possessive stripping ->
// word delimiting (alpha only) -> English stop words -> lower-casing. Stemming is disabled
// (PorterStemFilter left commented out). NOTE(review): byte-identical to another variant in
// this file — confirm the duplication is intentional. The trailing '}' closes the enclosing class.
@Override protected TokenStreamComponents createComponents(String fieldName, Reader reader) { final Tokenizer source = new StandardTokenizer(matchVersion, reader); TokenStream result = new StandardFilter(matchVersion, source); result = new ASCIIFoldingFilter(result); result = new EnglishPossessiveFilter(matchVersion, result); result = new WordDelimiterFilter(result,WordDelimiterFilter.ALPHA,null); result = new StopFilter(matchVersion, result, EnglishAnalyzer.getDefaultStopSet()); result = new LowerCaseFilter(matchVersion, result); // result = new PorterStemFilter(result); return new TokenStreamComponents(source, result); } }
// Chain: StandardTokenizer -> StandardFilter -> ASCII folding -> lower-casing -> possessive
// stripping -> stop words -> word delimiting (alpha only) -> Porter stemming.
// NOTE(review): byte-identical to another createComponents variant in this file — confirm the
// duplication is intentional. The trailing '}' closes the enclosing class.
@Override protected TokenStreamComponents createComponents(String fieldName) { final Tokenizer source = new StandardTokenizer(); TokenStream result = new StandardFilter(source); result = new ASCIIFoldingFilter(result); result = new LowerCaseFilter(result); result = new EnglishPossessiveFilter(result); result = new StopFilter(result, stopwords); result = new WordDelimiterFilter(result,WordDelimiterFilter.ALPHA,null); result = new PorterStemFilter(result); return new TokenStreamComponents(source, result); } }
/**
 * Builds a configurable token stream on top of the supplied tokenizer: optional
 * lower-casing, optional English default stop words, and optional Porter stemming (with
 * keyword protection for the exclusion set and possessive stripping applied first).
 */
@Override
public TokenStream getTokenStream(Tokenizer tokenizer, CharArraySet stemExclusionSet) {
  TokenStream ts = new StandardFilter(matchVersion, tokenizer);
  if (caseInsensitive) {
    ts = new LowerCaseFilter(matchVersion, ts);
  }
  if (useStopWords) {
    ts = new StopFilter(matchVersion, ts, EnglishAnalyzer.getDefaultStopSet());
  }
  if (useStem) {
    // Mark excluded terms as keywords so the stemmer skips them.
    if (!stemExclusionSet.isEmpty()) {
      ts = new SetKeywordMarkerFilter(ts, stemExclusionSet);
    }
    ts = new PorterStemFilter(new EnglishPossessiveFilter(matchVersion, ts));
  }
  return ts;
}
}
/**
 * Field analysis chain: standard tokenization and filtering, ASCII folding, possessive
 * stripping, word delimiting (alpha parts only), lower-casing, English default stop words
 * and Porter stemming.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
  final Tokenizer tok = new StandardTokenizer();
  TokenStream stream = new StandardFilter(tok);
  stream = new ASCIIFoldingFilter(stream);
  stream = new EnglishPossessiveFilter(stream);
  stream = new WordDelimiterFilter(stream, WordDelimiterFilter.ALPHA, null);
  stream = new LowerCaseFilter(stream);
  stream = new StopFilter(stream, EnglishAnalyzer.getDefaultStopSet());
  stream = new PorterStemFilter(stream);
  return new TokenStreamComponents(tok, stream);
}
/**
 * Version-aware field analysis chain: standard tokenization and filtering, ASCII folding,
 * possessive stripping, word delimiting (alpha parts only), lower-casing, English default
 * stop words and Porter stemming.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  final Tokenizer tok = new StandardTokenizer(matchVersion, reader);
  TokenStream stream = new StandardFilter(matchVersion, tok);
  stream = new ASCIIFoldingFilter(stream);
  stream = new EnglishPossessiveFilter(matchVersion, stream);
  stream = new WordDelimiterFilter(stream, WordDelimiterFilter.ALPHA, null);
  stream = new LowerCaseFilter(matchVersion, stream);
  stream = new StopFilter(matchVersion, stream, EnglishAnalyzer.getDefaultStopSet());
  stream = new PorterStemFilter(stream);
  return new TokenStreamComponents(tok, stream);
}
// Chain: StandardTokenizer -> StandardFilter -> ASCII folding -> possessive stripping ->
// word delimiting (alpha only) -> lower-casing -> English default stop words -> Porter stemming.
// NOTE(review): byte-identical to another createComponents variant in this file — confirm the
// duplication is intentional.
@Override protected TokenStreamComponents createComponents(String fieldName) { final Tokenizer source = new StandardTokenizer(); TokenStream result = new StandardFilter(source); result = new ASCIIFoldingFilter(result); result = new EnglishPossessiveFilter(result); result = new WordDelimiterFilter(result,WordDelimiterFilter.ALPHA,null); result = new LowerCaseFilter(result); result = new StopFilter(result, EnglishAnalyzer.getDefaultStopSet()); result = new PorterStemFilter(result); return new TokenStreamComponents(source, result); }
/**
 * Field analysis chain: standard tokenization and filtering, possessive stripping,
 * lower-casing, stop-word removal, ASCII folding, optional keyword protection for the stem
 * exclusion set, then Porter stemming.
 */
protected TokenStreamComponents createComponents(String fieldName) {
  final Tokenizer source = new StandardTokenizer();
  // Program to the TokenStream interface: the original threaded the chain through an
  // Object-typed variable plus casts and concrete filter types, which hid type errors.
  TokenStream result = new StandardFilter(source);
  result = new EnglishPossessiveFilter(result);
  result = new LowerCaseFilter(result);
  result = new StopFilter(result, this.stopwords);
  result = new ASCIIFoldingFilter(result);
  if (!this.stemExclusionSet.isEmpty()) {
    result = new SetKeywordMarkerFilter(result, this.stemExclusionSet);
  }
  result = new PorterStemFilter(result);
  return new TokenStreamComponents(source, result);
}