@Override
protected TokenStreamComponents createComponents(String fieldName) {
    KeywordTokenizer source = new KeywordTokenizer();
    return new TokenStreamComponents(source, new LowerCaseFilter(source));
}
@Override
public TokenStream getTokenStream(Tokenizer tokenizer, CharArraySet stemExclusionSet) {
    TokenStream stream = new StandardFilter(matchVersion, tokenizer);
    if (caseInsensitive)
        stream = new LowerCaseFilter(matchVersion, stream);
    if (useStopWords)
        stream = new StopFilter(matchVersion, stream, IndonesianAnalyzer.getDefaultStopSet());
    if (useStem && !stemExclusionSet.isEmpty()) {
        // Mark excluded terms as keywords so any downstream stemmer leaves them untouched.
        stream = new SetKeywordMarkerFilter(stream, stemExclusionSet);
    }
    return stream;
}
}
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final StandardTokenizer src = new StandardTokenizer(matchVersion, reader);
    src.setMaxTokenLength(maxTokenLength);
    TokenStream tok = new StandardFilter(matchVersion, src);
    tok = new LowerCaseFilter(matchVersion, tok);
    tok = new StopFilter(matchVersion, tok, stopwords);
    return new TokenStreamComponents(src, tok) {
        @Override
        protected void setReader(final Reader reader) throws IOException {
            // Re-apply the current limit: the components are cached and reused,
            // so maxTokenLength may have changed since they were built.
            src.setMaxTokenLength(ReaderStandardAnalyzer.this.maxTokenLength);
            super.setReader(reader);
        }
    };
}
@Override
public TokenStream getTokenStream(Tokenizer tokenizer, CharArraySet stemExclusionSet) {
    TokenStream stream = new StandardFilter(matchVersion, tokenizer);
    if (caseInsensitive)
        stream = new LowerCaseFilter(matchVersion, stream);
    return stream;
}
}
/**
 * Creates the TokenStreamComponents used to analyze the stream.
 *
 * @param fieldName the field that this lucene analyzer will process
 * @return the token stream filter chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    //final Tokenizer source = new AlphaNumericTokenizer();
    final Tokenizer source = new WhitespaceTokenizer();
    TokenStream stream = source;
    stream = new UrlTokenizingFilter(stream);
    stream = new AlphaNumericFilter(stream);
    stream = new WordDelimiterGraphFilter(stream,
            WordDelimiterGraphFilter.GENERATE_WORD_PARTS
          | WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS
          | WordDelimiterGraphFilter.PRESERVE_ORIGINAL
          | WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE
          | WordDelimiterGraphFilter.SPLIT_ON_NUMERICS
          | WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE,
            null);
    stream = new LowerCaseFilter(stream);
    stream = new StopFilter(stream, stopWords);
    concatenatingFilter = new TokenPairConcatenatingFilter(stream);
    return new TokenStreamComponents(source, concatenatingFilter);
}
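/*
 * A minimal sketch of consuming the chain built above; "SearchAnalyzer" is a
 * hypothetical name for the surrounding Analyzer class, while the consumption
 * calls (tokenStream, addAttribute, reset, incrementToken, end) are standard
 * Lucene API.
 */
try (Analyzer analyzer = new SearchAnalyzer();
     TokenStream ts = analyzer.tokenStream("body", "See https://example.com for FooBar2000")) {
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
    ts.reset();                     // required before the first incrementToken()
    while (ts.incrementToken()) {
        System.out.println(term);   // one emitted token (or concatenated pair) per line
    }
    ts.end();                       // finalize end-of-stream state
}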
@Override
protected TokenStreamComponents createComponents(final String field) {
    final Tokenizer source = new NGramTokenizer(minNgram(), maxNgram());
    final TokenStream result = new StopFilter(
            new LowerCaseFilter(new StandardFilter(source)),
            new CharArraySet(asList(stopWords()), true));
    return new TokenStreamComponents(source, result);
}
@Override
public TokenStream getTokenStream(Tokenizer tokenizer, CharArraySet stemExclusionSet) {
    TokenStream stream = new StandardFilter(matchVersion, tokenizer);
    if (caseInsensitive)
        stream = new LowerCaseFilter(matchVersion, stream);
    if (useStopWords)
        stream = new StopFilter(matchVersion, stream, ThaiAnalyzer.getDefaultStopSet());
    if (useStem && !stemExclusionSet.isEmpty()) {
        stream = new SetKeywordMarkerFilter(stream, stemExclusionSet);
    }
    return stream;
}
}
/** {@inheritDoc} */
@Override
protected Analyzer.TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new StandardTokenizer();
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(result);
    result = new StopFilter(result, stopwords);
    result = new SnowballFilter(result, language);
    return new TokenStreamComponents(source, result);
}
}
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    final Tokenizer source = new KeywordTokenizer();
    TokenStream result = new StandardFilter(source);
    result = new CharacterFilter(result);
    result = new ASCIIFoldingFilter(result);
    result = new LowerCaseFilter(result);
    // result = new WordDelimiterFilter(result, WordDelimiterFilter.DIGIT, null);
    return new TokenStreamComponents(source, result);
}
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    LetterTokenizer tokenizer = new LetterTokenizer(BonnieConstants.LUCENE_VERSION, reader);
    TokenStream result = new LowerCaseFilter(BonnieConstants.LUCENE_VERSION, tokenizer);
    result = new StopFilter(BonnieConstants.LUCENE_VERSION, result, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    result = new PorterStemFilter(result);
    return new TokenStreamComponents(tokenizer, result);
}
@Override
public TokenStream getTokenStream(Tokenizer tokenizer, CharArraySet stemExclusionSet) {
    TokenStream stream = new StandardFilter(matchVersion, tokenizer);
    if (caseInsensitive)
        stream = new LowerCaseFilter(matchVersion, stream);
    if (useStopWords)
        stream = new StopFilter(matchVersion, stream, PersianAnalyzer.getDefaultStopSet());
    if (useStem && !stemExclusionSet.isEmpty()) {
        stream = new SetKeywordMarkerFilter(stream, stemExclusionSet);
    }
    return stream;
}
}
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final StandardTokenizer src = new StandardTokenizer(reader);
    src.setMaxTokenLength(maxTokenLength);
    TokenStream tok = new StandardFilter(src);
    tok = new LowerCaseFilter(tok);
    tok = new StopFilter(tok, stopwords);
    return new TokenStreamComponents(src, tok) {
        @Override
        protected void setReader(final Reader reader) throws IOException {
            // Cached components are reused; push the current limit back onto the tokenizer.
            src.setMaxTokenLength(StandardAnalyzer.this.maxTokenLength);
            super.setReader(reader);
        }
    };
}
}
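/*
 * Why the anonymous TokenStreamComponents above overrides setReader: Analyzer
 * caches components per thread and, on reuse, only swaps in a new Reader, so a
 * maxTokenLength changed between calls must be pushed back onto the cached
 * tokenizer. A sketch, assuming this StandardAnalyzer exposes a no-arg
 * constructor and a setMaxTokenLength(int) setter, as Lucene's own does:
 */
StandardAnalyzer analyzer = new StandardAnalyzer();
try (TokenStream ts = analyzer.tokenStream("f", new StringReader("first call"))) {
    ts.reset();
    while (ts.incrementToken()) { /* consume */ }
    ts.end();
}
analyzer.setMaxTokenLength(64);     // takes effect on the next reuse
try (TokenStream ts = analyzer.tokenStream("f", new StringReader("second call"))) {
    ts.reset();                     // setReader(...) has re-applied the new limit
    while (ts.incrementToken()) { /* consume */ }
    ts.end();
}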
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source = new WhitespaceTokenizer();
    TokenStream filter = new LowerCaseFilter(source);
    return new TokenStreamComponents(source, filter);
}
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    final Tokenizer source = new CharacterShingleTokenizer(reader);
    TokenStream result = new StandardFilter(matchVersion, source);
    result = new ASCIIFoldingFilter(result);
    result = new LowerCaseFilter(matchVersion, result);
    result = new ShingleFilter(result, 3);
    // result = new WordDelimiterFilter(result, WordDelimiterFilter.DIGIT, null);
    return new TokenStreamComponents(source, result);
}
@Override
public TokenStream getTokenStream(Tokenizer tokenizer, CharArraySet stemExclusionSet) {
    TokenStream stream = new CJKWidthFilter(tokenizer);
    stream = new CJKBigramFilter(stream);
    if (caseInsensitive)
        stream = new LowerCaseFilter(matchVersion, stream);
    if (useStopWords)
        stream = new StopFilter(matchVersion, stream, CJKAnalyzer.getDefaultStopSet());
    return stream;
}
}
@Override
public TokenStream getTokenStream(Tokenizer tokenizer, CharArraySet stemExclusionSet) {
    TokenStream stream = new StandardFilter(matchVersion, tokenizer);
    if (caseInsensitive)
        stream = new LowerCaseFilter(matchVersion, stream);
    if (useStopWords)
        stream = new StopFilter(matchVersion, stream, CzechAnalyzer.getDefaultStopSet());
    if (useStem) {
        // Protect excluded terms before stemming, then apply the Czech stemmer.
        if (!stemExclusionSet.isEmpty())
            stream = new SetKeywordMarkerFilter(stream, stemExclusionSet);
        stream = new CzechStemFilter(stream);
    }
    return stream;
}
}
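/*
 * The getTokenStream(...) overrides in this section are filter-chain factories,
 * not complete analyzers. A sketch of plugging one into an Analyzer; the
 * FilterChainFactory interface and the constructor wiring are assumptions, only
 * the Lucene calls themselves are real API.
 */
public final class DelegatingAnalyzer extends Analyzer {
    private final Version matchVersion;
    private final FilterChainFactory factory;      // hypothetical: declares getTokenStream(...)
    private final CharArraySet stemExclusionSet;

    public DelegatingAnalyzer(Version matchVersion, FilterChainFactory factory,
                              CharArraySet stemExclusionSet) {
        this.matchVersion = matchVersion;
        this.factory = factory;
        this.stemExclusionSet = stemExclusionSet;
    }

    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new StandardTokenizer(matchVersion, reader);
        TokenStream sink = factory.getTokenStream(source, stemExclusionSet);
        return new TokenStreamComponents(source, sink);
    }
}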
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new StandardTokenizer();
    TokenStream result = new StandardFilter(source);
    result = new LowerCaseFilter(result);
    result = new StopFilter(result, stopWords);
    // Empty exclusion set: no terms are protected from the German stemmer.
    result = new SetKeywordMarkerFilter(result, CharArraySet.EMPTY_SET);
    result = new GermanStemFilter(result);
    return new TokenStreamComponents(source, result);
}
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    called = true;
    Tokenizer source = new WhitespaceTokenizer();
    return new TokenStreamComponents(source, new LowerCaseFilter(source));
}
}
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new CharacterShingleTokenizer();
    TokenStream result = new StandardFilter(source);
    result = new ASCIIFoldingFilter(result);
    result = new LowerCaseFilter(result);
    result = new ShingleFilter(result, 3);
    // result = new WordDelimiterFilter(result, WordDelimiterFilter.DIGIT, null);
    return new TokenStreamComponents(source, result);
}
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    final ClassicTokenizer src = new ClassicTokenizer();
    src.setMaxTokenLength(maxTokenLength);
    TokenStream tok = new ClassicFilter(src);
    tok = new LowerCaseFilter(tok);
    tok = new StopFilter(tok, stopwords);
    return new TokenStreamComponents(src, tok) {
        @Override
        protected void setReader(final Reader reader) {
            // Cached components are reused; push the current limit back onto the tokenizer.
            src.setMaxTokenLength(ClassicAnalyzer.this.maxTokenLength);
            super.setReader(reader);
        }
    };
}
}