/** Builds the analysis chain: whitespace tokenization followed by lower-casing. */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer tokenizer = new WhitespaceTokenizer();
    return new TokenStreamComponents(tokenizer, new LowerCaseFilter(tokenizer));
}
/**
 * Records that the analyzer was invoked, then builds a whitespace-split,
 * lower-cased token chain.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    called = true; // flag observed by the enclosing owner (e.g. a test harness)
    final Tokenizer tok = new WhitespaceTokenizer();
    final TokenStream lowered = new LowerCaseFilter(tok);
    return new TokenStreamComponents(tok, lowered);
}
}
/**
 * Creates the TokenStreamComponents used to analyze the stream.
 *
 * @param fieldName the field that this Lucene analyzer will process
 * @return the token stream filter chain
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new WhitespaceTokenizer();
    TokenStream stream = source;
    // Recognize URLs as single tokens before the generic splitting below.
    stream = new UrlTokenizingFilter(stream);
    stream = new AlphaNumericFilter(stream);
    // Split on case changes, numerics, and word boundaries while also
    // emitting the original token and stemming English possessives.
    stream = new WordDelimiterGraphFilter(stream,
            WordDelimiterGraphFilter.GENERATE_WORD_PARTS
            | WordDelimiterGraphFilter.GENERATE_NUMBER_PARTS
            | WordDelimiterGraphFilter.PRESERVE_ORIGINAL
            | WordDelimiterGraphFilter.SPLIT_ON_CASE_CHANGE
            | WordDelimiterGraphFilter.SPLIT_ON_NUMERICS
            | WordDelimiterGraphFilter.STEM_ENGLISH_POSSESSIVE,
            null);
    stream = new LowerCaseFilter(stream);
    stream = new StopFilter(stream, stopWords);
    // NOTE(review): the final stage is kept in a field, presumably so the
    // owner can drive it directly (e.g. flush pending pairs) — confirm.
    concatenatingFilter = new TokenPairConcatenatingFilter(stream);
    return new TokenStreamComponents(source, concatenatingFilter);
}
/** Tokenizes on whitespace only; no downstream filters are applied. */
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    final Tokenizer whitespace = new WhitespaceTokenizer();
    return new TokenStreamComponents(whitespace);
}
}
/**
 * Creates the tokenizer selected by {@code rule}.
 *
 * @param factory the attribute factory the tokenizer is built with
 * @return a whitespace tokenizer (Java or Unicode rules) capped at {@code maxTokenLen}
 */
@Override
public Tokenizer create(AttributeFactory factory) {
    switch (rule) {
        case RULE_JAVA:
            return new WhitespaceTokenizer(factory, maxTokenLen);
        case RULE_UNICODE:
            return new UnicodeWhitespaceTokenizer(factory, maxTokenLen);
        default:
            // Any other rule value is a programming error, not a user error.
            throw new AssertionError();
    }
}
}
/** Splits input on whitespace and normalizes every token to lower case. */
@Override
protected Analyzer.TokenStreamComponents createComponents(final String fieldName) {
    final Tokenizer ws = new WhitespaceTokenizer();
    final TokenStream lowercased = new LowerCaseFilter(ws);
    return new TokenStreamComponents(ws, lowercased);
}
/** Whitespace tokenization with a lower-case filter on top. */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer base = new WhitespaceTokenizer();
    TokenStream normalized = new LowerCaseFilter(base);
    return new TokenStreamComponents(base, normalized);
}
/**
 * Uses the configured tokenizer factory when present (whitespace otherwise),
 * and lower-cases the stream when {@code ignoreCase} is set.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer tok;
    if (factory == null) {
        tok = new WhitespaceTokenizer();
    } else {
        tok = factory.create();
    }
    TokenStream chain = tok;
    if (ignoreCase) {
        chain = new LowerCaseFilter(chain);
    }
    return new TokenStreamComponents(tok, chain);
}
};
/**
 * Builds the chain from the configured factory, falling back to whitespace
 * tokenization; optionally case-folds when {@code ignoreCase} is true.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer base = (factory != null) ? factory.create() : new WhitespaceTokenizer();
    final TokenStream out = ignoreCase ? new LowerCaseFilter(base) : base;
    return new TokenStreamComponents(base, out);
}
};
tokenizerFactoryFactories.get("keyword"), tokenFilterFactoryFactories, charFilterFactoryFactories); processNormalizerFactory(entry.getKey(), entry.getValue(), whitespaceNormalizers, "whitespace", () -> new WhitespaceTokenizer(), tokenFilterFactoryFactories, charFilterFactoryFactories);
/**
 * Creates a whitespace tokenizer using the default token attribute factory
 * and the configured maximum token length.
 */
@Override
public Tokenizer create() {
    return new WhitespaceTokenizer(TokenStream.DEFAULT_TOKEN_ATTRIBUTE_FACTORY, maxTokenLength);
}
}
/** @return a fresh whitespace tokenizer; the {@code version} argument is not used. */
@Override
protected Tokenizer create(Version version) {
    return new WhitespaceTokenizer();
}
},
/**
 * Whitespace tokenization followed by two regex cleanups: first strip leading
 * and trailing punctuation (the middle is kept via capture group 2), then
 * replace every occurrence of "'s" with "s".
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer tokenizer = new WhitespaceTokenizer();
    // NOTE(review): both patterns are recompiled on every call; consider
    // hoisting them to static final Pattern constants.
    TokenStream result = new PatternReplaceFilter(tokenizer, Pattern.compile("^([\\.!\\?,:;\"'\\(\\)]*)(.*?)([\\.!\\?,:;\"'\\(\\)]*)$"), "$2", true);
    result = new PatternReplaceFilter(result, Pattern.compile("'s"), "s", true);
    return new TokenStreamComponents(tokenizer, result);
}
/** Lower-cases whitespace-delimited tokens. */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final WhitespaceTokenizer tokenizer = new WhitespaceTokenizer();
    return new TokenStreamComponents(tokenizer, new LowerCaseFilter(tokenizer));
}
}
@Override protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { Tokenizer source = new WhitespaceTokenizer(matchVersion, reader); TokenStream filter = new LowerCaseEntityPreservingFilter(source); if (stemming) { // Porter stemmer ignores words which are marked as keywords filter = new PorterStemFilter(filter); } return new TokenStreamComponents(source, filter); }
@Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer source = new WhitespaceTokenizer(); TokenStream filter = new TweetLowerCaseEntityPreservingFilter(source); if (stemming) { // Porter stemmer ignores words which are marked as keywords filter = new PorterStemFilter(filter); } return new TokenStreamComponents(source, filter); }
/**
 * Tokenizes via the configured factory (whitespace fallback) and lower-cases
 * the result when {@code ignoreCase} is enabled.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    Tokenizer source;
    if (factory != null) {
        source = factory.create();
    } else {
        source = new WhitespaceTokenizer();
    }
    return new TokenStreamComponents(source,
            ignoreCase ? new LowerCaseFilter(source) : source);
}
};
/**
 * Builds the chain from {@code tokenizerFactory} when configured, otherwise a
 * whitespace tokenizer; adds case folding when {@code ignoreCase} is set.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer root =
            (tokenizerFactory == null) ? new WhitespaceTokenizer() : tokenizerFactory.create();
    if (ignoreCase) {
        return new TokenStreamComponents(root, new LowerCaseFilter(root));
    }
    return new TokenStreamComponents(root, root);
}
};
/**
 * German search analysis chain: whitespace tokenization, InchFilter,
 * punctuation trimming, word-delimiter graph splitting with the shared
 * German config flags, and lower-casing.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final Tokenizer source = new WhitespaceTokenizer();
    // NOTE(review): InchFilter presumably normalizes inch/quote marks — confirm.
    TokenStream result = new InchFilter(source);
    // Keep only the token's core (group 1), dropping surrounding punctuation.
    result = new PatternReplaceFilter(result, Pattern.compile("^\\p{Punct}*(.*?)\\p{Punct}*$"), "$1", true);
    result = new WordDelimiterGraphFilter(result, GermanSearchAnalyzer.DELIMITER_CONFIG_FLAGS, null);
    result = new LowerCaseFilter(result);
    return new TokenStreamComponents(source, result);
}
};
/** Test analyzer: whitespace tokenizer feeding an ASCII-folding expansion filter. */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final WhitespaceTokenizer tokenizer =
            new WhitespaceTokenizer(LuceneTestCase.TEST_VERSION_CURRENT, reader);
    return new TokenStreamComponents(tokenizer, new ASCIIFoldingExpansionFilter(tokenizer));
}
};