// Re-analyze the stored value of a field with the (reusable) analyzer.
tokReader = new StringReader(field.stringValue());
tokens = analyzer.reusableTokenStream(field.name(), tokReader);
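// A hypothetical surrounding context for the fragment above, assuming Lucene 3.x;
// the method name reanalyze and the variables doc/analyzer are assumptions, not
// from the original source.
import java.io.IOException;
import java.io.StringReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;

void reanalyze(Document doc, Analyzer analyzer) throws IOException {
    for (Fieldable field : doc.getFields()) {
        if (field.stringValue() == null) {
            continue; // skip binary/unstored fields
        }
        StringReader tokReader = new StringReader(field.stringValue());
        TokenStream tokens = analyzer.reusableTokenStream(field.name(), tokReader);
        tokens.reset();
        // ... consume the stream ...
        tokens.end();
        tokens.close();
    }
}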
@Override
public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    return super.reusableTokenStream(fieldName, reader);
}
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    // Delegate to the analyzer registered for this field, if any,
    // falling back to the default analyzer otherwise.
    Analyzer analyzer = (Analyzer) analyzerMap.get(fieldName);
    if (analyzer == null) {
        analyzer = defaultAnalyzer;
    }
    return analyzer.reusableTokenStream(fieldName, reader);
}
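// A minimal usage sketch for the per-field delegation above; the map contents
// here are illustrative assumptions. Lucene ships this same pattern as
// PerFieldAnalyzerWrapper.
Map<String, Analyzer> analyzerMap = new HashMap<String, Analyzer>();
analyzerMap.put("id", new KeywordAnalyzer());
Analyzer defaultAnalyzer = new StandardAnalyzer(Version.LUCENE_35);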
@Override
public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    // Prefer a per-property analyzer from the indexing configuration when present.
    if (indexingConfig != null) {
        Analyzer propertyAnalyzer = indexingConfig.getPropertyAnalyzer(fieldName);
        if (propertyAnalyzer != null) {
            return propertyAnalyzer.reusableTokenStream(fieldName, reader);
        }
    }
    return defaultAnalyzer.reusableTokenStream(fieldName, reader);
}
@Override
public final TokenStream reusableTokenStream(final String fieldName, final Reader reader) throws IOException {
    if (isPhraseQuerySupportField(fieldName)) {
        return PHRASE_QUERY_SUPPORT_TEXT_FIELD_ANALYZER.reusableTokenStream(fieldName, reader);
    } else {
        return TEXT_FIELD_INDEXING_ANALYZER.reusableTokenStream(fieldName, reader);
    }
}
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    TokenStream retVal = analyzer.reusableTokenStream(fieldName, reader);
    return wrapTokenStreamIfNeeded(fieldName, retVal);
}
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    return getAnalyzer(fieldName).reusableTokenStream(fieldName, reader);
}
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException {
    Collection<Token> result = new ArrayList<Token>();
    Token token = null;
    // Uses the pre-3.0 Token-based API: TokenStream.next() was replaced by
    // incrementToken() and the attribute API in later Lucene versions.
    TokenStream ts = analyzer.reusableTokenStream("", new StringReader(q));
    ts.reset();
    while ((token = ts.next()) != null) {
        result.add(token);
    }
    return result;
}
@Override
public TokenStream reusableTokenStream(String field, Reader reader) throws IOException {
    if (!isTextField(field)) {
        // Non-text fields get a simple lowercasing letter/digit tokenizer.
        // Note: a fresh tokenizer is created on every call, so nothing is
        // actually reused on this branch.
        return new CharTokenizer(reader) {
            @Override
            protected boolean isTokenChar(char c) {
                return Character.isLetterOrDigit(c);
            }

            @Override
            protected char normalize(char c) {
                return Character.toLowerCase(c);
            }
        };
    } else {
        return DEFAULT_ANALYZER.reusableTokenStream(field, reader);
    }
}
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream result;
    try {
        // reusableTokenStream may throw IOException while tokenStream does not,
        // so fall back to the non-reusable variant on failure.
        result = delegate.reusableTokenStream(fieldName, reader);
    } catch (IOException e) {
        result = delegate.tokenStream(fieldName, reader);
    }
    Set<String> stopWords = stopWordsPerField.get(fieldName);
    if (stopWords != null) {
        result = new StopFilter(matchVersion, result, stopWords);
    }
    return result;
}
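// Hypothetical wiring for the wrapper above (the field name and stop words are
// assumptions); the method itself mirrors the shape of Lucene's
// QueryAutoStopWordAnalyzer.
Map<String, Set<String>> stopWordsPerField = new HashMap<String, Set<String>>();
stopWordsPerField.put("body", new HashSet<String>(Arrays.asList("the", "a", "an")));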
/**
 * Converts the original query string to a collection of Lucene Tokens.
 * @param original the original query string
 * @return a Collection of Lucene Tokens
 */
public Collection<Token> convert(String original) {
    if (original == null) { // this can happen with q.alt = and no query
        return Collections.emptyList();
    }
    Collection<Token> result = new ArrayList<Token>();
    // TODO: Extract the words using a simple regex, but not query stuff,
    // and then analyze them to produce the token stream.
    Matcher matcher = QUERY_REGEX.matcher(original);
    TokenStream stream;
    while (matcher.find()) {
        String word = matcher.group(0);
        if (!word.equals("AND") && !word.equals("OR")) {
            try {
                stream = analyzer.reusableTokenStream("", new StringReader(word));
                Token token;
                while ((token = stream.next()) != null) {
                    token.setStartOffset(matcher.start());
                    token.setEndOffset(matcher.end());
                    result.add(token);
                }
            } catch (IOException e) {
                // Ignore words that fail analysis.
            }
        }
    }
    return result;
}
private TokenStream createAnalyzerTStream(IndexSchema schema, String fieldName, String docText) throws IOException {
    TokenStream ts = schema.getAnalyzer().reusableTokenStream(fieldName, new StringReader(docText));
    ts.reset();
    // Keep tokens ordered by start offset within a window of 10 positions.
    return new TokenOrderingFilter(ts, 10);
}
String getAnalyzedQuery(String query) throws IOException {
    if (analyzer == null) {
        return query;
    }
    StringBuilder norm = new StringBuilder();
    // Pre-3.0 Token-based consumption: concatenate every term the analyzer produces.
    TokenStream tokens = analyzer.reusableTokenStream("", new StringReader(query));
    tokens.reset();
    Token token = tokens.next();
    while (token != null) {
        norm.append(new String(token.termBuffer(), 0, token.termLength()));
        token = tokens.next();
    }
    return norm.toString();
}
// Attach term and position-increment attributes before consuming the stream.
TokenStream ts = analyzer.reusableTokenStream("", new StringReader(text));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class);
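// The fragment above only attaches attributes; a typical Lucene 3.x consumption
// loop (a sketch, not part of the original source) would continue:
ts.reset();
while (ts.incrementToken()) {
    String term = termAtt.toString();
    int posInc = posIncAtt.getPositionIncrement();
    // ... use term and posInc ...
}
ts.end();
ts.close();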
@Override
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    SavedStreams streams = (SavedStreams) getPreviousTokenStream();
    if (streams == null) {
        // First use on this thread: build and cache the wrapped stream and its shingle filter.
        streams = new SavedStreams();
        streams.wrapped = defaultAnalyzer.reusableTokenStream(fieldName, reader);
        streams.shingle = new ShingleFilter(streams.wrapped);
        setPreviousTokenStream(streams);
    } else {
        TokenStream result = defaultAnalyzer.reusableTokenStream(fieldName, reader);
        if (result != streams.wrapped) {
            // The wrapped analyzer did not reuse its stream; wrap a new
            // ShingleFilter around the fresh one.
            streams.wrapped = result;
            streams.shingle = new ShingleFilter(streams.wrapped);
        }
    }
    streams.shingle.setMaxShingleSize(maxShingleSize);
    streams.shingle.setMinShingleSize(minShingleSize);
    streams.shingle.setTokenSeparator(tokenSeparator);
    streams.shingle.setOutputUnigrams(outputUnigrams);
    streams.shingle.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
    return streams.shingle;
}
@Override
public TokenStream tokenStream(String fieldName, Reader reader) {
    TokenStream wrapped;
    try {
        wrapped = defaultAnalyzer.reusableTokenStream(fieldName, reader);
    } catch (IOException e) {
        wrapped = defaultAnalyzer.tokenStream(fieldName, reader);
    }
    ShingleFilter filter = new ShingleFilter(wrapped, minShingleSize, maxShingleSize);
    filter.setMinShingleSize(minShingleSize);
    filter.setMaxShingleSize(maxShingleSize);
    filter.setTokenSeparator(tokenSeparator);
    filter.setOutputUnigrams(outputUnigrams);
    filter.setOutputUnigramsIfNoShingles(outputUnigramsIfNoShingles);
    return filter;
}
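// The two methods above match the shape of Lucene 3.x ShingleAnalyzerWrapper.
// A usage sketch (analyzer choice and shingle sizes are illustrative):
ShingleAnalyzerWrapper shingles =
    new ShingleAnalyzerWrapper(new StandardAnalyzer(Version.LUCENE_35), 2, 3);
TokenStream shingleStream =
    shingles.reusableTokenStream("body", new StringReader("please divide this sentence"));
// With default settings the stream emits unigrams plus 2- and 3-word
// shingles such as "please divide".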