/**
 * Points this stream at a new character source and re-initializes its state.
 *
 * @param reader the new {@link Reader} to tokenize from
 * @throws IOException if re-initializing the stream fails
 */
public void reset(Reader reader) throws IOException {
    // Install the new source first so the no-arg reset() sees it.
    input = reader;
    reset();
}
/**
 * Switches this stream to read from {@code reader}, then resets internal state.
 *
 * @param reader replacement character source
 * @throws IOException if the underlying reset fails
 */
public void reset(Reader reader) throws IOException {
    input = reader;
    // Delegate to the no-arg reset to restore per-stream bookkeeping.
    reset();
}
// Resets the enclosing (anonymous) stream, wrapping the incoming reader in an
// HTMLStripReader so HTML markup is removed before tokenization.
// NOTE(review): the trailing "} };" closes an anonymous class declared outside
// this view — left untouched.
@Override public void reset(Reader reader) throws IOException { super.reset(new HTMLStripReader(reader)); } };
/**
 * Analyzes the given text into a list of lower-case tokens with accents
 * folded to ASCII; punctuation is dropped by the tokenizer.
 *
 * @param input the raw text to tokenize
 * @return the list of lower-case, ASCII-folded terms; empty (never null) if
 *         analysis fails with an {@link IOException}
 */
public static List<String> getTokensFromAnalyzer(String input) {
    List<String> termList = new ArrayList<String>();
    StandardTokenizer tokenStream = new StandardTokenizer(new StringReader(input));
    TokenStream result = new StandardFilter(tokenStream);
    result = new LowerCaseFilter(result);
    result = new ASCIIFoldingFilter(result);
    CharTermAttribute charTermAttribute = result.addAttribute(CharTermAttribute.class);
    try {
        // Reset the OUTERMOST stream so every filter in the chain is reset,
        // not just the tokenizer (the original reset only the tokenizer).
        result.reset();
        while (result.incrementToken()) {
            termList.add(charTermAttribute.toString());
        }
        // Per the TokenStream contract, signal end-of-stream before closing.
        result.end();
    } catch (IOException e) {
        // Best-effort analysis: log and return whatever was collected.
        LOGGER.debug(e.getMessage(), e);
    } finally {
        // Always release the stream, even when incrementToken() threw
        // (the original leaked it on any exception).
        try {
            result.close();
        } catch (IOException e) {
            LOGGER.debug(e.getMessage(), e);
        }
    }
    return termList;
}
}
/**
 * Returns a per-thread reusable token stream for the given field. On first
 * use the tokenizer/filter chain is built and cached; afterwards the cached
 * tokenizer is simply re-pointed at the new reader.
 *
 * @param fieldName the field being analyzed (not consulted here)
 * @param reader the character source to tokenize
 * @return the cached, filtered token stream
 * @throws IOException if resetting the cached tokenizer fails
 */
@Override
public final TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    SavedStreams saved = (SavedStreams) getPreviousTokenStream();
    if (saved != null) {
        // Cached chain exists: reuse it with the new input.
        saved.tokenStream.reset(reader);
        return saved.filteredTokenStream;
    }
    // First call on this thread: build and remember the full chain.
    saved = new SavedStreams();
    setPreviousTokenStream(saved);
    saved.tokenStream = tokenize(reader);
    saved.filteredTokenStream = tokenFiltersChain(saved.tokenStream);
    return saved.filteredTokenStream;
}
tokenizer.close(); tokenizer.setReader(stringReader); tokenizer.reset(); while ( tokenizer.incrementToken() ) { final CharTermAttribute charTermAttribute
try { tokenizer.setReader(stringReader); tokenizer.reset(); while ( tokenizer.incrementToken() ) { final CharTermAttribute charTermAttribute
/**
 * Returns a per-thread reusable token stream over {@code reader}. The
 * StandardTokenizer → StandardFilter → LowerCaseFilter → StopFilter chain is
 * built once per thread and cached; later calls only reset the tokenizer.
 *
 * @param fieldName the field being analyzed (unused)
 * @param reader the character source to tokenize
 * @return the cached, filtered token stream
 * @throws IOException if resetting the cached tokenizer fails
 */
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    SavedStreams saved = (SavedStreams) getPreviousTokenStream();
    if (saved == null) {
        // First use on this thread: assemble and cache the chain.
        saved = new SavedStreams();
        setPreviousTokenStream(saved);
        saved.tokenStream = new StandardTokenizer(reader);
        TokenStream chain = new StandardFilter(saved.tokenStream);
        chain = new LowerCaseFilter(chain);
        saved.filteredTokenStream = new StopFilter(chain, stopSet);
    } else {
        // Reuse the cached chain with the new input.
        saved.tokenStream.reset(reader);
    }
    // Re-apply per-call tokenizer settings in case the analyzer was reconfigured.
    saved.tokenStream.setMaxTokenLength(maxTokenLength);
    saved.tokenStream.setReplaceInvalidAcronym(replaceInvalidAcronym);
    return saved.filteredTokenStream;
}
/**
 * Provides a reusable, per-thread token stream for {@code reader}. Builds the
 * standard analysis chain (tokenizer, standard filter, lower-casing, stop
 * words) on first use; subsequent calls just repoint the cached tokenizer.
 *
 * @param fieldName the field name (ignored by this analyzer)
 * @param reader the input to tokenize
 * @return the cached filtered stream
 * @throws IOException if the cached tokenizer cannot be reset
 */
public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException {
    SavedStreams cached = (SavedStreams) getPreviousTokenStream();
    if (cached == null) {
        // No chain cached for this thread yet — create and register one.
        cached = new SavedStreams();
        setPreviousTokenStream(cached);
        cached.tokenStream = new StandardTokenizer(reader);
        TokenStream filtered = new StandardFilter(cached.tokenStream);
        filtered = new LowerCaseFilter(filtered);
        cached.filteredTokenStream = new StopFilter(filtered, stopSet);
    } else {
        // Existing chain — swap in the new reader.
        cached.tokenStream.reset(reader);
    }
    // Refresh tokenizer options that may have changed since the chain was built.
    cached.tokenStream.setMaxTokenLength(maxTokenLength);
    cached.tokenStream.setReplaceInvalidAcronym(replaceInvalidAcronym);
    return cached.filteredTokenStream;
}