/**
 * Builds the analysis chain: the entire input becomes a single token,
 * which is then lower-cased.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final KeywordTokenizer tokenizer = new KeywordTokenizer();
    final LowerCaseFilter lowercased = new LowerCaseFilter(tokenizer);
    return new TokenStreamComponents(tokenizer, lowercased);
}
/**
 * Emits the whole input as one token. Reads the reader to exhaustion into
 * the term attribute's buffer, growing the buffer as needed, then records
 * the token length and corrected offsets. Returns true exactly once per
 * reset; false on every subsequent call.
 */
@Override
public final boolean incrementToken() throws IOException {
    if (done) {
        return false;
    }
    clearAttributes();
    done = true;
    char[] termBuffer = termAtt.buffer();
    int written = 0;
    for (;;) {
        final int read = input.read(termBuffer, written, termBuffer.length - written);
        if (read == -1) {
            break;
        }
        written += read;
        if (written == termBuffer.length) {
            // Buffer is full: grow it before the next read.
            termBuffer = termAtt.resizeBuffer(termBuffer.length + 1);
        }
    }
    termAtt.setLength(written);
    finalOffset = correctOffset(written);
    offsetAtt.setOffset(correctOffset(0), finalOffset);
    return true;
}
// Reusable single-token analysis chain: the entire input line becomes one
// token, which is then ASCII-folded (diacritics stripped).
private final KeywordTokenizer keywordTokenizer = new KeywordTokenizer(new StringReader(""));
private final ASCIIFoldingFilter asciiFoldingFilter = new ASCIIFoldingFilter(keywordTokenizer);
private final TermAttribute termAttribute = (TermAttribute) asciiFoldingFilter.getAttribute(TermAttribute.class);

/**
 * Returns {@code line} with accented characters folded to their ASCII
 * equivalents, or {@code null} if {@code line} is null or tokenization fails.
 *
 * @param line the text to fold; may be null
 * @return the folded text, or null
 */
public String process(String line) {
    if (line != null) {
        try {
            // Point the tokenizer at the new input...
            keywordTokenizer.reset(new StringReader(line));
            // ...and reset the stream state. Without this call
            // KeywordTokenizer's internal "done" flag remains set after the
            // first use, so every subsequent call would produce no token.
            asciiFoldingFilter.reset();
            if (asciiFoldingFilter.incrementToken()) {
                return termAttribute.term();
            }
        } catch (IOException e) {
            logger.warn("Failed to parse: " + line, e);
        }
    }
    return null;
}
/**
 * Creates a KeywordTokenizer with the given attribute factory and the
 * configured maximum token length.
 */
@Override
public KeywordTokenizer create(AttributeFactory factory) {
    final KeywordTokenizer tokenizer = new KeywordTokenizer(factory, maxTokenLen);
    return tokenizer;
}
}
/**
 * Produces a single token covering the entire input, then reports
 * exhaustion. Drains the reader into the term buffer, resizing when full,
 * and sets the token length plus corrected start/end offsets.
 */
@Override
public final boolean incrementToken() throws IOException {
    if (done) {
        return false;
    }
    clearAttributes();
    done = true;
    int length = 0;
    char[] chars = termAtt.buffer();
    int count = input.read(chars, length, chars.length - length);
    while (count != -1) {
        length += count;
        if (length == chars.length) {
            // No room left: expand the term buffer before reading more.
            chars = termAtt.resizeBuffer(1 + chars.length);
        }
        count = input.read(chars, length, chars.length - length);
    }
    termAtt.setLength(length);
    finalOffset = correctOffset(length);
    offsetAtt.setOffset(correctOffset(0), finalOffset);
    return true;
}
/** Keyword analysis: the entire field value is emitted as one token, unfiltered. */
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    final KeywordTokenizer tokenizer = new KeywordTokenizer();
    return new TokenStreamComponents(tokenizer);
}
}
/**
 * Reads the full input as a single token. True is returned only for the
 * first call after a reset; the "done" flag guards later calls.
 */
@Override
public final boolean incrementToken() throws IOException {
    if (!done) {
        clearAttributes();
        done = true;
        char[] buf = termAtt.buffer();
        int filled = 0;
        while (true) {
            final int n = input.read(buf, filled, buf.length - filled);
            if (n == -1) {
                break; // end of input
            }
            filled += n;
            if (filled == buf.length) {
                // Grow the buffer so the next read has space.
                buf = termAtt.resizeBuffer(buf.length + 1);
            }
        }
        termAtt.setLength(filled);
        finalOffset = correctOffset(filled);
        offsetAtt.setOffset(correctOffset(0), finalOffset);
        return true;
    }
    return false;
}
/**
 * Builds a keyword analysis chain using the configured attribute factory:
 * the whole input is one token and passes through unmodified.
 */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final KeywordTokenizer single =
            new KeywordTokenizer(factory, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
    return new TokenStreamComponents(single, single);
}
}
/** Creates a KeywordTokenizer with the default buffer size (it grows on demand). */
@Override
public KeywordTokenizer create(AttributeFactory factory) {
    final KeywordTokenizer tokenizer =
            new KeywordTokenizer(factory, KeywordTokenizer.DEFAULT_BUFFER_SIZE);
    return tokenizer;
}
}
/** Factory method: a fresh KeywordTokenizer sized by the configured buffer size. */
@Override
public Tokenizer create() {
    final Tokenizer keywordTokenizer = new KeywordTokenizer(bufferSize);
    return keywordTokenizer;
}
}
/** The match version is irrelevant here: the whole input is one token. */
@Override
protected Tokenizer create(Version version) {
    final Tokenizer tokenizer = new KeywordTokenizer();
    return tokenizer;
}
},
// Creates a new KeywordTokenizer sized with the configured buffer size;
// the tokenizer emits the entire input as a single token.
@Override public Tokenizer create() { return new KeywordTokenizer(bufferSize); } }
// Factory method: each call returns a fresh KeywordTokenizer using the
// buffer size configured on this factory.
@Override public Tokenizer create() { return new KeywordTokenizer(bufferSize); } }
/**
 * Analyzer that treats the entire input as a single token and lower-cases it.
 */
public final class YourAnalyzer extends ReusableAnalyzerBase {
    @Override
    protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
        // Must be typed Tokenizer (not TokenStream): TokenStreamComponents'
        // constructor takes a Tokenizer as its first argument.
        final Tokenizer source = new KeywordTokenizer(reader);
        // Fixed: the class is LowerCaseFilter (capital 'C'); "LowercaseFilter"
        // does not exist in Lucene and would not compile.
        return new TokenStreamComponents(source, new LowerCaseFilter(Version.LUCENE_36, source));
    }
}
/** Single-token analysis chain: no filters, the raw input is the token. */
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
    final KeywordTokenizer wholeInput = new KeywordTokenizer();
    return new TokenStreamComponents(wholeInput);
}
}
/** Emits each field value as exactly one token (keyword analysis). */
@Override
protected TokenStreamComponents createComponents(String fieldName) {
    final KeywordTokenizer tokenizer = new KeywordTokenizer();
    return new TokenStreamComponents(tokenizer);
}
/** Keyword analysis over the supplied reader: the whole input is one token. */
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
    final KeywordTokenizer keywordTokenizer = new KeywordTokenizer(reader);
    return new TokenStreamComponents(keywordTokenizer);
}
}
// Builds the analysis chain: a lone KeywordTokenizer, so the entire field
// value is indexed as a single, unmodified token.
@Override protected TokenStreamComponents createComponents(String fieldName) { return new TokenStreamComponents( new KeywordTokenizer() ); }
public final class CustomKeywordAnalyzer extends Analyzer { public CustomKeywordAnalyzer() { } @Override protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { Tokenizer tokenizer = new KeywordTokenizer(reader) TokenStream filter = new TrimFilter(Version.LUCENE_43, tokenizer); return new TokenStreamComponents(tokenizer, filter); } }
import org.apache.lucene.analysis.KeywordAnalyzer; import org.apache.lucene.analysis.KeywordTokenizer; import org.apache.lucene.analysis.LowerCaseFilter; import org.apache.lucene.analysis.LowerCaseTokenizer; import org.apache.lucene.analysis.ReusableAnalyzerBase; import org.apache.lucene.analysis.Tokenizer; import org.apache.lucene.util.Version; public class YourAnalyzer extends ReusableAnalyzerBase { private final Version version; public YourAnalyzer(final Version version) { super(); this.version = version; } @Override protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) { final Tokenizer source = new KeywordTokenizer(reader); return new TokenStreamComponents(source, new LowerCaseFilter(this.version, source)); } }