/**
 * Builds an n-gram filter over {@code input} using this factory's configured
 * min/max gram sizes and preserve-original flag.
 */
@Override
public TokenFilter create(TokenStream input) {
    final NGramTokenFilter grams =
        new NGramTokenFilter(input, minGramSize, maxGramSize, preserveOriginal);
    return grams;
  }
}
// NOTE(review): fragment — the enclosing method (likely incrementToken()) is not
// visible here, and the trailing `if (hasIllegalOffsets) {` is truncated.
// Visible behavior: reset all token attributes, then copy the current n-gram
// (curGramSize chars starting at curPos of curTermBuffer) into the term attribute.
// Presumably hasIllegalOffsets guards an alternate offset computation — TODO confirm
// against the full method.
clearAttributes(); termAtt.copyBuffer(curTermBuffer, curPos, curGramSize); if (hasIllegalOffsets) {
// NOTE(review): jumbled statement residue from one or more incrementToken()
// implementations — the enclosing control flow is not visible, so statement
// order here cannot be trusted. Visible operations: capture/restore of stream
// state; codepoint-aware start/end computation via Character.offsetByCodePoints
// over curTermBuffer; a zero position increment (token at the same position as
// the previous one); and copying the whole original term into the term
// attribute. Reconstruct from the full file before editing — TODO confirm.
return false; state = captureState(); restoreState(state); final int start = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos); final int end = Character.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize); restoreState(state); posIncrAtt.setPositionIncrement(0); termAtt.copyBuffer(curTermBuffer, 0, curTermLength);
/** * Creates NGramTokenFilter with given min and max n-grams. * @param input {@link TokenStream} holding the input to be tokenized * @param minGram the smallest n-gram to generate * @param maxGram the largest n-gram to generate */ public NGramTokenFilter(TokenStream input, int minGram, int maxGram) { super(new CodepointCountFilter(input, minGram, Integer.MAX_VALUE)); this.charUtils = CharacterUtils.getInstance(); if (minGram < 1) { throw new IllegalArgumentException("minGram must be greater than zero"); } if (minGram > maxGram) { throw new IllegalArgumentException("minGram must not be greater than maxGram"); } this.minGram = minGram; this.maxGram = maxGram; posIncAtt = addAttribute(PositionIncrementAttribute.class); posLenAtt = addAttribute(PositionLengthAttribute.class); }
// NOTE(review): fragment from inside a loop of incrementToken() — the enclosing
// method is not visible. Resets token attributes, then computes the start/end
// character indices of the current n-gram in codepoint terms (so surrogate
// pairs count as one unit) via the filter's CharacterUtils instance.
clearAttributes(); final int start = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, 0, curPos); final int end = charUtils.offsetByCodePoints(curTermBuffer, 0, curTermLength, start, curGramSize);
/**
 * Wraps the analysis chain so the existing tokenizer feeds an
 * {@link NGramTokenFilter} configured with this wrapper's min/max sizes.
 */
@Override
protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
    // Keep the original tokenizer; only the downstream token stream changes.
    final TokenStream grams =
        new NGramTokenFilter(components.getTokenStream(), this.min, this.max);
    return new TokenStreamComponents(components.getTokenizer(), grams);
  }
}
/** Wraps {@code tokenStream} in an {@link NGramTokenFilter} with the configured gram bounds. */
@Override
public TokenStream create(TokenStream tokenStream) {
    final TokenStream filtered = new NGramTokenFilter(tokenStream, minGram, maxGram);
    return filtered;
  }
}
/**
 * Builds a default-configured {@link NGramTokenFilter}; the {@code version}
 * argument is intentionally unused by this variant.
 */
@Override
public TokenStream create(TokenStream tokenStream, Version version) {
    final TokenStream result = new NGramTokenFilter(tokenStream);
    return result;
  }
},
/**
 * Creates an {@link NGramTokenFilter} over {@code input} using this factory's
 * configured gram sizes. (Covariant return: callers get the concrete type.)
 */
public NGramTokenFilter create(TokenStream input) {
    final NGramTokenFilter filter = new NGramTokenFilter(input, minGramSize, maxGramSize);
    return filter;
  }
}
/** Wraps {@code tokenStream} in an {@link NGramTokenFilter} using the configured bounds. */
@Override
public TokenStream create(TokenStream tokenStream) {
  final TokenStream ngrams = new NGramTokenFilter(tokenStream, minGram, maxGram);
  return ngrams;
}
/**
 * Creates the n-gram filter appropriate for the configured Lucene match
 * version: the legacy 4.3 implementation for old indexes, the current one
 * otherwise.
 */
@Override
public TokenFilter create(TokenStream input) {
  // Guard clause: pre-4.4 analysis must stay byte-compatible with old indexes.
  if (!luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) {
    return new Lucene43NGramTokenFilter(input, minGramSize, maxGramSize);
  }
  return new NGramTokenFilter(input, minGramSize, maxGramSize);
  }
}
// NOTE(review): fragment — the opening of the deprecation message and the
// surrounding lambda/registration call are outside this view. Visible behavior:
// finish a deprecation message steering users to the [ngram] name while still
// returning a plain NGramTokenFilter over `reader`, then register the
// pre-configured "persian_normalization" token filter.
+ "Please change the filter name to [ngram] instead."); return new NGramTokenFilter(reader); })); filters.add(PreConfiguredTokenFilter.singleton("persian_normalization", true, PersianNormalizationFilter::new));