/** * Creates NGramTokenFilter with given min and max n-grams. * @param input {@link TokenStream} holding the input to be tokenized * @param minGram the smallest n-gram to generate * @param maxGram the largest n-gram to generate */ public NGramTokenFilter(TokenStream input, int minGram, int maxGram) { super(new CodepointCountFilter(input, minGram, Integer.MAX_VALUE)); this.charUtils = CharacterUtils.getInstance(); if (minGram < 1) { throw new IllegalArgumentException("minGram must be greater than zero"); } if (minGram > maxGram) { throw new IllegalArgumentException("minGram must not be greater than maxGram"); } this.minGram = minGram; this.maxGram = maxGram; posIncAtt = addAttribute(PositionIncrementAttribute.class); posLenAtt = addAttribute(PositionLengthAttribute.class); }