/** * Construct a token stream filtering the given input using an Array of common * words to create bigrams. * * @param input Tokenstream in filter chain * @param commonWords words to be used in constructing bigrams */ public CommonGramsFilter(TokenStream input, String[] commonWords) { this(input, commonWords, false); init(); }
/** * Construct a token stream filtering the given input using an Array of common * words to create bigrams and is case-sensitive if ignoreCase is false. * * @param input Tokenstream in filter chain * @param commonWords words to be used in constructing bigrams * @param ignoreCase -Ignore case when constructing bigrams for common words. */ public CommonGramsFilter(TokenStream input, String[] commonWords, boolean ignoreCase) { super(input); this.commonWords = (CharArraySet) makeCommonSet(commonWords, ignoreCase); init(); }
/** * Construct a token stream filtering the given input using a Set of common * words to create bigrams, case-sensitive if ignoreCase is false (unless Set * is CharArraySet). If <code>commonWords</code> is an instance of * {@link CharArraySet} (true if <code>makeCommonSet()</code> was used to * construct the set) it will be directly used and <code>ignoreCase</code> * will be ignored since <code>CharArraySet</code> directly controls case * sensitivity. * <p/> * If <code>commonWords</code> is not an instance of {@link CharArraySet}, a * new CharArraySet will be constructed and <code>ignoreCase</code> will be * used to specify the case sensitivity of that set. * * @param input TokenStream input in filter chain. * @param commonWords The set of common words. * @param ignoreCase -Ignore case when constructing bigrams for common words. */ public CommonGramsFilter(TokenStream input, Set commonWords, boolean ignoreCase) { super(input); if (commonWords instanceof CharArraySet) { this.commonWords = (CharArraySet) commonWords; } else { this.commonWords = new CharArraySet(commonWords.size(), ignoreCase); this.commonWords.addAll(commonWords); } init(); }