/**
 * Wraps the given stream in a {@link CommonGramsFilter} built from this
 * object's <code>commonWords</code> and <code>ignoreCase</code> settings.
 *
 * @param input the token stream to wrap
 * @return a new CommonGramsFilter reading from <code>input</code>
 */
public CommonGramsFilter create(TokenStream input) {
  return new CommonGramsFilter(input, commonWords, ignoreCase);
}
} // closes the enclosing class
/** * Construct a token stream filtering the given input using an Array of common * words to create bigrams and is case-sensitive if ignoreCase is false. * * @param input Tokenstream in filter chain * @param commonWords words to be used in constructing bigrams * @param ignoreCase -Ignore case when constructing bigrams for common words. */ public CommonGramsFilter(TokenStream input, String[] commonWords, boolean ignoreCase) { super(input); this.commonWords = (CharArraySet) makeCommonSet(commonWords, ignoreCase); init(); }
/**
 * Build a CharArraySet from an array of common words, suitable for passing
 * to the CommonGramsFilter constructor. Building the set up front lets it be
 * cached once when an Analyzer is constructed.
 *
 * @param commonWords words to place in the set
 * @return the result of {@link #makeCommonSet(java.lang.String[], boolean)}
 *         called with <code>ignoreCase</code> set to false
 * @see #makeCommonSet(java.lang.String[], boolean)
 */
public static final CharArraySet makeCommonSet(String[] commonWords) {
  final boolean ignoreCase = false;
  return makeCommonSet(commonWords, ignoreCase);
}
/**
 * Construct a token stream filtering the given input, using an array of
 * common words to create bigrams. Delegates to
 * {@link #CommonGramsFilter(TokenStream, String[], boolean)} with
 * <code>ignoreCase</code> set to false.
 *
 * @param input TokenStream in the filter chain
 * @param commonWords words to be used in constructing bigrams
 */
public CommonGramsFilter(TokenStream input, String[] commonWords) {
  this(input, commonWords, false);
  // No init() call here: the three-argument constructor invoked above already
  // calls init(), so calling it again would initialise the filter twice.
}
public void inform(ResourceLoader loader) { String commonWordFiles = args.get("words"); ignoreCase = getBoolean("ignoreCase", false); if (commonWordFiles != null) { try { List<String> files = StrUtils.splitFileNames(commonWordFiles); if (commonWords == null && files.size() > 0){ //default stopwords list has 35 or so words, but maybe don't make it that big to start commonWords = new CharArraySet(files.size() * 10, ignoreCase); } for (String file : files) { List<String> wlist = loader.getLines(file.trim()); //TODO: once StopFilter.makeStopSet(List) method is available, switch to using that so we can avoid a toArray() call commonWords.addAll(CommonGramsFilter.makeCommonSet((String[])wlist.toArray(new String[0]), ignoreCase)); } } catch (IOException e) { throw new RuntimeException(e); } } else { commonWords = (CharArraySet) CommonGramsFilter.makeCommonSet(StopAnalyzer.ENGLISH_STOP_WORDS, ignoreCase); } }
/**
 * Construct a token stream filtering the given input, using a Set of common
 * words to create bigrams.
 * <p>
 * If <code>commonWords</code> is already a {@link CharArraySet} (true if
 * <code>makeCommonSet()</code> was used to construct the set), it is used
 * directly and <code>ignoreCase</code> is ignored, because the
 * <code>CharArraySet</code> itself controls case sensitivity.
 * <p>
 * Otherwise a new CharArraySet is created with the given
 * <code>ignoreCase</code> setting and filled with the contents of
 * <code>commonWords</code>.
 *
 * @param input TokenStream input in the filter chain
 * @param commonWords the set of common words
 * @param ignoreCase ignore case when constructing bigrams for common words;
 *        only used when <code>commonWords</code> is not a CharArraySet
 */
public CommonGramsFilter(TokenStream input, Set commonWords, boolean ignoreCase) {
  super(input);
  if (!(commonWords instanceof CharArraySet)) {
    // Copy the words into a CharArraySet with the requested case handling.
    this.commonWords = new CharArraySet(commonWords.size(), ignoreCase);
    this.commonWords.addAll(commonWords);
  } else {
    // Already the right type: use the caller's set as-is.
    this.commonWords = (CharArraySet) commonWords;
  }
  init();
}
public void inform(ResourceLoader loader) { String commonWordFiles = args.get("words"); ignoreCase = getBoolean("ignoreCase", false); if (commonWordFiles != null) { try { List<String> files = StrUtils.splitFileNames(commonWordFiles); if (commonWords == null && files.size() > 0) { // default stopwords list has 35 or so words, but maybe don't make it // that big to start commonWords = new CharArraySet(files.size() * 10, ignoreCase); } for (String file : files) { List<String> wlist = loader.getLines(file.trim()); // TODO: once StopFilter.makeStopSet(List) method is available, switch // to using that so we can avoid a toArray() call commonWords.addAll(CommonGramsFilter.makeCommonSet((String[]) wlist .toArray(new String[0]), ignoreCase)); } } catch (IOException e) { throw new RuntimeException(e); } } else { commonWords = (CharArraySet) CommonGramsFilter.makeCommonSet( StopAnalyzer.ENGLISH_STOP_WORDS, ignoreCase); } }
/**
 * Create a CommonGramsFilter over the given input and wrap it with a
 * CommonGramsQueryFilter.
 *
 * @param input the token stream to wrap
 * @return a CommonGramsQueryFilter wrapping the new CommonGramsFilter
 */
public CommonGramsQueryFilter create(TokenStream input) {
  return new CommonGramsQueryFilter(
      new CommonGramsFilter(input, commonWords, ignoreCase));
}
} // closes the enclosing class