/**
 * Returns the set of stop words being used.
 *
 * <p>A new set is assembled on every call: it is first built from
 * {@code ADDITIONAL_STOP_WORDS} via {@code StopFilter.makeStopSet} (with the
 * second argument set to {@code true}), and then the entries of
 * {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET} are added to it.
 *
 * @return the combined set of stop words
 */
public static CharArraySet getStopWords() {
    final CharArraySet stopWords = StopFilter.makeStopSet(ADDITIONAL_STOP_WORDS, true);
    stopWords.addAll(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    return stopWords;
}
/**
 * Builds a Set from an array of stop words, appropriate for passing into the
 * StopFilter constructor. Building the set up front allows it to be created
 * once, when an Analyzer is constructed, and then cached.
 *
 * @param stopWords An array of stopwords
 * @return the set produced by the two-argument overload with ignoreCase set to false
 * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
 */
public static CharArraySet makeStopSet(String... stopWords) {
    final boolean ignoreCase = false;
    return makeStopSet(stopWords, ignoreCase);
}
/**
 * Creates an analyzer for the given version whose stop set is built from the
 * supplied stop words.
 *
 * @param version the version to match; it is stored in {@code matchVersion} and
 *     also handed to {@code StopFilter.makeStopSet}
 * @param stopWords the words from which the stop set is built
 */
public AnyURIAnalyzer(final Version version, final String[] stopWords) {
    this.matchVersion = version;
    this.stopSet = StopFilter.makeStopSet(version, stopWords);
}
/**
 * Builds a Set from a list of stop words, appropriate for passing into the
 * StopFilter constructor. Building the set up front allows it to be created
 * once, when an Analyzer is constructed, and then cached.
 *
 * <p>Delegates to the two-argument {@code makeStopSet} overload, passing
 * {@code false} for ignoreCase.
 *
 * @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
 * @return A Set ({@link CharArraySet}) containing the words
 */
public static CharArraySet makeStopSet(List<?> stopWords) {
    final boolean ignoreCase = false;
    return makeStopSet(stopWords, ignoreCase);
}
/**
 * Builds a Set from an array of stop words,
 * appropriate for passing into the StopFilter constructor.
 * This permits this stopWords construction to be cached once when
 * an Analyzer is constructed.
 *
 * <p>This overload takes no version argument; it simply delegates to the
 * two-argument overload with ignoreCase set to {@code false}.
 *
 * @param stopWords An array of stopwords
 * @return A Set ({@link CharArraySet}) containing the words
 * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
 */
public static CharArraySet makeStopSet(String... stopWords) {
    return makeStopSet(stopWords, false);
}
/**
 * Builds an analyzer that records the given version and derives its stop set
 * from the given stop words.
 *
 * @param version the version to match; saved as {@code matchVersion} and passed
 *     on to {@code StopFilter.makeStopSet}
 * @param stopWords the stop words used to build the stop set
 */
public AnyURIAnalyzer(final Version version, final String[] stopWords) {
    this.matchVersion = version;
    this.stopSet = StopFilter.makeStopSet(version, stopWords);
}
/**
 * Builds a Set from a list of stop words,
 * appropriate for passing into the StopFilter constructor.
 * This permits this stopWords construction to be cached once when
 * an Analyzer is constructed.
 *
 * <p>This overload takes no version argument; it simply delegates to the
 * two-argument {@code makeStopSet} overload with ignoreCase set to {@code false}.
 *
 * @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
 * @return A Set ({@link CharArraySet}) containing the words
 */
public static CharArraySet makeStopSet(List<?> stopWords) {
    return makeStopSet(stopWords, false);
}
/**
 * Builds an analyzer with the given stop words.
 *
 * <p>Every entry of the given set is read as a {@code char[]}, converted to a
 * {@link String}, and the resulting list is handed to
 * {@code StopFilter.makeStopSet} to build this analyzer's own stop set.
 *
 * @param stopWords the stop words to use; each element is cast to {@code char[]},
 *     so an element of any other type fails with a {@link ClassCastException}
 */
public CASAnalyzer(CharArraySet stopWords) {
    final List<String> words = new ArrayList<>();
    // Iterator<?> rather than the raw Iterator type; elements still need the char[] cast.
    for (Iterator<?> it = stopWords.iterator(); it.hasNext();) {
        words.add(new String((char[]) it.next()));
    }
    stopSet = StopFilter.makeStopSet(words);
}
/**
 * Returns the set of stop words being used.
 *
 * <p>Each call creates a fresh set from {@code ADDITIONAL_STOP_WORDS} using
 * {@code StopFilter.makeStopSet} (second argument {@code true}) and then adds
 * everything in {@code StopAnalyzer.ENGLISH_STOP_WORDS_SET}.
 *
 * @return the combined set of stop words
 */
public static CharArraySet getStopWords() {
    final CharArraySet combined = StopFilter.makeStopSet(ADDITIONAL_STOP_WORDS, true);
    combined.addAll(StopAnalyzer.ENGLISH_STOP_WORDS_SET);
    return combined;
}
/**
 * Builds an analyzer with the given stop words.
 *
 * <p>The entries of the given set are read one by one as {@code char[]},
 * turned into {@link String}s, and the collected list is passed to
 * {@code StopFilter.makeStopSet} to create this analyzer's own stop set.
 *
 * @param stopWords the stop words to use; each element is cast to {@code char[]},
 *     so an element of any other type fails with a {@link ClassCastException}
 */
public CASAnalyzer(CharArraySet stopWords) {
    final List<String> words = new ArrayList<>();
    // Iterator<?> rather than the raw Iterator type; elements still need the char[] cast.
    for (Iterator<?> it = stopWords.iterator(); it.hasNext();) {
        words.add(new String((char[]) it.next()));
    }
    stopSet = StopFilter.makeStopSet(words);
}
/** * Returns as {@link CharArraySet} from wordFiles, which * can be a comma-separated list of filenames */ protected final CharArraySet getWordSet(ResourceLoader loader, String wordFiles, boolean ignoreCase) throws IOException { List<String> files = splitFileNames(wordFiles); CharArraySet words = null; if (files.size() > 0) { // default stopwords list has 35 or so words, but maybe don't make it that // big to start words = new CharArraySet(files.size() * 10, ignoreCase); for (String file : files) { List<String> wlist = getLines(loader, file.trim()); words.addAll(StopFilter.makeStopSet(wlist, ignoreCase)); } } return words; }