/**
 * Creates a stop-word set from the supplied words without ignoring case.
 * <p>
 * This is a convenience overload: it forwards to the two-argument
 * {@code makeStopSet} with {@code ignoreCase} set to {@code false}. The
 * result is meant to be handed to a StopFilter constructor, so it can be
 * built once when an Analyzer is constructed and then reused.
 *
 * @param stopWords the stop words, given as an array or as varargs
 * @return whatever the two-argument overload returns for these words
 * @see #makeStopSet(java.lang.String[], boolean)
 */
public static CharArraySet makeStopSet(String... stopWords) {
  final boolean ignoreCase = false;
  return makeStopSet(stopWords, ignoreCase);
}
/**
 * Creates an analyzer whose stop set is built from {@code ENGLISH_STOP_WORDS}.
 * <p>
 * The superclass is initialised with
 * {@code Analyzer.PER_FIELD_REUSE_STRATEGY}; the stop set is then created
 * once via {@code StopFilter.makeStopSet} and stored in {@code stopWords}.
 */
public HistoryAnalyzer() {
  super(Analyzer.PER_FIELD_REUSE_STRATEGY);
  this.stopWords = StopFilter.makeStopSet(ENGLISH_STOP_WORDS);
}
/**
 * Creates a stop-word set from a list of entries without ignoring case.
 * <p>
 * This is a convenience overload: it forwards the list to the two-argument
 * {@code makeStopSet} with {@code ignoreCase} set to {@code false}. The
 * result is meant to be handed to a StopFilter constructor, so it can be
 * built once when an Analyzer is constructed and then reused.
 *
 * @param stopWords a List of Strings, char[] or any other toString()-able
 *                  objects representing the stop words
 * @return a Set ({@link CharArraySet}) containing the words
 */
public static CharArraySet makeStopSet(List<?> stopWords) {
  final boolean ignoreCase = false;
  return makeStopSet(stopWords, ignoreCase);
}
/** * Returns as {@link CharArraySet} from wordFiles, which * can be a comma-separated list of filenames */ protected final CharArraySet getWordSet(ResourceLoader loader, String wordFiles, boolean ignoreCase) throws IOException { List<String> files = splitFileNames(wordFiles); CharArraySet words = null; if (files.size() > 0) { // default stopwords list has 35 or so words, but maybe don't make it that // big to start words = new CharArraySet(files.size() * 10, ignoreCase); for (String file : files) { List<String> wlist = getLines(loader, file.trim()); words.addAll(StopFilter.makeStopSet(wlist, ignoreCase)); } } return words; }
/**
 * Creates an analyzer that uses the given charset and stop words.
 *
 * @param charset   stored as this analyzer's charset
 * @param stopwords converted to a stop set via {@code StopFilter.makeStopSet}
 */
public RussianAnalyzer(char[] charset, String[] stopwords) {
  this.charset = charset;
  this.stopSet = StopFilter.makeStopSet(stopwords);
}
/**
 * Turns an array of stop words into a set, without ignoring case.
 * <p>
 * Forwards to the two-argument {@code makeStopSet}, supplying
 * {@code false} for {@code ignoreCase}. The returned set is appropriate
 * for passing into the StopFilter constructor, which lets the set be
 * constructed a single time when an Analyzer is created.
 *
 * @param stopWords an array (or varargs list) of stop words
 * @return the set produced by the two-argument overload
 * @see #makeStopSet(java.lang.String[], boolean)
 */
public static CharArraySet makeStopSet(String... stopWords) {
  final boolean ignoreCase = false;
  return makeStopSet(stopWords, ignoreCase);
}
/**
 * Replaces the exclusion set with one built from an array of Strings.
 *
 * @param exclusionlist the words to place in the exclusion set; converted
 *                      via {@code StopFilter.makeStopSet}
 */
public void setStemExclusionTable(String[] exclusionlist) {
  this.exclusionSet = StopFilter.makeStopSet(exclusionlist);
}
/** * Constructs a filter which removes words from the input * TokenStream that are named in the array of words. */ public StopFilter(TokenStream in, String[] stopWords, boolean ignoreCase) { super(in); this.stopWords = (CharArraySet)makeStopSet(stopWords, ignoreCase); }
/**
 * Turns an array of stop words into a Set, without ignoring case.
 * <p>
 * Forwards to the two-argument {@code makeStopSet}, supplying
 * {@code false} for {@code ignoreCase}. The returned Set is appropriate
 * for passing into the StopFilter constructor, which lets the set be
 * constructed a single time when an Analyzer is created.
 *
 * @param stopWords an array of stop words
 * @return the Set produced by the two-argument overload
 * @see #makeStopSet(java.lang.String[], boolean)
 */
public static final Set makeStopSet(String[] stopWords) {
  final boolean ignoreCase = false;
  return makeStopSet(stopWords, ignoreCase);
}
/**
 * Creates an analyzer from an array of stop words.
 * <p>
 * The array is converted with {@code StopFilter.makeStopSet} and the
 * result is handed to another constructor of this class together with
 * {@code matchVersion}.
 *
 * @param matchVersion version passed to both {@code makeStopSet} and the
 *                     delegate constructor
 * @param stopwords    the stop words to use
 * @deprecated use {@link #RussianAnalyzer(Version, Set)} instead
 */
@Deprecated
public RussianAnalyzer(Version matchVersion, String... stopwords) {
  this(
      matchVersion,
      StopFilter.makeStopSet(matchVersion, stopwords));
}
/**
 * Creates an analyzer from an array of stop words.
 * <p>
 * The array is converted with {@code StopFilter.makeStopSet} and the
 * result is handed to another constructor of this class together with
 * {@code matchVersion}.
 *
 * @param matchVersion version passed to both {@code makeStopSet} and the
 *                     delegate constructor
 * @param stopwords    the stop words to use
 * @deprecated use {@link #GermanAnalyzer(Version, Set)}
 */
@Deprecated
public GermanAnalyzer(Version matchVersion, String... stopwords) {
  this(
      matchVersion,
      StopFilter.makeStopSet(matchVersion, stopwords));
}
/**
 * Creates an analyzer from an array of stop words.
 * <p>
 * The array is converted with {@code StopFilter.makeStopSet} and the
 * result is handed to another constructor of this class together with
 * {@code matchVersion}.
 *
 * @param matchVersion version passed to both {@code makeStopSet} and the
 *                     delegate constructor
 * @param stopwords    array of stopwords to use
 * @deprecated use {@link #GreekAnalyzer(Version, Set)} instead
 */
@Deprecated
public GreekAnalyzer(Version matchVersion, String... stopwords) {
  this(
      matchVersion,
      StopFilter.makeStopSet(matchVersion, stopwords));
}
/**
 * Creates the named analyzer and installs the given stop words.
 * <p>
 * Construction is first delegated to the {@code (matchVersion, name)}
 * constructor; afterwards the stop set is assigned from
 * {@code StopFilter.makeStopSet(matchVersion, stopWords)}.
 *
 * @param matchVersion version passed to the delegate constructor and to
 *                     {@code makeStopSet}
 * @param name         the analyzer name passed to the delegate constructor
 * @param stopWords    the stop words to use
 * @deprecated Use {@link #SnowballAnalyzer(Version, String, Set)} instead.
 */
@Deprecated
public SnowballAnalyzer(Version matchVersion, String name, String[] stopWords) {
  this(matchVersion, name);
  this.stopSet = StopFilter.makeStopSet(matchVersion, stopWords);
}
/**
 * Creates an analyzer from an array of stop words.
 * <p>
 * The array is converted with {@code StopFilter.makeStopSet} and the
 * result is handed to another constructor of this class together with
 * {@code matchVersion}.
 *
 * @param matchVersion version passed to both {@code makeStopSet} and the
 *                     delegate constructor
 * @param stopwords    the stop words to use
 * @deprecated use {@link #FrenchAnalyzer(Version, Set)} instead
 */
@Deprecated
public FrenchAnalyzer(Version matchVersion, String... stopwords) {
  this(
      matchVersion,
      StopFilter.makeStopSet(matchVersion, stopwords));
}
/**
 * Creates an analyzer which removes the words in the provided array.
 * <p>
 * The array is converted with {@code StopFilter.makeStopSet} and the
 * result is passed to the superclass constructor together with
 * {@code matchVersion}.
 *
 * @param matchVersion version passed to both {@code makeStopSet} and the
 *                     superclass constructor
 * @param stopWords    stop word array
 * @deprecated use {@link #CJKAnalyzer(Version, Set)} instead
 */
@Deprecated
public CJKAnalyzer(Version matchVersion, String... stopWords) {
  super(
      matchVersion,
      StopFilter.makeStopSet(matchVersion, stopWords));
}
/**
 * Creates an analyzer from an array of stop words.
 * <p>
 * The array is converted with {@code StopFilter.makeStopSet} and the
 * result is handed to another constructor of this class together with
 * {@code matchVersion}.
 *
 * @param matchVersion version passed to both {@code makeStopSet} and the
 *                     delegate constructor
 * @param stopwords    the stop words to use
 * @deprecated use {@link #DutchAnalyzer(Version, Set)} instead
 */
@Deprecated
public DutchAnalyzer(Version matchVersion, String... stopwords) {
  this(
      matchVersion,
      StopFilter.makeStopSet(matchVersion, stopwords));
}
/**
 * Creates an n-gram analyzer for the given locale and n-gram length.
 * <p>
 * The stop n-gram set is built once from {@code ENGLISH_STOP_NGRAMS} using
 * {@code Version.LUCENE_31}.
 *
 * @param locale      stored as given; it is not validated here
 * @param ngramLength the n-gram length; must be greater than 0
 * @throws IllegalArgumentException if {@code ngramLength} is 0 or negative
 */
public NgramAnalyzer(Locale locale, int ngramLength) {
  if (ngramLength <= 0) {
    // Zero is rejected as well, so the message must say "greater than 0"
    // rather than "cannot be less than 0".
    throw new IllegalArgumentException(
        "'ngramLength' must be greater than 0, but was " + ngramLength);
  }
  this.stopNgrams = StopFilter.makeStopSet(Version.LUCENE_31, ENGLISH_STOP_NGRAMS);
  this.locale = locale;
  this.ngramLength = ngramLength;
}
/**
 * Loads the dictionary from the resource named by {@code dictFile}.
 * <p>
 * The lines returned by the loader are copied into a String array and
 * converted with {@code StopFilter.makeStopSet}, passing {@code false} as
 * the second argument; the result is stored in {@code dictionary}.
 *
 * @param loader the resource loader used to read the dictionary lines
 * @throws RuntimeException wrapping any IOException raised while loading
 */
public void inform(ResourceLoader loader) {
  try {
    final List<String> lines = loader.getLines(dictFile);
    final String[] words = lines.toArray(new String[0]);
    dictionary = StopFilter.makeStopSet(words, false);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
public DictionaryCompoundWordTokenFilter create(TokenStream input) {
/**
 * Creates an analyzer that uses {@code RussianCharsets.UnicodeRussian}.
 * <p>
 * The stop set is built by passing the result of
 * {@code makeStopWords(RussianCharsets.UnicodeRussian)} to
 * {@code StopFilter.makeStopSet}.
 */
public RussianAnalyzer() {
  this.charset = RussianCharsets.UnicodeRussian;
  this.stopSet = StopFilter.makeStopSet(makeStopWords(RussianCharsets.UnicodeRussian));
}
/**
 * Wraps the given stream in a {@code StopFilter}.
 * <p>
 * The filter is created with the version from {@code LuceneVersion.get()}
 * and a stop set built by {@code makeStopSet} from
 * {@code GermanAnalyzer.GERMAN_STOP_WORDS}. A new stop set is built on
 * every call.
 *
 * @param input the token stream to wrap
 * @return the new {@code StopFilter} over {@code input}
 */
@Override public TokenStream apply(final TokenStream input) { return new StopFilter(LuceneVersion.get(), input, makeStopSet(GermanAnalyzer.GERMAN_STOP_WORDS)); } };