org.apache.lucene.analysis.StopFilter.makeStopSet Java code examples

/**
 * Creates a stop-word set from the given words, suitable for handing to
 * the StopFilter constructor. Building the set up front lets an Analyzer
 * cache it once at construction time.
 * <p>
 * Shorthand for the two-argument overload with {@code false} passed as
 * the ignoreCase flag.
 *
 * @param stopWords An array of stopwords
 * @return the {@link CharArraySet} produced by the two-argument overload
 * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
 */
public static CharArraySet makeStopSet(String... stopWords) {
 final boolean ignoreCase = false;
 return makeStopSet(stopWords, ignoreCase);
}

/**
 * Builds an analyzer which removes words in ENGLISH_STOP_WORDS.
 * <p>
 * The superclass is initialised with {@code Analyzer.PER_FIELD_REUSE_STRATEGY},
 * and the stop set is built once here from ENGLISH_STOP_WORDS via
 * {@code StopFilter.makeStopSet} and kept in the {@code stopWords} field.
 */
public HistoryAnalyzer() {
  super(Analyzer.PER_FIELD_REUSE_STRATEGY);
  stopWords = StopFilter.makeStopSet(ENGLISH_STOP_WORDS);
}

/**
 * Creates a stop-word set from a list of words, suitable for handing to
 * the StopFilter constructor. Building the set up front lets an Analyzer
 * cache it once at construction time.
 * <p>
 * Shorthand for the two-argument overload with {@code false} passed as
 * the ignoreCase flag.
 *
 * @param stopWords A List of Strings or char[] or any other toString()-able list representing the stopwords
 * @return A Set ({@link CharArraySet}) containing the words
 * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
 */
public static CharArraySet makeStopSet(List<?> stopWords) {
 final boolean ignoreCase = false;
 return makeStopSet(stopWords, ignoreCase);
}

/**
 * Loads the words of one or more word files into a single
 * {@link CharArraySet}.
 *
 * @param loader     passed to {@code getLines} to read each file
 * @param wordFiles  file name(s) to load; can be a comma-separated list
 *                   of filenames
 * @param ignoreCase passed both to the combined set and to each per-file
 *                   set built by {@code StopFilter.makeStopSet}
 * @return the combined set, or {@code null} when {@code wordFiles} yields
 *         no file names
 * @throws IOException propagated from the helper calls made here
 */
protected final CharArraySet getWordSet(ResourceLoader loader,
  String wordFiles, boolean ignoreCase) throws IOException {
 final List<String> fileNames = splitFileNames(wordFiles);
 if (fileNames.isEmpty()) {
  // No files named: callers receive null, not an empty set.
  return null;
 }
 // default stopwords list has 35 or so words, but maybe don't make it that
 // big to start
 final CharArraySet merged = new CharArraySet(fileNames.size() * 10, ignoreCase);
 for (String fileName : fileNames) {
  final List<String> lines = getLines(loader, fileName.trim());
  merged.addAll(StopFilter.makeStopSet(lines, ignoreCase));
 }
 return merged;
}

/**
 * Builds an analyzer with the given stop words.
 *
 * @param charset   stored as-is in the {@code charset} field (the array is
 *                  not copied)
 * @param stopwords words converted to a set with
 *                  {@code StopFilter.makeStopSet} and kept in {@code stopSet}
 */
public RussianAnalyzer(char[] charset, String[] stopwords)
{
  this.charset = charset;
  stopSet = StopFilter.makeStopSet(stopwords);
}

/**
 * Turns an array of stop words into a set that can be passed to the
 * StopFilter constructor, so the set can be built and cached once when an
 * Analyzer is constructed.
 * <p>
 * Equivalent to calling the two-argument overload with {@code false} as
 * the ignoreCase flag.
 *
 * @param stopWords An array of stopwords
 * @return the {@link CharArraySet} produced by the two-argument overload
 * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
 */
public static CharArraySet makeStopSet(String... stopWords) {
 final boolean ignoreCase = false;
 return makeStopSet(stopWords, ignoreCase);
}

/**
 * Builds an exclusion list from an array of Strings.
 * <p>
 * The array is converted with {@code StopFilter.makeStopSet} and the result
 * replaces the current value of {@code exclusionSet}.
 *
 * @param exclusionlist the words to place in the exclusion set
 */
public void setStemExclusionTable(String[] exclusionlist) {
 exclusionSet = StopFilter.makeStopSet(exclusionlist);
}

/**
 * Constructs a filter which removes words from the input
 * TokenStream that are named in the array of words.
 */
public StopFilter(TokenStream in, String[] stopWords, boolean ignoreCase) {
 super(in);
 this.stopWords = (CharArraySet)makeStopSet(stopWords, ignoreCase);
}

/**
 * Turns an array of stop words into a Set that can be passed to the
 * StopFilter constructor, so the set can be built and cached once when an
 * Analyzer is constructed.
 * <p>
 * Equivalent to calling the two-argument overload with {@code false} as
 * the ignoreCase flag.
 *
 * @param stopWords the stop words to put in the set
 * @return the Set produced by the two-argument overload
 * @see #makeStopSet(java.lang.String[], boolean) passing false to ignoreCase
 */
public static final Set makeStopSet(String[] stopWords) {
 final boolean ignoreCase = false;
 return makeStopSet(stopWords, ignoreCase);
}

/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Convenience constructor: the words are converted with
 * {@code StopFilter.makeStopSet(matchVersion, stopwords)} and the result is
 * handed to the two-argument constructor of this class.
 *
 * @param matchVersion passed both to {@code makeStopSet} and to the
 *                     delegated constructor
 * @param stopwords    the stop words to use
 * @deprecated use {@link #RussianAnalyzer(Version, Set)} instead
 */
@Deprecated
public RussianAnalyzer(Version matchVersion, String... stopwords) {
 this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords));
}

/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Convenience constructor: the words are converted with
 * {@code StopFilter.makeStopSet(matchVersion, stopwords)} and the result is
 * handed to the two-argument constructor of this class.
 *
 * @param matchVersion passed both to {@code makeStopSet} and to the
 *                     delegated constructor
 * @param stopwords    the stop words to use
 * @deprecated use {@link #GermanAnalyzer(Version, Set)}
 */
@Deprecated
public GermanAnalyzer(Version matchVersion, String... stopwords) {
 this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords));
}

/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Convenience constructor: the words are converted with
 * {@code StopFilter.makeStopSet(matchVersion, stopwords)} and the result is
 * handed to the two-argument constructor of this class.
 *
 * @param matchVersion passed both to {@code makeStopSet} and to the
 *                     delegated constructor
 * @param stopwords Array of stopwords to use.
 * @deprecated use {@link #GreekAnalyzer(Version, Set)} instead
 */
@Deprecated
public GreekAnalyzer(Version matchVersion, String... stopwords) {
 this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords));
}

/**
 * Builds the named analyzer with the given stop words.
 * <p>
 * Delegates to the {@code (matchVersion, name)} constructor first, then
 * sets {@code stopSet} to the result of
 * {@code StopFilter.makeStopSet(matchVersion, stopWords)}.
 *
 * @param matchVersion passed both to the delegated constructor and to
 *                     {@code makeStopSet}
 * @param name         passed unchanged to the delegated constructor
 * @param stopWords    the stop words to use
 * @deprecated Use {@link #SnowballAnalyzer(Version, String, Set)} instead.
 */
@Deprecated
public SnowballAnalyzer(Version matchVersion, String name, String[] stopWords) {
 this(matchVersion, name);
 stopSet = StopFilter.makeStopSet(matchVersion, stopWords);
}

/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Convenience constructor: the words are converted with
 * {@code StopFilter.makeStopSet(matchVersion, stopwords)} and the result is
 * handed to the two-argument constructor of this class.
 *
 * @param matchVersion passed both to {@code makeStopSet} and to the
 *                     delegated constructor
 * @param stopwords    the stop words to use
 * @deprecated use {@link #FrenchAnalyzer(Version, Set)} instead
 */
@Deprecated
public FrenchAnalyzer(Version matchVersion, String... stopwords) {
 this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords));
}

/**
 * Builds an analyzer which removes words in the provided array.
 * <p>
 * The array is converted with
 * {@code StopFilter.makeStopSet(matchVersion, stopWords)} and the result is
 * passed, together with {@code matchVersion}, to the superclass constructor.
 *
 * @param matchVersion passed both to {@code makeStopSet} and to the
 *                     superclass constructor
 * @param stopWords stop word array
 * @deprecated use {@link #CJKAnalyzer(Version, Set)} instead
 */
@Deprecated
public CJKAnalyzer(Version matchVersion, String... stopWords) {
 super(matchVersion, StopFilter.makeStopSet(matchVersion, stopWords));
}

/**
 * Builds an analyzer with the given stop words.
 * <p>
 * Convenience constructor: the words are converted with
 * {@code StopFilter.makeStopSet(matchVersion, stopwords)} and the result is
 * handed to the two-argument constructor of this class.
 *
 * @param matchVersion passed both to {@code makeStopSet} and to the
 *                     delegated constructor
 * @param stopwords    the stop words to use
 * @deprecated use {@link #DutchAnalyzer(Version, Set)} instead
 */
@Deprecated
public DutchAnalyzer(Version matchVersion, String... stopwords) {
 this(matchVersion, StopFilter.makeStopSet(matchVersion, stopwords));
}

/**
 * Creates an NgramAnalyzer for the given locale and n-gram length.
 * <p>
 * The stop n-gram set is built once here from ENGLISH_STOP_NGRAMS via
 * {@code StopFilter.makeStopSet(Version.LUCENE_31, ...)}.
 *
 * @param locale      stored in the {@code locale} field; not validated here
 * @param ngramLength length to use; must be greater than 0
 * @throws IllegalArgumentException if {@code ngramLength} is zero or negative
 */
public NgramAnalyzer(Locale locale, int ngramLength) {
  if (ngramLength <= 0) {
    // Zero is rejected as well, so the message must not suggest that only
    // negative values are invalid.
    throw new IllegalArgumentException(
        "'ngramLength' must be greater than 0, but was " + ngramLength);
  }
  this.stopNgrams = StopFilter.makeStopSet(Version.LUCENE_31, ENGLISH_STOP_NGRAMS);
  this.locale = locale;
  this.ngramLength = ngramLength;
}

/**
 * Loads the dictionary: reads the lines of {@code dictFile} through the
 * given loader and stores them in {@code dictionary} as a set built by
 * {@code StopFilter.makeStopSet}, with {@code false} as the second argument.
 *
 * @param loader used to read the lines of {@code dictFile}
 * @throws RuntimeException wrapping any {@link IOException} raised while
 *         loading
 */
public void inform(ResourceLoader loader) {
 try {
  final List<String> lines = loader.getLines(dictFile);
  final String[] words = lines.toArray(new String[0]);
  dictionary = StopFilter.makeStopSet(words, false);
 } catch (IOException cause) {
  throw new RuntimeException(cause);
 }
}
public DictionaryCompoundWordTokenFilter create(TokenStream input) {

/**
 * Builds an analyzer using {@code RussianCharsets.UnicodeRussian}.
 * <p>
 * The same charset is stored in {@code charset} and passed to
 * {@code makeStopWords}; the resulting words are converted with
 * {@code StopFilter.makeStopSet} and kept in {@code stopSet}.
 */
public RussianAnalyzer() {
  charset = RussianCharsets.UnicodeRussian;
  stopSet = StopFilter.makeStopSet(
        makeStopWords(RussianCharsets.UnicodeRussian));
}

  /**
   * Wraps the given stream in a new StopFilter whose stop set is built from
   * {@code GermanAnalyzer.GERMAN_STOP_WORDS} via {@code makeStopSet}.
   *
   * @param input the stream to filter
   * @return a new StopFilter over {@code input}, created with the version
   *         returned by {@code LuceneVersion.get()}
   */
  @Override
  public TokenStream apply(final TokenStream input)
  {
    return new StopFilter(LuceneVersion.get(), input, makeStopSet(GermanAnalyzer.GERMAN_STOP_WORDS));
  }
};

Javadoc

Builds a Set from an array of stop words, appropriate for passing into the StopFilter constructor. This permits this stopWords construction to be cached once when an Analyzer is constructed.

Popular methods of StopFilter

<init>
setEnablePositionIncrements
Set to true to make this StopFilter enable position increments to result tokens. When set, when a t
getEnablePositionIncrementsVersionDefault

Popular in Java

Parsing JSON documents to java classes using gson
setRequestProperty (URLConnection)
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
requestLocationUpdates (LocationManager)
Proxy (java.net)
This class represents proxy server settings. A created instance of Proxy stores a type and an addres
Path (java.nio.file)
Map (java.util)
A Map is a data structure consisting of a set of keys and values in which each key is mapped to a si
Logger (org.slf4j)
The org.slf4j.Logger interface is the main user entry point of SLF4J API. It is expected that loggin
Color (java.awt)
The Color class is used to encapsulate colors in the default sRGB color space or colors in arbitrary
JComboBox (javax.swing)
Top plugins for WebStorm

How to use the makeStopSet method in org.apache.lucene.analysis.StopFilter

Best Java code snippets using org.apache.lucene.analysis.StopFilter.makeStopSet (Showing top 20 results out of 315)

How to use the makeStopSet method in org.apache.lucene.analysis.StopFilter