/** Creates a new StopFilterFactory */ public StopFilterFactory(Map<String,String> args) { super(args); stopWordFiles = get(args, "words"); format = get(args, "format", (null == stopWordFiles ? null : FORMAT_WORDSET)); ignoreCase = getBoolean(args, "ignoreCase", false); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } }
/**
 * Populates {@code stopWords}.
 *
 * <p>Without a configured words file the English default set is wrapped in a
 * new {@code CharArraySet}; specifying a format in that situation is rejected.
 * With a words file, the set is loaded through the loader according to the
 * configured format (compared case-insensitively).
 *
 * @param loader resource loader handed to the word-set helpers
 * @throws IOException declared for the word-set loading helpers
 * @throws IllegalArgumentException if the format is unknown, or if a format
 *         was given without a words file
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  // Guard clause: no explicit words file means "use the built-in defaults".
  if (stopWordFiles == null) {
    if (format != null) {
      throw new IllegalArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format);
    }
    stopWords = new CharArraySet(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
    return;
  }

  if (FORMAT_WORDSET.equalsIgnoreCase(format)) {
    stopWords = getWordSet(loader, stopWordFiles, ignoreCase);
    return;
  }
  if (FORMAT_SNOWBALL.equalsIgnoreCase(format)) {
    stopWords = getSnowballWordSet(loader, stopWordFiles, ignoreCase);
    return;
  }
  throw new IllegalArgumentException("Unknown 'format' specified for 'words' file: " + format);
}
// When the factory is a StopFilterFactory, fetch its stop-word set and add it
// to the collection stored under fieldName in solrStopWords.
// NOTE(review): assumes solrStopWords already holds an entry for fieldName;
// a null from get(fieldName) would throw here — confirm against the caller.
// NOTE(review): this fragment is truncated; the closing brace of the if-block
// is outside the visible source.
if (factory instanceof StopFilterFactory) { CharArraySet stopWords = ((StopFilterFactory) factory).getStopWords(); solrStopWords.get(fieldName).add(stopWords);
@Test public void analyzerByComposition_FileResource() throws Exception{ NodeBuilder nb = EMPTY_NODE.builder(); nb.child(ANL_TOKENIZER).setProperty(ANL_NAME, "whitespace"); NodeBuilder filters = nb.child(ANL_FILTERS); //name is optional. Derived from nodeName NodeBuilder stop = filters.child("stop"); stop.setProperty("words", "set1.txt, set2.txt"); createFileNode(stop, "set1.txt", newCharArraySet("foo", "bar")); createFileNode(stop, "set2.txt", newCharArraySet("foo1", "bar1")); TokenizerChain analyzer = (TokenizerChain) factory.createInstance(nb.getNodeState()); assertEquals(1, analyzer.getFilters().length); //check the order assertEquals(StopFilterFactory.class.getName(), analyzer.getFilters()[0].getClassArg()); StopFilterFactory sff = (StopFilterFactory) analyzer.getFilters()[0]; assertTrue(sff.getStopWords().contains("foo")); assertTrue(sff.getStopWords().contains("foo1")); }
/** Creates a new StopFilterFactory */ public StopFilterFactory(Map<String,String> args) { super(args); stopWordFiles = get(args, "words"); format = get(args, "format", (null == stopWordFiles ? null : FORMAT_WORDSET)); ignoreCase = getBoolean(args, "ignoreCase", false); if (luceneMatchVersion.onOrAfter(Version.LUCENE_5_0_0) == false) { boolean defaultValue = luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0); enablePositionIncrements = getBoolean(args, "enablePositionIncrements", defaultValue); if (enablePositionIncrements == false && luceneMatchVersion.onOrAfter(Version.LUCENE_4_4_0)) { throw new IllegalArgumentException("enablePositionIncrements=false is not supported anymore as of Lucene 4.4"); } } else if (args.containsKey("enablePositionIncrements")) { throw new IllegalArgumentException("enablePositionIncrements is not a valid option as of Lucene 5.0"); } if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } }
/**
 * Populates {@code stopWords}.
 *
 * <p>With a configured words file, the set is loaded through the loader
 * according to the configured format (compared case-insensitively). Without
 * one, the English default set is wrapped in a new {@code CharArraySet};
 * specifying a format in that situation is rejected.
 *
 * @param loader resource loader handed to the word-set helpers
 * @throws IOException declared for the word-set loading helpers
 * @throws IllegalArgumentException if the format is unknown, or if a format
 *         was given without a words file
 */
@Override
public void inform(ResourceLoader loader) throws IOException {
  final boolean hasWordsFile = stopWordFiles != null;

  if (!hasWordsFile) {
    // A format only makes sense together with an explicit words file.
    if (format != null) {
      throw new IllegalArgumentException("'format' can not be specified w/o an explicit 'words' file: " + format);
    }
    stopWords = new CharArraySet(StopAnalyzer.ENGLISH_STOP_WORDS_SET, ignoreCase);
    return;
  }

  final boolean plainWordSet = FORMAT_WORDSET.equalsIgnoreCase(format);
  final boolean snowballWordSet = !plainWordSet && FORMAT_SNOWBALL.equalsIgnoreCase(format);

  if (plainWordSet) {
    stopWords = getWordSet(loader, stopWordFiles, ignoreCase);
  } else if (snowballWordSet) {
    stopWords = getSnowballWordSet(loader, stopWordFiles, ignoreCase);
  } else {
    throw new IllegalArgumentException("Unknown 'format' specified for 'words' file: " + format);
  }
}