@Override public AbstractAnalysisFactory getMultiTermComponent() { Map<String,String> map = new HashMap<>(getOriginalArgs()); map.remove("maxTokenLen"); //removing "maxTokenLen" argument for LowerCaseFilterFactory init return new LowerCaseFilterFactory(map); } }
/**
 * Demo entry point: tokenizes a fixed sample sentence with a tokenizer created by
 * {@code StandardTokenizerFactory}, passes it through the filter created by
 * {@code LowerCaseFilterFactory}, and prints every term together with its length
 * to standard output.
 *
 * @param args command-line arguments (unused)
 * @throws IOException propagated from the token stream operations
 */
public static void main(String[] args) throws IOException {
  StringReader inputText = new StringReader("This is a TEST string");

  Map<String, String> param = new HashMap<>();
  param.put("luceneMatchVersion", "LUCENE_44");
  TokenizerFactory stdTokenFact = new StandardTokenizerFactory(param);
  Tokenizer tokenizer = stdTokenFact.create(inputText);

  // NOTE(review): the version entry is put again before the second factory is built;
  // presumably the first factory's constructor consumes it from the map -- confirm.
  param.put("luceneMatchVersion", "LUCENE_44");
  LowerCaseFilterFactory lowerCaseFactory = new LowerCaseFilterFactory(param);

  // try-with-resources guarantees close() runs even when reset(), incrementToken()
  // or end() throws; previously the stream was only closed on the success path.
  try (TokenStream tokenStream = lowerCaseFactory.create(tokenizer)) {
    CharTermAttribute termAttrib = tokenStream.getAttribute(CharTermAttribute.class);

    tokenStream.reset();
    while (tokenStream.incrementToken()) {
      System.out.println("CharTermAttribute Length = " + termAttrib.length());
      System.out.println(termAttrib.toString());
    }
    tokenStream.end();
  }
}
/**
 * Builds the analysis components for a field: a tokenizer created by
 * {@code tokenizerFactory} over {@code reader}, wrapped by the filter from
 * {@code lowerCaseFilterFactory} when that factory is set.
 *
 * @param fieldName name of the field being analyzed (not used by this implementation)
 * @param reader    source of the text to tokenize
 * @return components holding the tokenizer alone, or the tokenizer plus the
 *         lower-case filter created on top of it
 */
@Override
protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
  final Tokenizer source = tokenizerFactory.create(reader);

  // No lower-case factory configured: expose the bare tokenizer.
  if (lowerCaseFilterFactory == null) {
    return new TokenStreamComponents(source);
  }

  return new TokenStreamComponents(source, lowerCaseFilterFactory.create(source));
}
/**
 * Returns a {@code LowerCaseFilterFactory} built from a copy of this factory's
 * original arguments.
 *
 * @return a new {@code LowerCaseFilterFactory} configured from the copied arguments
 */
@Override
public AbstractAnalysisFactory getMultiTermComponent() {
  // The new factory receives its own copy rather than the map returned by getOriginalArgs().
  final HashMap<String, String> argsCopy = new HashMap<>(getOriginalArgs());
  return new LowerCaseFilterFactory(argsCopy);
}
}
public SimpleAnalyzer(boolean lowerCase) { Map<String, String> parameters = new HashMap<String, String>(); parameters.put(PatternTokenizerFactory.PATTERN, PATTERN); parameters.put(PatternTokenizerFactory.GROUP, "0"); parameters.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, version.name()); tokenizerFactory = new PatternTokenizerFactory(parameters); if (lowerCase) { parameters = new HashMap<String, String>(); parameters.put(AbstractAnalysisFactory.LUCENE_MATCH_VERSION_PARAM, version.name()); lowerCaseFilterFactory = new LowerCaseFilterFactory(parameters); } else { lowerCaseFilterFactory = null; } }