/**
 * Creates a {@link LuceneStringNormalizer} from the supplied configuration.
 *
 * The configuration is only handled when its "type" entry equals "lucene".
 * In that case the "version" entry is parsed leniently into a Lucene
 * {@link Version}, and the "caseInsensitive", "useStopWords" and "useStem"
 * booleans are collected into a {@link TokenizerOptions}.
 *
 * @param name          not used by this implementation
 * @param config        configuration providing the entries listed above
 * @param runtimeParams not used by this implementation
 * @return a new normalizer, or {@code null} when "type" is not "lucene"
 * @throws ConfigurationException declared by the overridden signature
 */
@Override
public StringNormalizer get(String name, Config config, Map<String, String> runtimeParams) throws ConfigurationException {
    String type = config.getString("type");
    if (!type.equals("lucene")) {
        return null;
    }

    // Read the entries in a fixed order: version first, then the three flags.
    String versionText = config.getString("version");
    Version version = Version.parseLeniently(versionText);

    boolean caseInsensitive = config.getBoolean("caseInsensitive");
    boolean useStopWords = config.getBoolean("useStopWords");
    boolean useStem = config.getBoolean("useStem");

    TokenizerOptions opts = new TokenizerOptions(caseInsensitive, useStopWords, useStem);
    return new LuceneStringNormalizer(opts, version);
}
}
/**
 * Normalizes a language-tagged string.
 *
 * Extracts the language and the raw text from {@code string} and delegates
 * to {@code normalize(Language, String)}.
 *
 * @param string the string to normalize, together with its language
 * @return the result of normalizing the string's text for its language
 */
@Override
public String normalize(LocalString string) {
    Language language = string.getLanguage();
    String text = string.getString();
    return normalize(language, text);
}
/**
 * Normalizes {@code text} by running it through the tokenizer obtained for
 * {@code language} and joining the emitted terms with single spaces.
 *
 * The token stream is always closed, including when tokenization fails
 * part-way through.
 *
 * @param language language used to select the tokenizer
 * @param text     raw text to normalize
 * @return the emitted terms separated by single spaces; the empty string
 *         when the stream emits no tokens
 * @throws RuntimeException wrapping any {@link IOException} raised while
 *         consuming or closing the token stream
 */
@Override
public String normalize(Language language, String text) {
    StringBuilder normalized = new StringBuilder();
    try {
        TokenStream stream = getTokenizer(language).getTokenStream(new StringReader(text));
        try {
            CharTermAttribute cattr = stream.addAttribute(CharTermAttribute.class);
            stream.reset();
            while (stream.incrementToken()) {
                // Separate terms with one space; no leading separator.
                if (normalized.length() > 0) {
                    normalized.append(' ');
                }
                normalized.append(cattr.toString());
            }
            stream.end();
        } finally {
            // Release the stream even if reset/incrementToken/end threw.
            stream.close();
        }
        return normalized.toString();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}