public SynonymGraphFilterFactory(Map<String,String> args) { super(args); ignoreCase = getBoolean(args, "ignoreCase", false); synonyms = require(args, "synonyms"); format = get(args, "format"); expand = getBoolean(args, "expand", true); analyzerName = get(args, "analyzer"); tokenizerFactory = get(args, "tokenizerFactory"); if (analyzerName != null && tokenizerFactory != null) { throw new IllegalArgumentException("Analyzer and TokenizerFactory can't be specified both: " + analyzerName + " and " + tokenizerFactory); } if (tokenizerFactory != null) { tokArgs.put("luceneMatchVersion", getLuceneMatchVersion().toString()); for (Iterator<String> itr = args.keySet().iterator(); itr.hasNext();) { String key = itr.next(); tokArgs.put(key.replaceAll("^tokenizerFactory\\.",""), args.get(key)); itr.remove(); } } if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } }
@Override public void inform(ResourceLoader loader) throws IOException { final TokenizerFactory factory = tokenizerFactory == null ? null : loadTokenizerFactory(loader, tokenizerFactory); Analyzer analyzer; if (analyzerName != null) { analyzer = loadAnalyzer(loader, analyzerName); } else { analyzer = new Analyzer() { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = factory == null ? new WhitespaceTokenizer() : factory.create(); TokenStream stream = ignoreCase ? new LowerCaseFilter(tokenizer) : tokenizer; return new TokenStreamComponents(tokenizer, stream); } }; } try (Analyzer a = analyzer) { String formatClass = format; if (format == null || format.equals("solr")) { formatClass = SolrSynonymParser.class.getName(); } else if (format.equals("wordnet")) { formatClass = WordnetSynonymParser.class.getName(); } // TODO: expose dedup as a parameter? map = loadSynonyms(loader, formatClass, true, a); } catch (ParseException e) { throw new IOException("Error parsing synonyms file:", e); } }
/**
 * Load synonyms with the given {@link SynonymMap.Parser} class.
 *
 * <p>The parser class is resolved through {@code loader} and instantiated reflectively via
 * its {@code (boolean dedup, boolean expand, Analyzer analyzer)} constructor. Every file
 * listed in the {@code synonyms} setting is then decoded as UTF-8 and fed to the parser.
 * Malformed or unmappable input is reported as an error, not silently replaced.
 *
 * @param loader   resource loader used to find the parser class and open the synonym files
 * @param cname    name of the {@link SynonymMap.Parser} implementation to instantiate
 * @param dedup    passed as the first constructor argument of the parser
 * @param analyzer analyzer passed to the parser
 * @return the synonym map built from all parsed files
 * @throws IOException      if a synonym file cannot be opened, read or decoded
 * @throws ParseException   if the parser rejects the content of a synonym file
 * @throws RuntimeException if the parser class cannot be instantiated
 */
protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
  CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder()
      .onMalformedInput(CodingErrorAction.REPORT)
      .onUnmappableCharacter(CodingErrorAction.REPORT);

  SynonymMap.Parser parser;
  Class<? extends SynonymMap.Parser> clazz = loader.findClass(cname, SynonymMap.Parser.class);
  try {
    parser = clazz.getConstructor(boolean.class, boolean.class, Analyzer.class).newInstance(dedup, expand, analyzer);
  } catch (Exception e) {
    throw new RuntimeException(e);
  }

  List<String> files = splitFileNames(synonyms);
  for (String file : files) {
    // The decoder is shared across files, so clear any state left over from the previous one.
    decoder.reset();
    // Close the reader (and with it the underlying resource stream) here instead of relying
    // on the parser implementation to do so; closing an already closed reader has no effect.
    try (InputStreamReader reader = new InputStreamReader(loader.openResource(file), decoder)) {
      parser.parse(reader);
    }
  }
  return parser.build();
}