/**
 * Parses the supplied synonym rules and compiles them into a {@link SynonymMap}.
 *
 * <p>The parser implementation is chosen from the {@code format} field:
 * {@code "wordnet"} (compared case-insensitively) selects
 * {@code ESWordnetSynonymParser}; every other value selects
 * {@code ESSolrSynonymParser}. Both parsers are constructed with {@code true}
 * as their first argument, followed by the {@code expand} and {@code lenient}
 * fields and the given analyzer.
 *
 * @param analyzer analyzer handed to the parser
 * @param rules    reader supplying the synonym rules
 * @return the synonym map produced by the parser's {@code build()}
 * @throws IllegalArgumentException if constructing the parser, parsing or
 *         building fails; the original exception is attached as the cause
 */
protected SynonymMap buildSynonyms(Analyzer analyzer, Reader rules) {
    try {
        if ("wordnet".equalsIgnoreCase(format)) {
            ESWordnetSynonymParser wordnetParser =
                    new ESWordnetSynonymParser(true, expand, lenient, analyzer);
            wordnetParser.parse(rules);
            return wordnetParser.build();
        }
        ESSolrSynonymParser solrParser =
                new ESSolrSynonymParser(true, expand, lenient, analyzer);
        solrParser.parse(rules);
        return solrParser.build();
    } catch (Exception e) {
        throw new IllegalArgumentException("failed to build synonyms", e);
    }
}
/**
 * Consumes one line of comma-separated terms and registers every ordered
 * pair of non-equal terms with {@code builder}.
 *
 * <p>The line is split on {@code ','} with surrounding whitespace trimmed
 * from each piece. For each pair {@code (a, b)} where {@code a} does not
 * equal {@code b}, {@code builder.add(a, b, true)} is invoked, so both
 * directions of every pair are added.
 *
 * @param line the raw line to process
 * @return always {@code true}
 * @throws IOException declared by the signature
 */
@Override
public boolean processLine(String line) throws IOException {
    final List<String> terms = newArrayList(Splitter.on(',').trimResults().split(line));
    for (final String source : terms) {
        for (final String target : terms) {
            if (source.equals(target)) {
                // A term is never registered as a synonym of itself.
                continue;
            }
            builder.add(new CharsRef(source), new CharsRef(target), true);
        }
    }
    return true;
}
synonymMap = parser.build(); } catch (Exception e) { throw new ElasticsearchIllegalArgumentException("failed to build synonyms", e);
synonymMap = parser.build(); } catch (Exception e) { throw new IllegalArgumentException("failed to build synonyms", e);
return Resources.readLines(Resources.getResource("lemmatization.txt"), Charsets.UTF_8, new LineProcessor<SynonymMap>() { SynonymMap.Builder builder = new SynonymMap.Builder(true);
/**
 * Rebuilds {@code synonymMap} from the synonym file when the file exists and
 * its modification time is newer than the recorded {@code lastModified}.
 *
 * <p>The parser is chosen from the {@code format} field: {@code "wordnet"}
 * (case-insensitive) selects {@code WordnetSynonymParser}, anything else
 * selects {@code SolrSynonymParser}. The file is read as UTF-8.
 *
 * @throws RuntimeException if the file cannot be located, read or parsed;
 *         the original exception is attached as the cause
 */
@Override
public void run() {
    try {
        File synonymFile = new File(synonymFileURL.toURI());
        // Sample the timestamp once, before reading: if the file is edited while
        // it is being parsed, the next run still sees it as newer and reloads.
        long fileModified = synonymFile.lastModified();
        if (synonymFile.exists() && lastModified < fileModified) {
            SynonymMap.Builder parser;
            Reader rulesReader = new InputStreamReader(synonymFileURL.openStream(), Charsets.UTF_8);
            try {
                if ("wordnet".equalsIgnoreCase(format)) {
                    parser = new WordnetSynonymParser(true, expand, analyzer);
                    ((WordnetSynonymParser) parser).parse(rulesReader);
                } else {
                    parser = new SolrSynonymParser(true, expand, analyzer);
                    ((SolrSynonymParser) parser).parse(rulesReader);
                }
            } finally {
                // Release the stream whether or not parser construction/parsing succeeded.
                rulesReader.close();
            }
            synonymMap = parser.build();
            lastModified = fileModified;
        }
    } catch (Exception e) {
        // Keep the original exception as the cause so the stack trace is not lost.
        throw new RuntimeException("could not reload synonyms file: " + e.getMessage(), e);
    }
}
}
/**
 * Reloads the remote synonym rules and builds a fresh {@link SynonymMap}.
 *
 * <p>A reader is obtained from {@code getReader()}, passed to
 * {@code getSynonymParser} together with the {@code format}, {@code expand}
 * and {@code analyzer} fields, and the resulting parser is built. The reader
 * is always closed afterwards; a failure to close it is logged and otherwise
 * ignored.
 *
 * @return the newly built synonym map
 * @throws IllegalArgumentException if obtaining the reader, parsing or
 *         building fails; the original exception is attached as the cause
 */
@Override
public SynonymMap reloadSynonymMap() {
    Reader rulesReader = null;
    try {
        logger.info("start reload remote synonym from {}.", location);
        rulesReader = getReader();
        return getSynonymParser(rulesReader, format, expand, analyzer).build();
    } catch (Exception reloadFailure) {
        logger.error("reload remote synonym {} error!", reloadFailure, location);
        throw new IllegalArgumentException(
                "could not reload remote synonyms file to build synonyms", reloadFailure);
    } finally {
        if (rulesReader != null) {
            try {
                rulesReader.close();
            } catch (Exception closeFailure) {
                logger.error("failed to close rulesReader", closeFailure);
            }
        }
    }
}
/**
 * Builds a {@link SynonymMap} from synonym rules read from {@code rules}.
 *
 * <p>When the {@code format} field equals {@code "wordnet"} (ignoring case)
 * the rules are parsed by {@code ESWordnetSynonymParser}; otherwise by
 * {@code ESSolrSynonymParser}. Each parser receives {@code true}, the
 * {@code expand} and {@code lenient} fields, and the analyzer.
 *
 * @param analyzer analyzer passed to the parser constructor
 * @param rules    source of the synonym rules
 * @return the map returned by the selected parser's {@code build()}
 * @throws IllegalArgumentException wrapping any exception raised while
 *         creating the parser, parsing the rules or building the map
 */
protected SynonymMap buildSynonyms(Analyzer analyzer, Reader rules) {
    try {
        final SynonymMap.Builder compiled;
        if (!"wordnet".equalsIgnoreCase(format)) {
            final ESSolrSynonymParser solr = new ESSolrSynonymParser(true, expand, lenient, analyzer);
            solr.parse(rules);
            compiled = solr;
        } else {
            final ESWordnetSynonymParser wordnet = new ESWordnetSynonymParser(true, expand, lenient, analyzer);
            wordnet.parse(rules);
            compiled = wordnet;
        }
        return compiled.build();
    } catch (Exception cause) {
        throw new IllegalArgumentException("failed to build synonyms", cause);
    }
}
/**
 * Registers a synonym mapping from one phrase to another.
 *
 * <p>A phrase is a character sequence whose words are separated by the
 * character zero (U+0000). Neither the input nor the output may contain an
 * empty word, i.e. two U+0000 characters in a row.
 *
 * <p>The word count of each phrase is computed with {@code countWords} and
 * passed on to the five-argument {@code add} overload.
 *
 * @param input input phrase
 * @param output output phrase
 * @param includeOrig true if the original should be included
 */
public void add(CharsRef input, CharsRef output, boolean includeOrig) {
    final int inputWordCount = countWords(input);
    final int outputWordCount = countWords(output);
    add(input, inputWordCount, output, outputWordCount, includeOrig);
}
/**
 * Produces the {@link SynonymMap} by delegating to the wrapped {@code builder}.
 *
 * @return the map returned by {@code builder.build()}
 * @throws IOException if the underlying builder's {@code build()} throws it
 */
public SynonymMap build() throws IOException {
    final SynonymMap map = builder.build();
    return map;
}
/**
 * Adds a mapping from an input phrase to an output phrase.
 *
 * <p>Within a phrase, words are delimited by the character zero (U+0000).
 * Empty words (two consecutive U+0000 characters) are not permitted in
 * either the input or the output.
 *
 * <p>Word counts for both phrases are obtained from {@code countWords} and
 * forwarded to the five-argument {@code add} overload.
 *
 * @param input input phrase
 * @param output output phrase
 * @param includeOrig true if the original should be included
 */
public void add(CharsRef input, CharsRef output, boolean includeOrig) {
    final int wordsInInput = countWords(input);
    final int wordsInOutput = countWords(output);
    add(input, wordsInInput, output, wordsInOutput, includeOrig);
}
/**
 * Reloads the local synonym rules and builds a fresh {@link SynonymMap}.
 *
 * <p>A reader is obtained from {@code getReader()} and handed to
 * {@code RemoteSynonymFile.getSynonymParser} together with the
 * {@code format}, {@code expand} and {@code analyzer} fields; the resulting
 * parser is then built. The reader is closed in all cases so the underlying
 * file handle is not leaked; a failure while closing is logged and does not
 * mask the outcome of the reload.
 *
 * @return the newly built synonym map
 * @throws IllegalArgumentException if obtaining the reader, parsing or
 *         building fails; the original exception is attached as the cause
 */
@Override
public SynonymMap reloadSynonymMap() {
    Reader rulesReader = null;
    try {
        logger.info("start reload local synonym from {}.", location);
        rulesReader = getReader();
        SynonymMap.Builder parser =
                RemoteSynonymFile.getSynonymParser(rulesReader, format, expand, analyzer);
        return parser.build();
    } catch (Exception e) {
        logger.error("reload local synonym {} error!", e, location);
        throw new IllegalArgumentException(
                "could not reload local synonyms file to build synonyms", e);
    } finally {
        // Always release the reader, on success and on failure alike.
        if (rulesReader != null) {
            try {
                rulesReader.close();
            } catch (Exception e) {
                logger.error("failed to close rulesReader", e);
            }
        }
    }
}
/**
 * Maps the first synonym to each of the remaining synonyms.
 *
 * <p>For every entry after the first, {@code builder.add(first, entry, keepOrig)}
 * is called; when the {@code bidirectional} field is set, the reverse mapping
 * {@code builder.add(entry, first, keepOrig)} is added as well. Strings are
 * converted with {@code getCharsRef}.
 *
 * <p>The call is a no-op when {@code synonyms} is {@code null} or holds fewer
 * than two entries, since a mapping needs both a source and a target.
 *
 * @param keepOrig forwarded to the underlying builder as its third argument
 * @param synonyms the head term followed by its synonyms
 * @throws IOException declared by the signature
 */
public void add(final boolean keepOrig, final String... synonyms) throws IOException {
    // "< 2" rather than "== 1": an empty varargs array must not reach synonyms[0].
    if (synonyms == null || synonyms.length < 2) {
        return;
    }
    final CharsRef input = getCharsRef(synonyms[0]);
    for (int i = 1; i < synonyms.length; i++) {
        final CharsRef output = getCharsRef(synonyms[i]);
        builder.add(input, output, keepOrig);
        if (bidirectional) {
            builder.add(output, input, keepOrig);
        }
    }
}
/**
 * Builds and returns the synonym map accumulated in {@code builder}.
 *
 * @return the built map, or {@code null} if {@code builder.build()} threw an
 *         {@link IOException} (the stack trace is printed in that case)
 */
// NOTE(review): the IOException is swallowed and null is returned; callers
// must null-check. Confirm whether failing loudly would be preferable.
@Override
public SynonymMap getResult() {
    SynonymMap result = null;
    try {
        result = builder.build();
    } catch (IOException buildFailure) {
        buildFailure.printStackTrace();
    }
    return result;
}
});
/**
 * Creates a synonym map builder wrapper.
 *
 * @param analyzer      stored in the {@code analyzer} field
 * @param bidirectional stored in the {@code bidirectional} field
 * @param dedup         passed to the {@code SynonymMap.Builder} constructor
 */
public SynonymMapBuilder(final Analyzer analyzer, final boolean bidirectional, final boolean dedup) {
    this.bidirectional = bidirectional;
    this.analyzer = analyzer;
    this.builder = new SynonymMap.Builder(dedup);
}
/**
 * Adds a phrase-to-phrase synonym mapping.
 *
 * <p>Each phrase is a character sequence in which individual words are
 * separated by the character zero (U+0000). Empty words, meaning two U+0000
 * characters back to back, are not allowed in the input or in the output.
 *
 * <p>Delegates to the five-argument {@code add} overload, supplying the
 * word count of each phrase as computed by {@code countWords}.
 *
 * @param input input phrase
 * @param output output phrase
 * @param includeOrig true if the original should be included
 */
public void add(CharsRef input, CharsRef output, boolean includeOrig) {
    final int numInputWords = countWords(input);
    final int numOutputWords = countWords(output);
    add(input, numInputWords, output, numOutputWords, includeOrig);
}