// NOTE(review): fragment of SolrSynonymParser.addInternal — the enclosing method
// header and several closing braces were lost in extraction, so braces below do
// not balance; `j` is also referenced without a visible declaration in the first
// branch. Code left byte-identical; comments only. TODO: restore from upstream.
// Split a rule line on "=>": two sides means an explicit input=>output mapping,
// one side means a list of equivalent terms.
String sides[] = split(line, "=>");
if (sides.length > 1) { // explicit mapping
    if (sides.length != 2) {
        throw new IllegalArgumentException("more than one explicit mapping specified on the same line");
    // Left-hand side: comma-separated input terms, each unescaped, trimmed,
    // and run through the analyzer.
    String inputStrings[] = split(sides[0], ",");
    CharsRef[] inputs = new CharsRef[inputStrings.length];
    for (int i = 0; i < inputs.length; i++) {
        inputs[i] = analyze(unescape(inputStrings[i]).trim(), new CharsRefBuilder());
    // Right-hand side: comma-separated output terms, analyzed the same way.
    String outputStrings[] = split(sides[1], ",");
    CharsRef[] outputs = new CharsRef[outputStrings.length];
    for (int i = 0; i < outputs.length; i++) {
        outputs[i] = analyze(unescape(outputStrings[i]).trim(), new CharsRefBuilder());
    // Explicit mappings never keep the original term (includeOrig = false).
    add(inputs[i], outputs[j], false);
    // Equivalence list branch: every term maps to every other term.
    String inputStrings[] = split(line, ",");
    CharsRef[] inputs = new CharsRef[inputStrings.length];
    for (int i = 0; i < inputs.length; i++) {
        inputs[i] = analyze(unescape(inputStrings[i]).trim(), new CharsRefBuilder());
    for (int j = 0; j < inputs.length; j++) {
        if (i != j) {
            add(inputs[i], inputs[j], true);
    // Non-expanding mode: presumably maps every term to the first term only —
    // TODO confirm against the missing surrounding if/else.
    add(inputs[i], inputs[0], false);
/**
 * Parses synonym rules from the given reader.
 *
 * @param in reader over the rule text; always closed before this method returns
 * @throws IOException if reading from {@code in} fails
 * @throws ParseException if a rule is malformed; the message carries the
 *     offending line number and the original {@link IllegalArgumentException}
 *     is preserved as the cause
 */
@Override
public void parse(Reader in) throws IOException, ParseException {
    // try-with-resources replaces the manual finally/close of the original and
    // additionally records a close() failure as a suppressed exception.
    try (LineNumberReader br = new LineNumberReader(in)) {
        try {
            addInternal(br);
        } catch (IllegalArgumentException e) {
            // Translate into a ParseException pointing at the bad line while
            // keeping the original exception for diagnostics.
            ParseException ex = new ParseException("Invalid synonym rule at line " + br.getLineNumber(), 0);
            ex.initCause(e);
            throw ex;
        }
    }
}
/**
 * Builds a synonym parser for the requested rules format and feeds it the
 * given rules.
 *
 * @param rulesReader reader over the synonym rules (not closed here)
 * @param format rules syntax; {@code "wordnet"} (case-insensitive) selects the
 *     WordNet parser, anything else falls back to the Solr syntax
 * @param expand whether to expand equivalent synonyms to all terms
 * @param analyzer analyzer used to normalize terms in the rules
 * @return the populated builder, ready for {@code build()}
 * @throws IOException if reading the rules fails
 * @throws ParseException if a rule is malformed
 */
static SynonymMap.Builder getSynonymParser(Reader rulesReader, String format, boolean expand, Analyzer analyzer)
        throws IOException, ParseException {
    // Concretely typed locals let us call parse(...) without the double casts
    // the original needed on a SynonymMap.Builder variable.
    if ("wordnet".equalsIgnoreCase(format)) {
        WordnetSynonymParser parser = new WordnetSynonymParser(true, expand, analyzer);
        parser.parse(rulesReader);
        return parser;
    }
    SolrSynonymParser parser = new SolrSynonymParser(true, expand, analyzer);
    parser.parse(rulesReader);
    return parser;
}
/**
 * Loads Solr-format synonym rules from a classpath resource and, when present,
 * overlays additional rules from an optional custom file.
 *
 * @param filePath classpath location of the bundled rules resource
 * @param custom optional path to extra user rules; ignored when {@code null}
 *     or the file does not exist
 * @param analyzer analyzer for rule terms; a {@code WhitespaceAnalyzer} is
 *     used when {@code null}
 * @return the built synonym map
 * @throws ElasticsearchException wrapping any I/O or parse failure (the
 *     declared IOException/ParseException are never propagated directly)
 */
private static SynonymMap loadFile(String filePath, Path custom, Analyzer analyzer) throws IOException, ParseException {
    try {
        SolrSynonymParser solrSynonymParser =
                new SolrSynonymParser(true, true, analyzer == null ? new WhitespaceAnalyzer() : analyzer);
        java.io.InputStream bundled = RessourceLoading.class.getResourceAsStream(filePath);
        if (bundled == null) {
            // getResourceAsStream returns null for a missing resource; fail with
            // a clear message instead of an opaque NullPointerException.
            throw new IOException("classpath resource not found: " + filePath);
        }
        // Close the readers (the original leaked them) and decode as UTF-8
        // explicitly rather than with the platform default charset.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(bundled, java.nio.charset.StandardCharsets.UTF_8))) {
            solrSynonymParser.parse(reader);
        }
        if (custom != null && Files.exists(custom)) {
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(Files.newInputStream(custom), java.nio.charset.StandardCharsets.UTF_8))) {
                solrSynonymParser.parse(reader);
            }
        }
        return solrSynonymParser.build();
    } catch (IOException | ParseException e) {
        throw new ElasticsearchException(e);
    }
}
@Override public void add(CharsRef input, CharsRef output, boolean includeOrig) { // This condition follows up on the overridden analyze method. In case lenient was set to true and there was an // exception during super.analyze we return a zero-length CharsRef for that word which caused an exception. When // the synonym mappings for the words are added using the add method we skip the ones that were left empty by // analyze i.e., in the case when lenient is set we only add those combinations which are non-zero-length. The // else would happen only in the case when the input or output is empty and lenient is set, in which case we // quietly ignore it. For more details on the control-flow see SolrSynonymParser::addInternal. if (lenient == false || (input.length > 0 && output.length > 0)) { super.add(input, output, includeOrig); } }
/**
 * Analyzes {@code text} via the superclass, optionally tolerating failures.
 * In lenient mode an {@link IllegalArgumentException} from analysis is logged
 * and a zero-length CharsRef is returned as a marker; the overridden add()
 * skips mappings containing such empty terms. Otherwise the exception is
 * rethrown unchanged.
 */
@Override
public CharsRef analyze(String text, CharsRefBuilder reuse) throws IOException {
    try {
        return super.analyze(text, reuse);
    } catch (IllegalArgumentException ex) {
        if (lenient) {
            logger.info("Synonym rule for [" + text + "] was ignored");
            // Empty marker consumed (and discarded) by the overridden add().
            return new CharsRef("");
        } else {
            throw ex;
        }
    }
}
} // NOTE(review): closing brace of the enclosing class, included in this fragment
/**
 * Creates a rewriter factory whose synonym rules are parsed with a
 * keyword-preserving analyzer (the whole input becomes a single token),
 * optionally lower-casing terms when {@code ignoreCase} is set.
 */
public LuceneSynonymsRewriterFactory(boolean expand, final boolean ignoreCase) throws IOException {
    Analyzer ruleAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            // KeywordTokenizer emits the entire input as one token so that
            // multi-word synonym terms survive intact.
            Tokenizer source = new KeywordTokenizer();
            TokenStream sink = ignoreCase ? new LowerCaseFilter(source) : source;
            return new TokenStreamComponents(source, sink);
        }
    };
    parser = new SolrSynonymParser(true, expand, ruleAnalyzer);
}
/**
 * Parses synonym rules from the given input stream and adds them to this
 * factory's parser.
 *
 * @param is stream of rule text, decoded as UTF-8 (not closed here)
 * @throws IOException if reading fails, or wrapping a ParseException so
 *     callers only need to handle one checked type (cause preserved)
 */
public void addResource(InputStream is) throws IOException {
    try {
        // Decode explicitly as UTF-8: the no-charset InputStreamReader
        // constructor falls back to the platform default, which varies by host.
        parser.parse(new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8));
    } catch (ParseException e) {
        throw new IOException(e);
    }
}
/**
 * Builds the synonym map from all rules parsed so far and stores it in the
 * {@code synonymMap} field.
 *
 * @throws IOException if building the map fails
 */
public void build() throws IOException {
    synonymMap = parser.build();
}
/**
 * Reload task: if the synonym file changed since the last observed
 * modification time, re-parses it (WordNet or Solr syntax per {@code format})
 * and swaps in the freshly built synonym map.
 *
 * @throws RuntimeException wrapping any failure, with the original exception
 *     preserved as the cause (the original code dropped it, keeping only the
 *     message and losing the stack trace)
 */
@Override
public void run() {
    try {
        File synonymFile = new File(synonymFileURL.toURI());
        if (synonymFile.exists() && lastModified < synonymFile.lastModified()) {
            // Close the reader when done — the original leaked it on every reload.
            try (InputStreamReader rulesReader = new InputStreamReader(synonymFileURL.openStream(), Charsets.UTF_8)) {
                SynonymMap.Builder parser;
                if ("wordnet".equalsIgnoreCase(format)) {
                    parser = new WordnetSynonymParser(true, expand, analyzer);
                    ((WordnetSynonymParser) parser).parse(rulesReader);
                } else {
                    parser = new SolrSynonymParser(true, expand, analyzer);
                    ((SolrSynonymParser) parser).parse(rulesReader);
                }
                synonymMap = parser.build();
            }
            // Record the timestamp only after a successful rebuild so a failed
            // parse is retried on the next run.
            lastModified = synonymFile.lastModified();
        }
    } catch (Exception e) {
        // Preserve the cause instead of flattening it to getMessage().
        throw new RuntimeException("could not reload synonyms file: " + e.getMessage(), e);
    }
}
} // closing brace of the enclosing class, included in this fragment
// NOTE(review): fragment — the method header, the decoder/builder chain this
// `.onUnmappableCharacter(...)` call belongs to, and several closing braces
// were lost in extraction; braces below do not balance. Code byte-identical.
.onUnmappableCharacter(CodingErrorAction.REPORT);
SolrSynonymParser parser = new SolrSynonymParser(true, expand, analyzer);
// NOTE(review): synonymFile appears unused in this fragment — verify upstream.
File synonymFile = new File(synonyms);
if (loader != null){ //first call in constructor
    // Reset the shared decoder before each parse so error state from a
    // previous file does not carry over.
    decoder.reset();
    parser.parse(new InputStreamReader(loader.openResource(synonyms), decoder));
} else {
    for (String file : files) {
        decoder.reset();
        // NOTE(review): this branch runs only when loader == null, yet it calls
        // loader.openResource(file) — guaranteed NPE as written. Presumably a
        // different resource loader was intended here; confirm upstream.
        parser.parse(new InputStreamReader(loader.openResource(file), decoder));
return parser.build();
@Override public void add(CharsRef input, CharsRef output, boolean includeOrig) { // This condition follows up on the overridden analyze method. In case lenient was set to true and there was an // exception during super.analyze we return a zero-length CharsRef for that word which caused an exception. When // the synonym mappings for the words are added using the add method we skip the ones that were left empty by // analyze i.e., in the case when lenient is set we only add those combinations which are non-zero-length. The // else would happen only in the case when the input or output is empty and lenient is set, in which case we // quietly ignore it. For more details on the control-flow see SolrSynonymParser::addInternal. if (lenient == false || (input.length > 0 && output.length > 0)) { super.add(input, output, includeOrig); } }
/**
 * Analyzes {@code text} via the superclass, optionally tolerating failures.
 * In lenient mode an {@link IllegalArgumentException} from analysis is logged
 * and a zero-length CharsRef is returned as a marker; the overridden add()
 * skips mappings containing such empty terms. Otherwise the exception is
 * rethrown unchanged.
 */
@Override
public CharsRef analyze(String text, CharsRefBuilder reuse) throws IOException {
    try {
        return super.analyze(text, reuse);
    } catch (IllegalArgumentException ex) {
        if (lenient) {
            logger.info("Synonym rule for [" + text + "] was ignored");
            // Empty marker consumed (and discarded) by the overridden add().
            return new CharsRef("");
        } else {
            throw ex;
        }
    }
}
} // NOTE(review): closing brace of the enclosing class, included in this fragment
// NOTE(review): fragment of an older SolrSynonymParser.addInternal variant
// (pre-CharsRefBuilder API) — the method header, the declarations of `inputs`,
// `outputs`, `j`, and several closing braces were lost in extraction, so the
// braces below do not balance. Code byte-identical; comments only.
// Split a rule line on "=>": two sides means an explicit input=>output
// mapping, one side means a list of equivalent terms.
String sides[] = split(line, "=>");
if (sides.length > 1) { // explicit mapping
    if (sides.length != 2) {
        throw new IllegalArgumentException("more than one explicit mapping specified on the same line");
    // Left-hand side: comma-separated input terms, unescaped, trimmed,
    // and analyzed.
    String inputStrings[] = split(sides[0], ",");
    inputs = new CharsRef[inputStrings.length];
    for (int i = 0; i < inputs.length; i++) {
        inputs[i] = analyze(analyzer, unescape(inputStrings[i]).trim(), new CharsRef());
    // Right-hand side: comma-separated output terms, analyzed the same way.
    String outputStrings[] = split(sides[1], ",");
    outputs = new CharsRef[outputStrings.length];
    for (int i = 0; i < outputs.length; i++) {
        outputs[i] = analyze(analyzer, unescape(outputStrings[i]).trim(), new CharsRef());
    // Equivalence list branch.
    String inputStrings[] = split(line, ",");
    inputs = new CharsRef[inputStrings.length];
    for (int i = 0; i < inputs.length; i++) {
        inputs[i] = analyze(analyzer, unescape(inputStrings[i]).trim(), new CharsRef());
    // Explicit mappings never keep the original term (includeOrig = false).
    add(inputs[i], outputs[j], false);
// NOTE(review): fragment — starts mid-statement inside an if/else that chooses
// between the WordNet and Solr rule syntaxes; the if-condition and surrounding
// method were lost in extraction. Code byte-identical; comments only.
((WordnetSynonymParser) parser).parse(rulesReader);
} else {
    // Fallback: treat the rules as Solr-format synonyms.
    parser = new SolrSynonymParser(true, expand, analyzer);
    ((SolrSynonymParser) parser).parse(rulesReader);
@Override public void add(CharsRef input, CharsRef output, boolean includeOrig) { // This condition follows up on the overridden analyze method. In case lenient was set to true and there was an // exception during super.analyze we return a zero-length CharsRef for that word which caused an exception. When // the synonym mappings for the words are added using the add method we skip the ones that were left empty by // analyze i.e., in the case when lenient is set we only add those combinations which are non-zero-length. The // else would happen only in the case when the input or output is empty and lenient is set, in which case we // quietly ignore it. For more details on the control-flow see SolrSynonymParser::addInternal. if (lenient == false || (input.length > 0 && output.length > 0)) { super.add(input, output, includeOrig); } }
/**
 * Analyzes {@code text} via the superclass, optionally tolerating failures.
 * In lenient mode an {@link IllegalArgumentException} from analysis is logged
 * and a zero-length CharsRef is returned as a marker; the overridden add()
 * skips mappings containing such empty terms. Otherwise the exception is
 * rethrown unchanged.
 */
@Override
public CharsRef analyze(String text, CharsRefBuilder reuse) throws IOException {
    try {
        return super.analyze(text, reuse);
    } catch (IllegalArgumentException ex) {
        if (lenient) {
            logger.info("Synonym rule for [" + text + "] was ignored");
            // Empty marker consumed (and discarded) by the overridden add().
            return new CharsRef("");
        } else {
            throw ex;
        }
    }
}
} // NOTE(review): closing brace of the enclosing class, included in this fragment
/**
 * Reads synonym rules from {@code in} and adds them to this parser.
 *
 * @param in reader over the rule text; always closed before this method returns
 * @throws IOException if reading from {@code in} fails
 * @throws ParseException if a rule is malformed; the message carries the
 *     offending line number and the original {@link IllegalArgumentException}
 *     is preserved as the cause
 */
public void add(Reader in) throws IOException, ParseException {
    // try-with-resources replaces the manual finally/close of the original and
    // additionally records a close() failure as a suppressed exception.
    try (LineNumberReader br = new LineNumberReader(in)) {
        try {
            addInternal(br);
        } catch (IllegalArgumentException e) {
            // Translate into a ParseException pointing at the bad line while
            // keeping the original exception for diagnostics.
            ParseException ex = new ParseException("Invalid synonym rule at line " + br.getLineNumber(), 0);
            ex.initCause(e);
            throw ex;
        }
    }
}
// NOTE(review): fragment of SolrSynonymParser.addInternal (duplicate of an
// earlier fragment in this file) — the enclosing method header and several
// closing braces were lost in extraction, so the braces below do not balance
// and `j` is referenced without a visible declaration in the first branch.
// Code byte-identical; comments only. TODO: restore from upstream.
// Split a rule line on "=>": two sides means an explicit input=>output
// mapping, one side means a list of equivalent terms.
String sides[] = split(line, "=>");
if (sides.length > 1) { // explicit mapping
    if (sides.length != 2) {
        throw new IllegalArgumentException("more than one explicit mapping specified on the same line");
    // Left-hand side: comma-separated input terms, unescaped, trimmed,
    // and analyzed.
    String inputStrings[] = split(sides[0], ",");
    CharsRef[] inputs = new CharsRef[inputStrings.length];
    for (int i = 0; i < inputs.length; i++) {
        inputs[i] = analyze(unescape(inputStrings[i]).trim(), new CharsRefBuilder());
    // Right-hand side: comma-separated output terms, analyzed the same way.
    String outputStrings[] = split(sides[1], ",");
    CharsRef[] outputs = new CharsRef[outputStrings.length];
    for (int i = 0; i < outputs.length; i++) {
        outputs[i] = analyze(unescape(outputStrings[i]).trim(), new CharsRefBuilder());
    // Explicit mappings never keep the original term (includeOrig = false).
    add(inputs[i], outputs[j], false);
    // Equivalence list branch: every term maps to every other term.
    String inputStrings[] = split(line, ",");
    CharsRef[] inputs = new CharsRef[inputStrings.length];
    for (int i = 0; i < inputs.length; i++) {
        inputs[i] = analyze(unescape(inputStrings[i]).trim(), new CharsRefBuilder());
    for (int j = 0; j < inputs.length; j++) {
        if (i != j) {
            add(inputs[i], inputs[j], true);
    // Non-expanding mode: presumably maps every term to the first term only —
    // TODO confirm against the missing surrounding if/else.
    add(inputs[i], inputs[0], false);