// NOTE(review): fragment of SolrSynonymParser.addInternal — the enclosing method
// header and several closing braces were lost in extraction, so braces below do
// not balance; `j` is also referenced without a visible declaration in the first
// branch. Code left byte-identical; comments only. TODO: restore from upstream.
// Split a rule line on "=>": two sides means an explicit input=>output mapping,
// one side means a list of equivalent terms.
String sides[] = split(line, "=>");
if (sides.length > 1) { // explicit mapping
    if (sides.length != 2) {
        throw new IllegalArgumentException("more than one explicit mapping specified on the same line");
    // Left-hand side: comma-separated input terms, each unescaped, trimmed,
    // and run through the analyzer.
    String inputStrings[] = split(sides[0], ",");
    CharsRef[] inputs = new CharsRef[inputStrings.length];
    for (int i = 0; i < inputs.length; i++) {
        inputs[i] = analyze(unescape(inputStrings[i]).trim(), new CharsRefBuilder());
    // Right-hand side: comma-separated output terms, analyzed the same way.
    String outputStrings[] = split(sides[1], ",");
    CharsRef[] outputs = new CharsRef[outputStrings.length];
    for (int i = 0; i < outputs.length; i++) {
        outputs[i] = analyze(unescape(outputStrings[i]).trim(), new CharsRefBuilder());
    // Explicit mappings never keep the original term (includeOrig = false).
    add(inputs[i], outputs[j], false);
    // Equivalence list branch: every term maps to every other term.
    String inputStrings[] = split(line, ",");
    CharsRef[] inputs = new CharsRef[inputStrings.length];
    for (int i = 0; i < inputs.length; i++) {
        inputs[i] = analyze(unescape(inputStrings[i]).trim(), new CharsRefBuilder());
    for (int j = 0; j < inputs.length; j++) {
        if (i != j) {
            add(inputs[i], inputs[j], true);
    // Non-expanding mode: presumably maps every term to the first term only —
    // TODO confirm against the missing surrounding if/else.
    add(inputs[i], inputs[0], false);
/**
 * Parses synonym rules from the given reader.
 *
 * @param in reader over the rule text; always closed before this method returns
 * @throws IOException if reading from {@code in} fails
 * @throws ParseException if a rule is malformed; the message carries the
 *     offending line number and the original {@link IllegalArgumentException}
 *     is preserved as the cause
 */
@Override
public void parse(Reader in) throws IOException, ParseException {
    // try-with-resources replaces the manual finally/close of the original and
    // additionally records a close() failure as a suppressed exception.
    try (LineNumberReader br = new LineNumberReader(in)) {
        try {
            addInternal(br);
        } catch (IllegalArgumentException e) {
            // Translate into a ParseException pointing at the bad line while
            // keeping the original exception for diagnostics.
            ParseException ex = new ParseException("Invalid synonym rule at line " + br.getLineNumber(), 0);
            ex.initCause(e);
            throw ex;
        }
    }
}
/**
 * Builds a synonym parser for the requested rules format and feeds it the
 * given rules.
 *
 * @param rulesReader reader over the synonym rules (not closed here)
 * @param format rules syntax; {@code "wordnet"} (case-insensitive) selects the
 *     WordNet parser, anything else falls back to the Solr syntax
 * @param expand whether to expand equivalent synonyms to all terms
 * @param analyzer analyzer used to normalize terms in the rules
 * @return the populated builder, ready for {@code build()}
 * @throws IOException if reading the rules fails
 * @throws ParseException if a rule is malformed
 */
static SynonymMap.Builder getSynonymParser(Reader rulesReader, String format, boolean expand, Analyzer analyzer)
        throws IOException, ParseException {
    // Concretely typed locals let us call parse(...) without the double casts
    // the original needed on a SynonymMap.Builder variable.
    if ("wordnet".equalsIgnoreCase(format)) {
        WordnetSynonymParser parser = new WordnetSynonymParser(true, expand, analyzer);
        parser.parse(rulesReader);
        return parser;
    }
    SolrSynonymParser parser = new SolrSynonymParser(true, expand, analyzer);
    parser.parse(rulesReader);
    return parser;
}
/**
 * Loads Solr-format synonym rules from a classpath resource and, when present,
 * overlays additional rules from an optional custom file.
 *
 * @param filePath classpath location of the bundled rules resource
 * @param custom optional path to extra user rules; ignored when {@code null}
 *     or the file does not exist
 * @param analyzer analyzer for rule terms; a {@code WhitespaceAnalyzer} is
 *     used when {@code null}
 * @return the built synonym map
 * @throws ElasticsearchException wrapping any I/O or parse failure (the
 *     declared IOException/ParseException are never propagated directly)
 */
private static SynonymMap loadFile(String filePath, Path custom, Analyzer analyzer) throws IOException, ParseException {
    try {
        SolrSynonymParser solrSynonymParser =
                new SolrSynonymParser(true, true, analyzer == null ? new WhitespaceAnalyzer() : analyzer);
        java.io.InputStream bundled = RessourceLoading.class.getResourceAsStream(filePath);
        if (bundled == null) {
            // getResourceAsStream returns null for a missing resource; fail with
            // a clear message instead of an opaque NullPointerException.
            throw new IOException("classpath resource not found: " + filePath);
        }
        // Close the readers (the original leaked them) and decode as UTF-8
        // explicitly rather than with the platform default charset.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(bundled, java.nio.charset.StandardCharsets.UTF_8))) {
            solrSynonymParser.parse(reader);
        }
        if (custom != null && Files.exists(custom)) {
            try (BufferedReader reader = new BufferedReader(
                    new InputStreamReader(Files.newInputStream(custom), java.nio.charset.StandardCharsets.UTF_8))) {
                solrSynonymParser.parse(reader);
            }
        }
        return solrSynonymParser.build();
    } catch (IOException | ParseException e) {
        throw new ElasticsearchException(e);
    }
}
@Override public void add(CharsRef input, CharsRef output, boolean includeOrig) { // This condition follows up on the overridden analyze method. In case lenient was set to true and there was an // exception during super.analyze we return a zero-length CharsRef for that word which caused an exception. When // the synonym mappings for the words are added using the add method we skip the ones that were left empty by // analyze i.e., in the case when lenient is set we only add those combinations which are non-zero-length. The // else would happen only in the case when the input or output is empty and lenient is set, in which case we // quietly ignore it. For more details on the control-flow see SolrSynonymParser::addInternal. if (lenient == false || (input.length > 0 && output.length > 0)) { super.add(input, output, includeOrig); } }
/**
 * Analyzes {@code text} via the superclass, optionally tolerating failures.
 * In lenient mode an {@link IllegalArgumentException} from analysis is logged
 * and a zero-length CharsRef is returned as a marker; the overridden add()
 * skips mappings containing such empty terms. Otherwise the exception is
 * rethrown unchanged.
 */
@Override
public CharsRef analyze(String text, CharsRefBuilder reuse) throws IOException {
    try {
        return super.analyze(text, reuse);
    } catch (IllegalArgumentException ex) {
        if (lenient) {
            logger.info("Synonym rule for [" + text + "] was ignored");
            // Empty marker consumed (and discarded) by the overridden add().
            return new CharsRef("");
        } else {
            throw ex;
        }
    }
}
} // NOTE(review): closing brace of the enclosing class, included in this fragment
/**
 * Creates a rewriter factory whose synonym rules are parsed with a
 * keyword-preserving analyzer (the whole input becomes a single token),
 * optionally lower-casing terms when {@code ignoreCase} is set.
 */
public LuceneSynonymsRewriterFactory(boolean expand, final boolean ignoreCase) throws IOException {
    Analyzer ruleAnalyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            // KeywordTokenizer emits the entire input as one token so that
            // multi-word synonym terms survive intact.
            Tokenizer source = new KeywordTokenizer();
            TokenStream sink = ignoreCase ? new LowerCaseFilter(source) : source;
            return new TokenStreamComponents(source, sink);
        }
    };
    parser = new SolrSynonymParser(true, expand, ruleAnalyzer);
}
/**
 * Parses synonym rules from the given input stream and adds them to this
 * factory's parser.
 *
 * @param is stream of rule text, decoded as UTF-8 (not closed here)
 * @throws IOException if reading fails, or wrapping a ParseException so
 *     callers only need to handle one checked type (cause preserved)
 */
public void addResource(InputStream is) throws IOException {
    try {
        // Decode explicitly as UTF-8: the no-charset InputStreamReader
        // constructor falls back to the platform default, which varies by host.
        parser.parse(new InputStreamReader(is, java.nio.charset.StandardCharsets.UTF_8));
    } catch (ParseException e) {
        throw new IOException(e);
    }
}
/**
 * Builds the synonym map from all rules parsed so far and stores it in the
 * {@code synonymMap} field.
 *
 * @throws IOException if building the map fails
 */
public void build() throws IOException {
    synonymMap = parser.build();
}
/**
 * Reload task: if the synonym file changed since the last observed
 * modification time, re-parses it (WordNet or Solr syntax per {@code format})
 * and swaps in the freshly built synonym map.
 *
 * @throws RuntimeException wrapping any failure, with the original exception
 *     preserved as the cause (the original code dropped it, keeping only the
 *     message and losing the stack trace)
 */
@Override
public void run() {
    try {
        File synonymFile = new File(synonymFileURL.toURI());
        if (synonymFile.exists() && lastModified < synonymFile.lastModified()) {
            // Close the reader when done — the original leaked it on every reload.
            try (InputStreamReader rulesReader = new InputStreamReader(synonymFileURL.openStream(), Charsets.UTF_8)) {
                SynonymMap.Builder parser;
                if ("wordnet".equalsIgnoreCase(format)) {
                    parser = new WordnetSynonymParser(true, expand, analyzer);
                    ((WordnetSynonymParser) parser).parse(rulesReader);
                } else {
                    parser = new SolrSynonymParser(true, expand, analyzer);
                    ((SolrSynonymParser) parser).parse(rulesReader);
                }
                synonymMap = parser.build();
            }
            // Record the timestamp only after a successful rebuild so a failed
            // parse is retried on the next run.
            lastModified = synonymFile.lastModified();
        }
    } catch (Exception e) {
        // Preserve the cause instead of flattening it to getMessage().
        throw new RuntimeException("could not reload synonyms file: " + e.getMessage(), e);
    }
}
} // closing brace of the enclosing class, included in this fragment
// NOTE(review): fragment — the method header, the decoder/builder chain this
// `.onUnmappableCharacter(...)` call belongs to, and several closing braces
// were lost in extraction; braces below do not balance. Code byte-identical.
.onUnmappableCharacter(CodingErrorAction.REPORT);
SolrSynonymParser parser = new SolrSynonymParser(true, expand, analyzer);
// NOTE(review): synonymFile appears unused in this fragment — verify upstream.
File synonymFile = new File(synonyms);
if (loader != null){ //first call in constructor
    // Reset the shared decoder before each parse so error state from a
    // previous file does not carry over.
    decoder.reset();
    parser.parse(new InputStreamReader(loader.openResource(synonyms), decoder));
} else {
    for (String file : files) {
        decoder.reset();
        // NOTE(review): this branch runs only when loader == null, yet it calls
        // loader.openResource(file) — guaranteed NPE as written. Presumably a
        // different resource loader was intended here; confirm upstream.
        parser.parse(new InputStreamReader(loader.openResource(file), decoder));
return parser.build();
@Override public void add(CharsRef input, CharsRef output, boolean includeOrig) { // This condition follows up on the overridden analyze method. In case lenient was set to true and there was an // exception during super.analyze we return a zero-length CharsRef for that word which caused an exception. When // the synonym mappings for the words are added using the add method we skip the ones that were left empty by // analyze i.e., in the case when lenient is set we only add those combinations which are non-zero-length. The // else would happen only in the case when the input or output is empty and lenient is set, in which case we // quietly ignore it. For more details on the control-flow see SolrSynonymParser::addInternal. if (lenient == false || (input.length > 0 && output.length > 0)) { super.add(input, output, includeOrig); } }
/**
 * Analyzes {@code text} via the superclass, optionally tolerating failures.
 * In lenient mode an {@link IllegalArgumentException} from analysis is logged
 * and a zero-length CharsRef is returned as a marker; the overridden add()
 * skips mappings containing such empty terms. Otherwise the exception is
 * rethrown unchanged.
 */
@Override
public CharsRef analyze(String text, CharsRefBuilder reuse) throws IOException {
    try {
        return super.analyze(text, reuse);
    } catch (IllegalArgumentException ex) {
        if (lenient) {
            logger.info("Synonym rule for [" + text + "] was ignored");
            // Empty marker consumed (and discarded) by the overridden add().
            return new CharsRef("");
        } else {
            throw ex;
        }
    }
}
} // NOTE(review): closing brace of the enclosing class, included in this fragment
// NOTE(review): fragment of an older SolrSynonymParser.addInternal variant
// (pre-CharsRefBuilder API) — the method header, the declarations of `inputs`,
// `outputs`, `j`, and several closing braces were lost in extraction, so the
// braces below do not balance. Code byte-identical; comments only.
// Split a rule line on "=>": two sides means an explicit input=>output
// mapping, one side means a list of equivalent terms.
String sides[] = split(line, "=>");
if (sides.length > 1) { // explicit mapping
    if (sides.length != 2) {
        throw new IllegalArgumentException("more than one explicit mapping specified on the same line");
    // Left-hand side: comma-separated input terms, unescaped, trimmed,
    // and analyzed.
    String inputStrings[] = split(sides[0], ",");
    inputs = new CharsRef[inputStrings.length];
    for (int i = 0; i < inputs.length; i++) {
        inputs[i] = analyze(analyzer, unescape(inputStrings[i]).trim(), new CharsRef());
    // Right-hand side: comma-separated output terms, analyzed the same way.
    String outputStrings[] = split(sides[1], ",");
    outputs = new CharsRef[outputStrings.length];
    for (int i = 0; i < outputs.length; i++) {
        outputs[i] = analyze(analyzer, unescape(outputStrings[i]).trim(), new CharsRef());
    // Equivalence list branch.
    String inputStrings[] = split(line, ",");
    inputs = new CharsRef[inputStrings.length];
    for (int i = 0; i < inputs.length; i++) {
        inputs[i] = analyze(analyzer, unescape(inputStrings[i]).trim(), new CharsRef());
    // Explicit mappings never keep the original term (includeOrig = false).
    add(inputs[i], outputs[j], false);
// NOTE(review): fragment — starts mid-statement inside an if/else that chooses
// between the WordNet and Solr rule syntaxes; the if-condition and surrounding
// method were lost in extraction. Code byte-identical; comments only.
((WordnetSynonymParser) parser).parse(rulesReader);
} else {
    // Fallback: treat the rules as Solr-format synonyms.
    parser = new SolrSynonymParser(true, expand, analyzer);
    ((SolrSynonymParser) parser).parse(rulesReader);
@Override public void add(CharsRef input, CharsRef output, boolean includeOrig) { // This condition follows up on the overridden analyze method. In case lenient was set to true and there was an // exception during super.analyze we return a zero-length CharsRef for that word which caused an exception. When // the synonym mappings for the words are added using the add method we skip the ones that were left empty by // analyze i.e., in the case when lenient is set we only add those combinations which are non-zero-length. The // else would happen only in the case when the input or output is empty and lenient is set, in which case we // quietly ignore it. For more details on the control-flow see SolrSynonymParser::addInternal. if (lenient == false || (input.length > 0 && output.length > 0)) { super.add(input, output, includeOrig); } }
/**
 * Analyzes {@code text} via the superclass, optionally tolerating failures.
 * In lenient mode an {@link IllegalArgumentException} from analysis is logged
 * and a zero-length CharsRef is returned as a marker; the overridden add()
 * skips mappings containing such empty terms. Otherwise the exception is
 * rethrown unchanged.
 */
@Override
public CharsRef analyze(String text, CharsRefBuilder reuse) throws IOException {
    try {
        return super.analyze(text, reuse);
    } catch (IllegalArgumentException ex) {
        if (lenient) {
            logger.info("Synonym rule for [" + text + "] was ignored");
            // Empty marker consumed (and discarded) by the overridden add().
            return new CharsRef("");
        } else {
            throw ex;
        }
    }
}
} // NOTE(review): closing brace of the enclosing class, included in this fragment
/**
 * Reads synonym rules from {@code in} and adds them to this parser.
 *
 * @param in reader over the rule text; always closed before this method returns
 * @throws IOException if reading from {@code in} fails
 * @throws ParseException if a rule is malformed; the message carries the
 *     offending line number and the original {@link IllegalArgumentException}
 *     is preserved as the cause
 */
public void add(Reader in) throws IOException, ParseException {
    // try-with-resources replaces the manual finally/close of the original and
    // additionally records a close() failure as a suppressed exception.
    try (LineNumberReader br = new LineNumberReader(in)) {
        try {
            addInternal(br);
        } catch (IllegalArgumentException e) {
            // Translate into a ParseException pointing at the bad line while
            // keeping the original exception for diagnostics.
            ParseException ex = new ParseException("Invalid synonym rule at line " + br.getLineNumber(), 0);
            ex.initCause(e);
            throw ex;
        }
    }
}
// NOTE(review): fragment of SolrSynonymParser.addInternal (duplicate of an
// earlier fragment in this file) — the enclosing method header and several
// closing braces were lost in extraction, so the braces below do not balance
// and `j` is referenced without a visible declaration in the first branch.
// Code byte-identical; comments only. TODO: restore from upstream.
// Split a rule line on "=>": two sides means an explicit input=>output
// mapping, one side means a list of equivalent terms.
String sides[] = split(line, "=>");
if (sides.length > 1) { // explicit mapping
    if (sides.length != 2) {
        throw new IllegalArgumentException("more than one explicit mapping specified on the same line");
    // Left-hand side: comma-separated input terms, unescaped, trimmed,
    // and analyzed.
    String inputStrings[] = split(sides[0], ",");
    CharsRef[] inputs = new CharsRef[inputStrings.length];
    for (int i = 0; i < inputs.length; i++) {
        inputs[i] = analyze(unescape(inputStrings[i]).trim(), new CharsRefBuilder());
    // Right-hand side: comma-separated output terms, analyzed the same way.
    String outputStrings[] = split(sides[1], ",");
    CharsRef[] outputs = new CharsRef[outputStrings.length];
    for (int i = 0; i < outputs.length; i++) {
        outputs[i] = analyze(unescape(outputStrings[i]).trim(), new CharsRefBuilder());
    // Explicit mappings never keep the original term (includeOrig = false).
    add(inputs[i], outputs[j], false);
    // Equivalence list branch: every term maps to every other term.
    String inputStrings[] = split(line, ",");
    CharsRef[] inputs = new CharsRef[inputStrings.length];
    for (int i = 0; i < inputs.length; i++) {
        inputs[i] = analyze(unescape(inputStrings[i]).trim(), new CharsRefBuilder());
    for (int j = 0; j < inputs.length; j++) {
        if (i != j) {
            add(inputs[i], inputs[j], true);
    // Non-expanding mode: presumably maps every term to the first term only —
    // TODO confirm against the missing surrounding if/else.
    add(inputs[i], inputs[0], false);