org.apache.lucene.util.automaton.CompiledAutomaton.<init> java code examples

/**
 * Create a new AutomatonQuery from an {@link Automaton}.
 * 
 * @param term Term containing field and possibly some pattern structure. The
 *        term text is ignored.
 * @param automaton Automaton to run, terms that are accepted are considered a
 *        match.
 * @param maxDeterminizedStates maximum number of states in the resulting
 *   automata.  If the automata would need more than this many states
 *   TooComplextToDeterminizeException is thrown.  Higher number require more
 *   space but can process more complex automata.
 * @param isBinary if true, this automaton is already binary and
 *   will not go through the UTF32ToUTF8 conversion
 */
public AutomatonQuery(final Term term, Automaton automaton, int maxDeterminizedStates, boolean isBinary) {
 super(term.field());
 this.term = term;
 this.automaton = automaton;
 this.automatonIsBinary = isBinary;
 // TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?:
 this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary);
}

/**
 * Exercises Terms.intersect on the range from {@code minTerm} to {@code maxTerm} and checks
 * that it visits the same doc ids as a plain (non-intersect) TermsEnum iteration.
 *
 * <p>Throws a RuntimeException when {@code minTerm} cannot be found, when intersect reports
 * more terms than the plain iteration, or when the two collected doc sets differ.
 *
 * @return true if the two passes counted a different number of terms (described by the
 *         original documentation as "fake terms" having been seen)
 */
private static boolean checkSingleTermRange(String field, int maxDoc, Terms terms, BytesRef minTerm, BytesRef maxTerm, FixedBitSet normalDocs, FixedBitSet intersectDocs) throws IOException {
 assert minTerm.compareTo(maxTerm) <= 0;

 // The range start must be an existing term, otherwise the comparison is meaningless.
 final TermsEnum plainEnum = terms.iterator();
 if (plainEnum.seekCeil(minTerm) != TermsEnum.SeekStatus.FOUND) {
  throw new RuntimeException("failed to seek to existing term field=" + field + " term=" + minTerm);
 }

 // Reference pass: walk the range term by term with the plain enum.
 final long normalTermCount = getDocsFromTermRange(field, maxDoc, plainEnum, normalDocs, minTerm, maxTerm, false);

 // Second pass: the same range expressed as a binary interval automaton and run through intersect.
 final CompiledAutomaton compiledRange =
   new CompiledAutomaton(Automata.makeBinaryInterval(minTerm, true, maxTerm, false), true, false, Integer.MAX_VALUE, true);
 final TermsEnum intersectEnum = terms.intersect(compiledRange, null);
 final long intersectTermCount = getDocsFromTermRange(field, maxDoc, intersectEnum, intersectDocs, minTerm, maxTerm, true);

 if (intersectTermCount > normalTermCount) {
  throw new RuntimeException("intersect returned too many terms: field=" + field + " intersectTermCount=" + intersectTermCount + " normalTermCount=" + normalTermCount);
 }

 if (!normalDocs.equals(intersectDocs)) {
  throw new RuntimeException("intersect visited different docs than straight terms enum: " + normalDocs.cardinality() + " for straight enum, vs " + intersectDocs.cardinality() + " for intersect, minTerm=" + minTerm + " maxTerm=" + maxTerm);
 }

 return intersectTermCount != normalTermCount;
}

// For every edit distance from 0 through maxEdits (inclusive), build the
// automaton for that distance and store its compiled form in prevAutomata[i].
for (int i = 0; i <= maxEdits; i++) {
 Automaton a = builder.toAutomaton(i, prefix);
 // NOTE(review): the two boolean arguments are presumably (finite, simplify) -- confirm
 // against the CompiledAutomaton constructor in use.
 prevAutomata[i] = new CompiledAutomaton(a, true, false);

/**
 * Creates an instance backed by the given automaton.
 *
 * <p>The automaton is compiled once, using the single-argument
 * {@code CompiledAutomaton} constructor, and the result is kept in the
 * {@code compiled} field.
 *
 * @param automaton the automaton to compile
 */
private AutomatonBackedOrdinalsFilter(Automaton automaton) {
  this.compiled = new CompiledAutomaton(automaton);
}

/**
 * {@inheritDoc}
 *
 * <p>Returns {@link TermsEnum#EMPTY} when {@code terms} is null, when no prefix is
 * available, or when obtaining the terms enum fails with an IOException (which is logged).
 */
@Override
public TermsEnum getTermsEnumForSuggestions(final Terms terms) {
  // Without a terms dictionary there is nothing to suggest from.
  if (terms == null) {
    return TermsEnum.EMPTY;
  }
  final BytesRef prefix = getPrefix();
  if (prefix == null) {
    return TermsEnum.EMPTY;
  }

  final Automaton prefixAutomaton = PrefixQuery.toAutomaton(prefix);
  final Automaton suggestionAutomaton;
  if (suggestPosition != SuggestPosition.LOWER) {
    // Prefix matches, minus the interval that runs from the start up to the lower term.
    // NOTE(review): Integer.MIN_VALUE is passed as the third argument of Operations.minus,
    // which Lucene documents as maxDeterminizedStates -- confirm this value is intended.
    final Automaton upToLower =
        Automata.makeBinaryInterval(null, true, getLowerTerm(), !includesLower());
    suggestionAutomaton = Operations.minus(prefixAutomaton, upToLower, Integer.MIN_VALUE);
  } else {
    // Prefix matches restricted to the [lower, upper] interval of this range.
    final Automaton withinRange = Automata.makeBinaryInterval(
        getLowerTerm(), includesLower(), getUpperTerm(), includesUpper());
    suggestionAutomaton = Operations.intersection(withinRange, prefixAutomaton);
  }

  final CompiledAutomaton compiledSuggestions = new CompiledAutomaton(suggestionAutomaton);
  try {
    return compiledSuggestions.getTermsEnum(terms);
  } catch (IOException e) {
    // Best effort: log the failure and fall back to an empty enumeration.
    logger.log(Level.WARNING, "Could not compile automaton for range suggestions", e);
    return TermsEnum.EMPTY;
  }
}

/**
 * Creates an instance backed by the given automaton.
 *
 * <p>The automaton is compiled once, using the single-argument
 * {@code CompiledAutomaton} constructor, and the result is kept in the
 * {@code compiled} field.
 *
 * @param automaton the automaton to compile
 */
private AutomatonBackedOrdinalsFilter(Automaton automaton) {
  this.compiled = new CompiledAutomaton(automaton);
}

/**
 * Creates an instance backed by the given automaton.
 *
 * <p>The automaton is compiled once, using the single-argument
 * {@code CompiledAutomaton} constructor, and the result is kept in the
 * {@code compiled} field.
 *
 * @param automaton the automaton to compile
 */
private AutomatonBackedOrdinalsFilter(Automaton automaton) {
  this.compiled = new CompiledAutomaton(automaton);
}

/**
 * Creates an instance backed by the given automaton.
 *
 * <p>The automaton is compiled once, using the single-argument
 * {@code CompiledAutomaton} constructor, and the result is kept in the
 * {@code compiled} field.
 *
 * @param automaton the automaton to compile
 */
private AutomatonBackedOrdinalsFilter(Automaton automaton) {
  this.compiled = new CompiledAutomaton(automaton);
}

/**
 * Creates an instance backed by the given automaton.
 *
 * <p>The automaton is compiled once, using the single-argument
 * {@code CompiledAutomaton} constructor, and the result is kept in the
 * {@code compiled} field.
 *
 * @param automaton the automaton to compile
 */
private AutomatonBackedOrdinalsFilter(Automaton automaton) {
  this.compiled = new CompiledAutomaton(automaton);
}

/**
 * Creates an instance backed by the given automaton.
 *
 * <p>The automaton is compiled once, using the single-argument
 * {@code CompiledAutomaton} constructor, and the result is kept in the
 * {@code compiled} field.
 *
 * @param automaton the automaton to compile
 */
private AutomatonBackedOrdinalsFilter(Automaton automaton) {
  this.compiled = new CompiledAutomaton(automaton);
}

/**
 * Returns the compiled automaton of acceptable terms, creating it on first use.
 *
 * <p>The automaton produced by {@code buildAcceptableTerms()} is compiled the first
 * time this method is called and cached in the {@code acceptable} field; later calls
 * return the cached instance.
 *
 * @return the cached compiled automaton
 */
public CompiledAutomaton acceptableTerms() {
  if (acceptable != null) {
    return acceptable;
  }
  acceptable = new CompiledAutomaton(buildAcceptableTerms());
  return acceptable;
}

/**
 * Create a new AutomatonQuery from an {@link Automaton}.
 *
 * @param term Term containing field and possibly some pattern structure. The
 *        term text is ignored.
 * @param automaton Automaton to run, terms that are accepted are considered a
 *        match.
 */
public NodeAutomatonQuery(final Term term, final Automaton automaton) {
 super(term.field());
 this.term = term;
 this.automaton = automaton;
 this.compiled = new CompiledAutomaton(automaton);
}

/**
 * Create a new AutomatonQuery from an {@link Automaton}.
 *
 * @param term Term containing field and possibly some pattern structure. The
 *        term text is ignored.
 * @param automaton Automaton to run, terms that are accepted are considered a
 *        match.
 */
public NodeAutomatonQuery(final Term term, final Automaton automaton) {
 super(term.field());
 this.term = term;
 this.automaton = automaton;
 this.compiled = new CompiledAutomaton(automaton);
}

/**
 * Create a new AutomatonQuery from an {@link Automaton}.
 * 
 * @param term Term containing field and possibly some pattern structure. The
 *        term text is ignored.
 * @param automaton Automaton to run, terms that are accepted are considered a
 *        match.
 * @param maxDeterminizedStates maximum number of states in the resulting
 *   automata.  If the automata would need more than this many states
 *   TooComplextToDeterminizeException is thrown.  Higher number require more
 *   space but can process more complex automata.
 * @param isBinary if true, this automaton is already binary and
 *   will not go through the UTF32ToUTF8 conversion
 */
public AutomatonQuery(final Term term, Automaton automaton, int maxDeterminizedStates, boolean isBinary) {
 super(term.field());
 this.term = term;
 this.automaton = automaton;
 // TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?:
 this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary);
}

/**
 * Create a new AutomatonQuery from an {@link Automaton}.
 * 
 * @param term Term containing field and possibly some pattern structure. The
 *        term text is ignored.
 * @param automaton Automaton to run, terms that are accepted are considered a
 *        match.
 * @param maxDeterminizedStates maximum number of states in the resulting
 *   automata.  If the automata would need more than this many states
 *   TooComplextToDeterminizeException is thrown.  Higher number require more
 *   space but can process more complex automata.
 * @param isBinary if true, this automaton is already binary and
 *   will not go through the UTF32ToUTF8 conversion
 */
public AutomatonQuery(final Term term, Automaton automaton, int maxDeterminizedStates, boolean isBinary) {
 super(term.field());
 this.term = term;
 this.automaton = automaton;
 // TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?:
 this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary);
}

/**
 * Create a new AutomatonQuery from an {@link Automaton}.
 * 
 * @param term Term containing field and possibly some pattern structure. The
 *        term text is ignored.
 * @param automaton Automaton to run, terms that are accepted are considered a
 *        match.
 * @param maxDeterminizedStates maximum number of states in the resulting
 *   automata.  If the automata would need more than this many states
 *   TooComplextToDeterminizeException is thrown.  Higher number require more
 *   space but can process more complex automata.
 * @param isBinary if true, this automaton is already binary and
 *   will not go through the UTF32ToUTF8 conversion
 */
public AutomatonQuery(final Term term, Automaton automaton, int maxDeterminizedStates, boolean isBinary) {
 super(term.field());
 this.term = term;
 this.automaton = automaton;
 this.automatonIsBinary = isBinary;
 // TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?:
 this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary);
}

/**
 * Makes sure Levenshtein DFAs exist for every edit distance up to {@code maxDistance},
 * if possible.
 *
 * <p>The automata live in the list returned by {@code dfaAtt.automata()}. Missing entries
 * are appended only when the list does not yet cover {@code maxDistance} and that distance
 * does not exceed {@code LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE}; otherwise the
 * list is returned untouched.
 *
 * @param maxDistance the largest edit distance an automaton is wanted for
 * @return the (possibly extended) list of compiled automata, indexed by edit distance
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
 final List<CompiledAutomaton> cached = dfaAtt.automata();

 // Nothing to build: already covered, or the distance is beyond what is supported.
 if (cached.size() > maxDistance || maxDistance > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
  return cached;
 }

 // The automaton is built over the part of the term after the real prefix;
 // the prefix itself is handed to toAutomaton separately.
 final String suffix = UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
 final LevenshteinAutomata levBuilder = new LevenshteinAutomata(suffix, transpositions);
 final String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);

 for (int distance = cached.size(); distance <= maxDistance; distance++) {
  cached.add(new CompiledAutomaton(levBuilder.toAutomaton(distance, prefix), true, false));
 }
 return cached;
}

/**
 * Makes sure Levenshtein DFAs exist for every edit distance up to {@code maxDistance},
 * if possible.
 *
 * <p>The automata live in the list returned by {@code dfaAtt.automata()}. Missing entries
 * are appended only when the list does not yet cover {@code maxDistance} and that distance
 * does not exceed {@code LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE}; otherwise the
 * list is returned untouched.
 *
 * @param maxDistance the largest edit distance an automaton is wanted for
 * @return the (possibly extended) list of compiled automata, indexed by edit distance
 */
private List<CompiledAutomaton> initAutomata(int maxDistance) {
 final List<CompiledAutomaton> cached = dfaAtt.automata();

 // Nothing to build: already covered, or the distance is beyond what is supported.
 if (cached.size() > maxDistance || maxDistance > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) {
  return cached;
 }

 // The automaton is built over the part of the term after the real prefix;
 // the prefix itself is handed to toAutomaton separately.
 final String suffix = UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength);
 final LevenshteinAutomata levBuilder = new LevenshteinAutomata(suffix, transpositions);
 final String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength);

 for (int distance = cached.size(); distance <= maxDistance; distance++) {
  cached.add(new CompiledAutomaton(levBuilder.toAutomaton(distance, prefix), true, false));
 }
 return cached;
}

/**
 * Collects the indexed terms that start with the given term's text and appends them,
 * decoded as UTF-8 strings, to {@code values}.
 *
 * <p>Does nothing when the reader has no fields or the term's field has no terms.
 *
 * @param reader  Index reader to read the terms from.
 * @param values  List that receives the matching term texts.
 * @param term    Term whose field is searched and whose bytes serve as the prefix.
 *
 * @throws IOException If an error is thrown by the prefix term enumeration.
 */
public static void prefix(IndexReader reader, List<String> values, Term term) throws IOException {
 final Fields fields = MultiFields.getFields(reader);
 if (fields == null) return;
 final org.apache.lucene.index.Terms terms = fields.terms(term.field());
 if (terms == null) return;

 final BytesRef prefixBytes = term.bytes();
 // NOTE(review): Lucene documents that intersect only returns terms greater than the
 // start term, so a term equal to the prefix itself is presumably skipped -- confirm intended.
 final TermsEnum matches = terms.intersect(new CompiledAutomaton(PrefixQuery.toAutomaton(prefixBytes)), prefixBytes);
 for (BytesRef current = matches.next(); current != null; current = matches.next()) {
  values.add(current.utf8ToString());
 }
}

/**
 * Collects the indexed terms that start with the given term's text and adds each one,
 * together with its document frequency, to {@code bucket}.
 *
 * <p>Does nothing when the reader has no fields or the term's field has no terms.
 *
 * @param reader  Index reader to read the terms and document frequencies from.
 * @param bucket  Bucket that receives each matching term and its document frequency.
 * @param term    Term whose field is searched and whose bytes serve as the prefix.
 *
 * @throws IOException If an error is thrown by the prefix term enumeration.
 */
public static void prefix(IndexReader reader, Bucket<Term> bucket, Term term) throws IOException {
 final Fields fields = MultiFields.getFields(reader);
 if (fields == null) return;
 final org.apache.lucene.index.Terms terms = fields.terms(term.field());
 if (terms == null) return;

 final BytesRef prefixBytes = term.bytes();
 // NOTE(review): Lucene documents that intersect only returns terms greater than the
 // start term, so a term equal to the prefix itself is presumably skipped -- confirm intended.
 final TermsEnum matches = terms.intersect(new CompiledAutomaton(PrefixQuery.toAutomaton(prefixBytes)), prefixBytes);
 for (BytesRef current = matches.next(); current != null; current = matches.next()) {
  // Deep-copy the bytes before wrapping them in a Term, as the original code does.
  final Term match = new Term(term.field(), BytesRef.deepCopyOf(current));
  bucket.add(match, reader.docFreq(match));
 }
}

Javadoc

Create this, passing simplify=true and finite=null, so that we try to simplify the automaton and determine if it is finite.

Popular methods of CompiledAutomaton

getTermsEnum
Return a TermsEnum intersecting the provided Terms with the terms accepted by this automaton.
addTail
equals
findSinkState
Returns sink state, if present, else -1.
floor
Finds the largest term accepted by this Automaton that is <= the provided input term. The result is placed in output.
hashCode

Popular in Java

Creating JSON documents from java classes using gson
findViewById (Activity)
getOriginalFilename (MultipartFile)
Return the original filename in the client's filesystem. This may contain path information depending
scheduleAtFixedRate (ScheduledExecutorService)
HttpServer (com.sun.net.httpserver)
This class implements a simple HTTP server. A HttpServer is bound to an IP address and port number a
BufferedWriter (java.io)
Wraps an existing Writer and buffers the output. Expensive interaction with the underlying reader is
FileOutputStream (java.io)
An output stream that writes bytes to a file. If the output file exists, it can be replaced or appen
KeyStore (java.security)
KeyStore is responsible for maintaining cryptographic keys and their owners. The type of the syste
Options (org.apache.commons.cli)
Main entry-point into the library. Options represents a collection of Option objects, which describ
Point (java.awt)
A point representing a location in (x,y) coordinate space, specified in integer precision.
CodeWhisperer alternatives

How to use org.apache.lucene.util.automaton.CompiledAutomaton constructor

Best Java code snippets using org.apache.lucene.util.automaton.CompiledAutomaton.<init> (Showing top 20 results out of 315)

How to use
org.apache.lucene.util.automaton.CompiledAutomaton
constructor