/** * Create a new AutomatonQuery from an {@link Automaton}. * * @param term Term containing field and possibly some pattern structure. The * term text is ignored. * @param automaton Automaton to run, terms that are accepted are considered a * match. * @param maxDeterminizedStates maximum number of states in the resulting * automata. If the automata would need more than this many states * TooComplextToDeterminizeException is thrown. Higher number require more * space but can process more complex automata. * @param isBinary if true, this automaton is already binary and * will not go through the UTF32ToUTF8 conversion */ public AutomatonQuery(final Term term, Automaton automaton, int maxDeterminizedStates, boolean isBinary) { super(term.field()); this.term = term; this.automaton = automaton; this.automatonIsBinary = isBinary; // TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?: this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary); }
/** Test Terms.intersect on this range, and validates that it returns the same doc ids as using non-intersect TermsEnum. Returns true if * any fake terms were seen. */ private static boolean checkSingleTermRange(String field, int maxDoc, Terms terms, BytesRef minTerm, BytesRef maxTerm, FixedBitSet normalDocs, FixedBitSet intersectDocs) throws IOException { //System.out.println(" check minTerm=" + minTerm.utf8ToString() + " maxTerm=" + maxTerm.utf8ToString()); assert minTerm.compareTo(maxTerm) <= 0; TermsEnum termsEnum = terms.iterator(); TermsEnum.SeekStatus status = termsEnum.seekCeil(minTerm); if (status != TermsEnum.SeekStatus.FOUND) { throw new RuntimeException("failed to seek to existing term field=" + field + " term=" + minTerm); } // Do "dumb" iteration to visit all terms in the range: long normalTermCount = getDocsFromTermRange(field, maxDoc, termsEnum, normalDocs, minTerm, maxTerm, false); // Now do the same operation using intersect: long intersectTermCount = getDocsFromTermRange(field, maxDoc, terms.intersect(new CompiledAutomaton(Automata.makeBinaryInterval(minTerm, true, maxTerm, false), true, false, Integer.MAX_VALUE, true), null), intersectDocs, minTerm, maxTerm, true); if (intersectTermCount > normalTermCount) { throw new RuntimeException("intersect returned too many terms: field=" + field + " intersectTermCount=" + intersectTermCount + " normalTermCount=" + normalTermCount); } if (normalDocs.equals(intersectDocs) == false) { throw new RuntimeException("intersect visited different docs than straight terms enum: " + normalDocs.cardinality() + " for straight enum, vs " + intersectDocs.cardinality() + " for intersect, minTerm=" + minTerm + " maxTerm=" + maxTerm); } //System.out.println(" docs=" + normalTermCount); //System.out.println(" " + intersectTermCount + " vs " + normalTermCount); return intersectTermCount != normalTermCount; }
// For every edit distance i from 0 through maxEdits (inclusive), ask the builder for the
// automaton at that distance with the given prefix and store its compiled form in prevAutomata[i].
// NOTE(review): the meaning of the (true, false) constructor flags is not visible here — confirm
// against the CompiledAutomaton constructor in use.
for (int i = 0; i <= maxEdits; i++) { Automaton a = builder.toAutomaton(i, prefix); prevAutomata[i] = new CompiledAutomaton(a, true, false);
private AutomatonBackedOrdinalsFilter(Automaton automaton) { this.compiled = new CompiledAutomaton(automaton); }
/**
 * {@inheritDoc}
 *
 * <p>Returns {@link TermsEnum#EMPTY} when {@code terms} is {@code null}, when {@code getPrefix()}
 * yields no prefix, or when obtaining the enumeration fails with an {@link IOException} (the
 * failure is logged at WARNING level).
 *
 * @param terms the terms to enumerate suggestions from; may be {@code null}
 * @return an enumeration over the terms accepted by the suggestion automaton, or
 *         {@link TermsEnum#EMPTY}
 */
@Override
public TermsEnum getTermsEnumForSuggestions(final Terms terms) {
  if (terms == null) {
    return TermsEnum.EMPTY;
  }

  BytesRef prefix = getPrefix();
  if (prefix != null) {
    Automaton prefixAutomaton = PrefixQuery.toAutomaton(prefix);
    Automaton finalAutomaton;

    if (suggestPosition == SuggestPosition.LOWER) {
      // Suggest terms that start with the prefix AND lie inside the configured range.
      Automaton binaryInt =
          Automata.makeBinaryInterval(
              getLowerTerm(), includesLower(), getUpperTerm(), includesUpper());
      finalAutomaton = Operations.intersection(binaryInt, prefixAutomaton);
    } else {
      // Suggest terms that start with the prefix but are NOT in the interval that ends at the
      // lower term (the lower term's inclusion is the inverse of includesLower()).
      Automaton binaryInt =
          Automata.makeBinaryInterval(null, true, getLowerTerm(), !includesLower());
      // The third argument bounds determinization effort while complementing binaryInt.
      // It was Integer.MIN_VALUE, a negative limit that rejects any determinization as too
      // complex; Integer.MAX_VALUE leaves it effectively unbounded.
      finalAutomaton = Operations.minus(prefixAutomaton, binaryInt, Integer.MAX_VALUE);
    }

    // NOTE(review): both inputs are built from raw term bytes; confirm whether the
    // CompiledAutomaton constructor variant taking an isBinary flag should be used here.
    CompiledAutomaton compiledAutomaton = new CompiledAutomaton(finalAutomaton);

    try {
      return compiledAutomaton.getTermsEnum(terms);
    } catch (IOException e) {
      // Best effort: suggestions are optional, so log and fall through to the empty enum.
      logger.log(Level.WARNING, "Could not compile automaton for range suggestions", e);
    }
  }

  return TermsEnum.EMPTY;
}
private AutomatonBackedOrdinalsFilter(Automaton automaton) { this.compiled = new CompiledAutomaton(automaton); }
private AutomatonBackedOrdinalsFilter(Automaton automaton) { this.compiled = new CompiledAutomaton(automaton); }
private AutomatonBackedOrdinalsFilter(Automaton automaton) { this.compiled = new CompiledAutomaton(automaton); }
private AutomatonBackedOrdinalsFilter(Automaton automaton) { this.compiled = new CompiledAutomaton(automaton); }
private AutomatonBackedOrdinalsFilter(Automaton automaton) { this.compiled = new CompiledAutomaton(automaton); }
/**
 * Returns the compiled automaton of acceptable terms, building it from
 * {@code buildAcceptableTerms()} on first use and keeping it in {@code acceptable} afterwards.
 *
 * @return the cached (or freshly built) compiled automaton
 */
public CompiledAutomaton acceptableTerms() {
  if (acceptable != null) {
    return acceptable;
  }
  acceptable = new CompiledAutomaton(buildAcceptableTerms());
  return acceptable;
}
/** * Create a new AutomatonQuery from an {@link Automaton}. * * @param term Term containing field and possibly some pattern structure. The * term text is ignored. * @param automaton Automaton to run, terms that are accepted are considered a * match. */ public NodeAutomatonQuery(final Term term, final Automaton automaton) { super(term.field()); this.term = term; this.automaton = automaton; this.compiled = new CompiledAutomaton(automaton); }
/** * Create a new AutomatonQuery from an {@link Automaton}. * * @param term Term containing field and possibly some pattern structure. The * term text is ignored. * @param automaton Automaton to run, terms that are accepted are considered a * match. */ public NodeAutomatonQuery(final Term term, final Automaton automaton) { super(term.field()); this.term = term; this.automaton = automaton; this.compiled = new CompiledAutomaton(automaton); }
/** * Create a new AutomatonQuery from an {@link Automaton}. * * @param term Term containing field and possibly some pattern structure. The * term text is ignored. * @param automaton Automaton to run, terms that are accepted are considered a * match. * @param maxDeterminizedStates maximum number of states in the resulting * automata. If the automata would need more than this many states * TooComplextToDeterminizeException is thrown. Higher number require more * space but can process more complex automata. * @param isBinary if true, this automaton is already binary and * will not go through the UTF32ToUTF8 conversion */ public AutomatonQuery(final Term term, Automaton automaton, int maxDeterminizedStates, boolean isBinary) { super(term.field()); this.term = term; this.automaton = automaton; // TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?: this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary); }
/** * Create a new AutomatonQuery from an {@link Automaton}. * * @param term Term containing field and possibly some pattern structure. The * term text is ignored. * @param automaton Automaton to run, terms that are accepted are considered a * match. * @param maxDeterminizedStates maximum number of states in the resulting * automata. If the automata would need more than this many states * TooComplextToDeterminizeException is thrown. Higher number require more * space but can process more complex automata. * @param isBinary if true, this automaton is already binary and * will not go through the UTF32ToUTF8 conversion */ public AutomatonQuery(final Term term, Automaton automaton, int maxDeterminizedStates, boolean isBinary) { super(term.field()); this.term = term; this.automaton = automaton; // TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?: this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary); }
/** * Create a new AutomatonQuery from an {@link Automaton}. * * @param term Term containing field and possibly some pattern structure. The * term text is ignored. * @param automaton Automaton to run, terms that are accepted are considered a * match. * @param maxDeterminizedStates maximum number of states in the resulting * automata. If the automata would need more than this many states * TooComplextToDeterminizeException is thrown. Higher number require more * space but can process more complex automata. * @param isBinary if true, this automaton is already binary and * will not go through the UTF32ToUTF8 conversion */ public AutomatonQuery(final Term term, Automaton automaton, int maxDeterminizedStates, boolean isBinary) { super(term.field()); this.term = term; this.automaton = automaton; this.automatonIsBinary = isBinary; // TODO: we could take isFinite too, to save a bit of CPU in CompiledAutomaton ctor?: this.compiled = new CompiledAutomaton(automaton, null, true, maxDeterminizedStates, isBinary); }
/** initialize levenshtein DFAs up to maxDistance, if possible */ private List<CompiledAutomaton> initAutomata(int maxDistance) { final List<CompiledAutomaton> runAutomata = dfaAtt.automata(); //System.out.println("cached automata size: " + runAutomata.size()); if (runAutomata.size() <= maxDistance && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { LevenshteinAutomata builder = new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions); String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength); for (int i = runAutomata.size(); i <= maxDistance; i++) { Automaton a = builder.toAutomaton(i, prefix); //System.out.println("compute automaton n=" + i); runAutomata.add(new CompiledAutomaton(a, true, false)); } } return runAutomata; }
/** initialize levenshtein DFAs up to maxDistance, if possible */ private List<CompiledAutomaton> initAutomata(int maxDistance) { final List<CompiledAutomaton> runAutomata = dfaAtt.automata(); //System.out.println("cached automata size: " + runAutomata.size()); if (runAutomata.size() <= maxDistance && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { LevenshteinAutomata builder = new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions); String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength); for (int i = runAutomata.size(); i <= maxDistance; i++) { Automaton a = builder.toAutomaton(i, prefix); //System.out.println("compute automaton n=" + i); runAutomata.add(new CompiledAutomaton(a, true, false)); } } return runAutomata; }
/**
 * Collects, as strings, the terms in the reader that are accepted by the prefix automaton built
 * from the given term.
 *
 * <p>The enumeration is obtained from {@code Terms.intersect}, with the term's bytes passed as
 * the start term. Nothing is added when the reader has no fields or no terms for the field.
 *
 * @param reader Index reader to use.
 * @param values The list that receives the UTF-8 decoded matching terms.
 * @param term   The term supplying the field and the prefix bytes.
 *
 * @throws IOException If an error is thrown by the prefix term enumeration.
 */
public static void prefix(IndexReader reader, List<String> values, Term term) throws IOException {
  final Fields fields = MultiFields.getFields(reader);
  if (fields == null) {
    return;
  }
  final org.apache.lucene.index.Terms terms = fields.terms(term.field());
  if (terms == null) {
    return;
  }

  final CompiledAutomaton prefixMatcher =
      new CompiledAutomaton(PrefixQuery.toAutomaton(term.bytes()));
  final TermsEnum matches = terms.intersect(prefixMatcher, term.bytes());

  for (BytesRef current = matches.next(); current != null; current = matches.next()) {
    values.add(current.utf8ToString());
  }
}
/**
 * Adds to the bucket every term in the reader that is accepted by the prefix automaton built
 * from the given term, together with that term's document frequency.
 *
 * <p>The enumeration is obtained from {@code Terms.intersect}, with the term's bytes passed as
 * the start term. Nothing is added when the reader has no fields or no terms for the field.
 *
 * @param reader Index reader to use.
 * @param bucket The bucket that receives each matching term and its {@code docFreq}.
 * @param term   The term supplying the field and the prefix bytes.
 *
 * @throws IOException If an error is thrown by the prefix term enumeration.
 */
public static void prefix(IndexReader reader, Bucket<Term> bucket, Term term) throws IOException {
  final Fields fields = MultiFields.getFields(reader);
  if (fields == null) {
    return;
  }
  final org.apache.lucene.index.Terms terms = fields.terms(term.field());
  if (terms == null) {
    return;
  }

  final CompiledAutomaton prefixMatcher =
      new CompiledAutomaton(PrefixQuery.toAutomaton(term.bytes()));
  final TermsEnum matches = terms.intersect(prefixMatcher, term.bytes());

  for (BytesRef current = matches.next(); current != null; current = matches.next()) {
    // Copy the bytes before wrapping them in a Term that is handed to the bucket.
    final Term matched = new Term(term.field(), BytesRef.deepCopyOf(current));
    bucket.add(matched, reader.docFreq(matched));
  }
}