if (Operations.isEmpty(automaton)) { isTotal = Operations.isTotal(automaton, 0, 0xff); } else { isTotal = Operations.isTotal(automaton); automaton = Operations.determinize(automaton, maxDeterminizedStates); IntsRef singleton = Operations.getSingleton(automaton); this.finite = Operations.isFinite(automaton); } else { this.finite = finite; BytesRef suffix = Operations.getCommonSuffixBytesRef(binary, maxDeterminizedStates); if (suffix.length == 0) { commonSuffixRef = null;
findLeaves(exp2, Kind.REGEXP_UNION, list, automata, automaton_provider, maxDeterminizedStates); a = Operations.union(list); a = MinimizationOperations.minimize(a, maxDeterminizedStates); break; findLeaves(exp2, Kind.REGEXP_CONCATENATION, list, automata, automaton_provider, maxDeterminizedStates); a = Operations.concatenate(list); a = MinimizationOperations.minimize(a, maxDeterminizedStates); break; case REGEXP_INTERSECTION: a = Operations.intersection( exp1.toAutomatonInternal( automata, automaton_provider, maxDeterminizedStates), break; case REGEXP_OPTIONAL: a = Operations.optional(exp1.toAutomatonInternal(automata, automaton_provider, maxDeterminizedStates)); a = MinimizationOperations.minimize(a, maxDeterminizedStates); break; case REGEXP_REPEAT: a = Operations.repeat(exp1.toAutomatonInternal( automata, automaton_provider, maxDeterminizedStates)); a = MinimizationOperations.minimize(a, maxDeterminizedStates); a = Operations.repeat(a, min); a = MinimizationOperations.minimize(a, maxDeterminizedStates);
/**
 * {@inheritDoc}
 *
 * <p>Builds an automaton that combines this query's prefix with its range and
 * returns the terms enumeration produced by compiling that automaton against
 * {@code terms}. {@link TermsEnum#EMPTY} is returned when {@code terms} is
 * {@code null}, when there is no prefix, or when obtaining the enumeration
 * fails with an {@link IOException} (the failure is logged at WARNING level).
 */
@Override
public TermsEnum getTermsEnumForSuggestions(final Terms terms) {
    if (terms == null) {
        return TermsEnum.EMPTY;
    }

    final BytesRef prefix = getPrefix();
    if (prefix == null) {
        return TermsEnum.EMPTY;
    }

    final Automaton prefixAutomaton = PrefixQuery.toAutomaton(prefix);
    final Automaton suggestionAutomaton;
    if (suggestPosition != SuggestPosition.LOWER) {
        // Everything matching the prefix, minus the interval that ends at the lower term
        // (note the inverted inclusivity flag for the interval's upper bound).
        final Automaton belowLower =
                Automata.makeBinaryInterval(null, true, getLowerTerm(), !includesLower());
        // NOTE(review): Integer.MIN_VALUE is passed as the determinization limit; this
        // presumably relies on belowLower already being deterministic - confirm.
        suggestionAutomaton = Operations.minus(prefixAutomaton, belowLower, Integer.MIN_VALUE);
    } else {
        // Terms inside the configured range that also match the prefix.
        final Automaton range = Automata.makeBinaryInterval(
                getLowerTerm(), includesLower(), getUpperTerm(), includesUpper());
        suggestionAutomaton = Operations.intersection(range, prefixAutomaton);
    }

    final CompiledAutomaton compiled = new CompiledAutomaton(suggestionAutomaton);
    try {
        return compiled.getTermsEnum(terms);
    } catch (IOException e) {
        logger.log(Level.WARNING, "Could not compile automaton for range suggestions", e);
        return TermsEnum.EMPTY;
    }
}
/** * Returns the longest BytesRef that is a suffix of all accepted strings. * Worst case complexity: exponential in number of states (this calls * determinize). * @param maxDeterminizedStates maximum number of states determinizing the * automaton can result in. Set higher to allow more complex queries and * lower to prevent memory exhaustion. * @return common suffix, which can be an empty (length 0) BytesRef (never null) */ public static BytesRef getCommonSuffixBytesRef(Automaton a, int maxDeterminizedStates) { // reverse the language of the automaton, then reverse its common prefix. Automaton r = Operations.determinize(reverse(a), maxDeterminizedStates); BytesRef ref = getCommonPrefixBytesRef(r); reverseBytes(ref); return ref; }
/**
 * Builds a (deterministic) automaton for the language of <code>a1</code>
 * with every string accepted by <code>a2</code> removed, i.e. the
 * intersection of <code>a1</code> with the complement of <code>a2</code>.
 * As a side-effect, the automata may be determinized, if not already
 * deterministic.
 * <p>
 * Complexity: quadratic in number of states if a2 already deterministic and
 * exponential in number of a2's states otherwise.
 *
 * @param a1 the automaton to subtract from
 * @param a2 the automaton whose language is removed
 * @param maxDeterminizedStates limit handed to {@code complement} when
 *   complementing <code>a2</code>
 * @return an empty automaton when <code>a1</code> is empty or is the same
 *   instance as <code>a2</code>; <code>a1</code> itself when <code>a2</code>
 *   is empty; otherwise the computed difference
 */
static public Automaton minus(Automaton a1, Automaton a2, int maxDeterminizedStates) {
  boolean nothingRemains = Operations.isEmpty(a1) || a1 == a2;
  if (nothingRemains) {
    return Automata.makeEmpty();
  }

  boolean nothingToRemove = Operations.isEmpty(a2);
  if (nothingToRemove) {
    return a1;
  }

  Automaton notA2 = complement(a2, maxDeterminizedStates);
  return intersection(a1, notA2);
}
public GraphTokenStreamFiniteStrings(TokenStream in) throws IOException { Automaton aut = build(in); this.det = Operations.removeDeadStates(Operations.determinize(aut, DEFAULT_MAX_DETERMINIZED_STATES)); }
/**
 * Builds a (deterministic) automaton accepting exactly the strings that the
 * given automaton rejects.
 * <p>
 * The input is determinized and totalized, the accept flag of every state is
 * inverted, and dead states are removed from the result.
 * <p>
 * Complexity: linear in number of states if already deterministic and
 * exponential otherwise.
 *
 * @param a the automaton to complement
 * @param maxDeterminizedStates maximum number of states determinizing the
 *   automaton can result in. Set higher to allow more complex queries and
 *   lower to prevent memory exhaustion.
 * @return the complemented automaton
 */
static public Automaton complement(Automaton a, int maxDeterminizedStates) {
  Automaton total = totalize(determinize(a, maxDeterminizedStates));

  // Invert acceptance on every state of the totalized automaton.
  final int stateCount = total.getNumStates();
  for (int state = 0; state < stateCount; state++) {
    boolean wasAccepting = total.isAccept(state);
    total.setAccept(state, !wasAccepting);
  }

  return removeDeadStates(total);
}
/**
 * Make matches on objects also match dots in field names.
 * <p>
 * The returned automaton accepts everything {@code automaton} accepts, plus any
 * such string followed by a literal {@code '.'} character and an arbitrary
 * (possibly empty) suffix. For instance, if the original simple pattern is
 * {@code foo}, the result matches {@code foo} as well as {@code foo.} followed
 * by anything.
 *
 * @param automaton the automaton matching the original names
 * @return the union of {@code automaton} and its dotted extension
 */
private static Automaton makeMatchDotsInFieldNames(Automaton automaton) {
    Automaton literalDot = Automata.makeChar('.');
    Automaton anySuffix = Automata.makeAnyString();
    Automaton dottedExtension =
            Operations.concatenate(Arrays.asList(automaton, literalDot, anySuffix));
    return Operations.union(automaton, dottedExtension);
}
protected Automaton convertAutomaton(Automaton a) { if (queryPrefix != null) { a = Operations.concatenate(Arrays.asList(queryPrefix, a)); // This automaton should not blow up during determinize: a = Operations.determinize(a, Integer.MAX_VALUE); } return a; }
a = Operations.determinize(a, maxDeterminizedStates); this.automaton = a; points = a.getStartPoints();
/**
 * Builds an automaton accepting every string accepted by either of the two
 * given automata. Convenience form that delegates to the collection-based
 * {@code union} with both operands.
 * <p>
 * Complexity: linear in number of states.
 *
 * @param a1 first automaton
 * @param a2 second automaton
 * @return an automaton for the union of both languages
 */
public static Automaton union(Automaton a1, Automaton a2) {
  Automaton[] operands = {a1, a2};
  return union(Arrays.asList(operands));
}
/**
 * Builds an automaton accepting the language of <code>a1</code> followed by
 * the language of <code>a2</code>. Convenience form that delegates to the
 * list-based {@code concatenate} with both operands in order.
 * <p>
 * Complexity: linear in total number of states.
 *
 * @param a1 automaton for the leading part
 * @param a2 automaton for the trailing part
 * @return an automaton for the concatenation of both languages
 */
static public Automaton concatenate(Automaton a1, Automaton a2) {
  Automaton[] inOrder = {a1, a2};
  return concatenate(Arrays.asList(inOrder));
}
Automaton a = Operations.union(subs); return Operations.determinize(a, DEFAULT_MAX_DETERMINIZED_STATES);
/**
 * Builds an automaton that accepts <code>count</code> or more concatenated
 * repetitions of the language of the given automaton.
 * <p>
 * The result is the concatenation of <code>count</code> copies of
 * <code>a</code> followed by {@code repeat(a)}; for a <code>count</code> of
 * zero it is just {@code repeat(a)}.
 * <p>
 * Complexity: linear in number of states and in <code>count</code>.
 *
 * @param a the automaton to repeat
 * @param count the minimum number of repetitions
 * @return the repeating automaton
 */
static public Automaton repeat(Automaton a, int count) {
  if (count == 0) {
    return repeat(a);
  }

  List<Automaton> parts = new ArrayList<>();
  // The mandatory repetitions...
  for (int i = 0; i < count; i++) {
    parts.add(a);
  }
  // ...followed by the unbounded tail.
  parts.add(repeat(a));
  return concatenate(parts);
}
throw new IllegalArgumentException("a2 must be deterministic"); assert hasDeadStatesFromInitial(a1) == false; assert hasDeadStatesFromInitial(a2) == false; if (a1.getNumStates() == 0) { return isEmpty(a1);
private Factory(String regexString, int maxDeterminizedStates) { Automaton automaton = new RegExp(regexString).toAutomaton(maxDeterminizedStates); forward = new OffsetReturningRunAutomaton(automaton, false); if (hasLeadingWildcard(automaton)) { Automaton reversed = Operations.determinize(Operations.reverse( new RegExp("(" + regexString + ").*").toAutomaton(maxDeterminizedStates)), maxDeterminizedStates); reverse = new AcceptReturningReverseRunAutomaton(reversed); } else { reverse = null; } }
/**
 * Builds the automaton for this include/exclude configuration.
 * <p>
 * The accepted set comes from {@code include} if present, else from the
 * string union of {@code includeValues} if present, else it is "any string".
 * The excluded set ({@code exclude} if present, else the string union of
 * {@code excludeValues}) is then subtracted using
 * {@code Operations.DEFAULT_MAX_DETERMINIZED_STATES} as the limit.
 *
 * @return the accepted automaton with any configured exclusion removed
 */
private Automaton toAutomaton() {
    final Automaton accepted;
    if (include != null) {
        accepted = include.toAutomaton();
    } else if (includeValues != null) {
        accepted = Automata.makeStringUnion(includeValues);
    } else {
        accepted = Automata.makeAnyString();
    }

    // A regex-style exclusion takes precedence over explicit exclude values.
    if (exclude != null) {
        return Operations.minus(
                accepted, exclude.toAutomaton(), Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    }
    if (excludeValues != null) {
        return Operations.minus(
                accepted,
                Automata.makeStringUnion(excludeValues),
                Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    }
    return accepted;
}
as.add(a); b = concatenate(as); Set<Integer> prevAcceptStates = toSet(b, 0); Automaton.Builder builder = new Automaton.Builder(); builder.copy(b); builder.addEpsilon(s, numStates); prevAcceptStates = toSet(a, numStates);
/**
 * Tells whether the given automaton accepts all strings. The automaton must be
 * minimized.
 * <p>
 * Delegates to the range-based {@code isTotal} using the full code point
 * range, {@link Character#MIN_CODE_POINT} through
 * {@link Character#MAX_CODE_POINT}.
 *
 * @param a the (minimized) automaton to test
 * @return true if the automaton accepts all strings
 */
public static boolean isTotal(Automaton a) {
  final int lowestCodePoint = Character.MIN_CODE_POINT;
  final int highestCodePoint = Character.MAX_CODE_POINT;
  return isTotal(a, lowestCodePoint, highestCodePoint);
}