/**
 * Builds a new (deterministic and minimal) automaton whose language is the
 * union of the supplied {@link BytesRef}s, each holding a UTF-8 encoded
 * string.
 *
 * @param utf8Strings
 *          The UTF-8 encoded input strings. The collection must be in sorted
 *          order.
 *
 * @return An {@link Automaton} accepting all input strings; for an empty
 *         collection this is the result of {@code makeEmpty()}. The resulting
 *         automaton is codepoint based (full unicode codepoints on
 *         transitions).
 */
public static Automaton makeStringUnion(Collection<BytesRef> utf8Strings) {
  // With no input strings the builder is never invoked.
  return utf8Strings.isEmpty()
      ? makeEmpty()
      : DaciukMihovAutomatonBuilder.build(utf8Strings);
}
}
return Automata.makeEmpty();
/**
 * Computes a language difference: the returned (deterministic) automaton
 * accepts the intersection of the language of <code>a1</code> and the
 * complement of the language of <code>a2</code>. As a side-effect, the
 * automata may be determinized, if not already deterministic.
 * <p>
 * Complexity: quadratic in number of states if a2 already deterministic and
 * exponential in number of a2's states otherwise.
 *
 * @param a1 automaton whose language is kept
 * @param a2 automaton whose language is removed from that of <code>a1</code>
 * @param maxDeterminizedStates forwarded unchanged to <code>complement</code>
 * @return an empty automaton when <code>a1</code> is empty or both arguments
 *         are the same instance; <code>a1</code> itself when <code>a2</code>
 *         is empty; otherwise the intersection described above
 */
static public Automaton minus(Automaton a1, Automaton a2, int maxDeterminizedStates) {
  // Nothing to subtract from.
  if (Operations.isEmpty(a1)) {
    return Automata.makeEmpty();
  }
  // Subtracting an automaton from itself leaves nothing.
  if (a1 == a2) {
    return Automata.makeEmpty();
  }
  // Subtracting nothing leaves a1 as it is.
  if (Operations.isEmpty(a2)) {
    return a1;
  }
  Automaton notA2 = complement(a2, maxDeterminizedStates);
  return intersection(a1, notA2);
}
/**
 * Creates a new (deterministic) automaton that accepts a single codepoint
 * whose value lies in the closed interval <code>[min, max]</code>.
 *
 * @param min lowest accepted codepoint (inclusive)
 * @param max highest accepted codepoint (inclusive)
 * @return a two-state automaton for the range, or the result of
 *         <code>makeEmpty()</code> when <code>min &gt; max</code>
 */
public static Automaton makeCharRange(int min, int max) {
  final boolean emptyInterval = min > max;
  if (emptyInterval) {
    return makeEmpty();
  }

  final Automaton range = new Automaton();
  final int start = range.createState();
  final int end = range.createState();
  range.setAccept(end, true);
  // Single transition labelled with the whole [min, max] interval.
  range.addTransition(start, end, min, max);
  range.finishState();
  return range;
}
break; case REGEXP_EMPTY: a = Automata.makeEmpty(); break; case REGEXP_STRING:
return Automata.makeEmpty(); // matches nothing } else if (subs.size() == 1) {
/** * Returns a function that filters a document map based on the given include and exclude rules. * @see #filter(Map, String[], String[]) for details */ public static Function<Map<String, ?>, Map<String, Object>> filter(String[] includes, String[] excludes) { CharacterRunAutomaton matchAllAutomaton = new CharacterRunAutomaton(Automata.makeAnyString()); CharacterRunAutomaton include; if (includes == null || includes.length == 0) { include = matchAllAutomaton; } else { Automaton includeA = Regex.simpleMatchToAutomaton(includes); includeA = makeMatchDotsInFieldNames(includeA); include = new CharacterRunAutomaton(includeA); } Automaton excludeA; if (excludes == null || excludes.length == 0) { excludeA = Automata.makeEmpty(); } else { excludeA = Regex.simpleMatchToAutomaton(excludes); excludeA = makeMatchDotsInFieldNames(excludeA); } CharacterRunAutomaton exclude = new CharacterRunAutomaton(excludeA); // NOTE: We cannot use Operations.minus because of the special case that // we want all sub properties to match as soon as an object matches return (map) -> filter(map, include, 0, exclude, 0, matchAllAutomaton); }
/**
 * Builds a new (deterministic and minimal) automaton whose language is the
 * union of the supplied {@link BytesRef}s, each holding a UTF-8 encoded
 * string.
 *
 * @param utf8Strings
 *          The UTF-8 encoded input strings. The collection must be in sorted
 *          order.
 *
 * @return An {@link Automaton} accepting all input strings; for an empty
 *         collection this is the result of {@code makeEmpty()}. The resulting
 *         automaton is codepoint based (full unicode codepoints on
 *         transitions).
 */
public static Automaton makeStringUnion(Collection<BytesRef> utf8Strings) {
  // With no input strings the builder is never invoked.
  return utf8Strings.isEmpty()
      ? makeEmpty()
      : DaciukMihovAutomatonBuilder.build(utf8Strings);
}
}
/**
 * Builds a new (deterministic and minimal) automaton whose language is the
 * union of the supplied {@link BytesRef}s, each holding a UTF-8 encoded
 * string.
 *
 * @param utf8Strings
 *          The UTF-8 encoded input strings. The collection must be in sorted
 *          order.
 *
 * @return An {@link Automaton} accepting all input strings; for an empty
 *         collection this is the result of {@code makeEmpty()}. The resulting
 *         automaton is codepoint based (full unicode codepoints on
 *         transitions).
 */
public static Automaton makeStringUnion(Collection<BytesRef> utf8Strings) {
  // With no input strings the builder is never invoked.
  return utf8Strings.isEmpty()
      ? makeEmpty()
      : DaciukMihovAutomatonBuilder.build(utf8Strings);
}
}
/**
 * Builds a new (deterministic and minimal) automaton whose language is the
 * union of the supplied {@link BytesRef}s, each holding a UTF-8 encoded
 * string.
 *
 * @param utf8Strings
 *          The UTF-8 encoded input strings. The collection must be in sorted
 *          order.
 *
 * @return An {@link Automaton} accepting all input strings; for an empty
 *         collection this is the result of {@code makeEmpty()}. The resulting
 *         automaton is codepoint based (full unicode codepoints on
 *         transitions).
 */
public static Automaton makeStringUnion(Collection<BytesRef> utf8Strings) {
  // With no input strings the builder is never invoked.
  return utf8Strings.isEmpty()
      ? makeEmpty()
      : DaciukMihovAutomatonBuilder.build(utf8Strings);
}
}
/**
 * Combines the registered automata and terms into one automaton.
 * <ul>
 * <li>no automata and no terms: an empty automaton</li>
 * <li>no automata but some terms: the result of {@code buildTermsAutomata()}</li>
 * <li>exactly one automaton and no terms: that automaton itself</li>
 * <li>otherwise: the union of every registered automaton, plus the terms
 * automaton when there are terms</li>
 * </ul>
 *
 * @return the automaton selected or built per the cases above
 */
private Automaton buildAcceptableTerms() {
    if (automata.isEmpty()) {
        // Only the plain terms (if any) contribute.
        return terms.isEmpty() ? Automata.makeEmpty() : buildTermsAutomata();
    }
    if (automata.size() == 1 && terms.isEmpty()) {
        // A single source needs no union.
        return automata.get(0).automaton;
    }

    // +1 leaves room for the optional terms automaton.
    final List<Automaton> parts = new ArrayList<>(automata.size() + 1);
    for (AutomatonSourceInfo source : automata) {
        parts.add(source.automaton);
    }
    if (terms.isEmpty() == false) {
        parts.add(buildTermsAutomata());
    }
    return Operations.union(parts);
}
/**
 * Build the {@link CharacterRunAutomaton} that represents the reindex-from-remote whitelist and make sure that it
 * doesn't whitelist the world.
 *
 * @param whitelist simple-match patterns; an empty list produces an automaton built from {@code Automata.makeEmpty()}
 * @return the automaton built from the minimized whitelist patterns
 * @throws IllegalArgumentException if the minimized automaton is total, i.e. the whitelist accepts all addresses
 */
static CharacterRunAutomaton buildRemoteWhitelist(List<String> whitelist) {
    if (whitelist.isEmpty()) {
        return new CharacterRunAutomaton(Automata.makeEmpty());
    }

    final String[] patterns = whitelist.toArray(Strings.EMPTY_ARRAY);
    final Automaton combined = Regex.simpleMatchToAutomaton(patterns);
    final Automaton minimized = MinimizationOperations.minimize(combined, Operations.DEFAULT_MAX_DETERMINIZED_STATES);

    if (Operations.isTotal(minimized) == false) {
        return new CharacterRunAutomaton(minimized);
    }
    throw new IllegalArgumentException("Refusing to start because whitelist " + whitelist + " accepts all addresses. "
            + "This would allow users to reindex-from-remote any URL they like effectively having Elasticsearch make HTTP GETs "
            + "for them.");
}
/**
 * Computes a language difference: the returned (deterministic) automaton
 * accepts the intersection of the language of <code>a1</code> and the
 * complement of the language of <code>a2</code>. As a side-effect, the
 * automata may be determinized, if not already deterministic.
 * <p>
 * Complexity: quadratic in number of states if a2 already deterministic and
 * exponential in number of a2's states otherwise.
 *
 * @param a1 automaton whose language is kept
 * @param a2 automaton whose language is removed from that of <code>a1</code>
 * @param maxDeterminizedStates forwarded unchanged to <code>complement</code>
 * @return an empty automaton when <code>a1</code> is empty or both arguments
 *         are the same instance; <code>a1</code> itself when <code>a2</code>
 *         is empty; otherwise the intersection described above
 */
static public Automaton minus(Automaton a1, Automaton a2, int maxDeterminizedStates) {
  // Nothing to subtract from.
  if (Operations.isEmpty(a1)) {
    return Automata.makeEmpty();
  }
  // Subtracting an automaton from itself leaves nothing.
  if (a1 == a2) {
    return Automata.makeEmpty();
  }
  // Subtracting nothing leaves a1 as it is.
  if (Operations.isEmpty(a2)) {
    return a1;
  }
  Automaton notA2 = complement(a2, maxDeterminizedStates);
  return intersection(a1, notA2);
}
/**
 * Computes a language difference: the returned (deterministic) automaton
 * accepts the intersection of the language of <code>a1</code> and the
 * complement of the language of <code>a2</code>. As a side-effect, the
 * automata may be determinized, if not already deterministic.
 * <p>
 * Complexity: quadratic in number of states if a2 already deterministic and
 * exponential in number of a2's states otherwise.
 *
 * @param a1 automaton whose language is kept
 * @param a2 automaton whose language is removed from that of <code>a1</code>
 * @param maxDeterminizedStates forwarded unchanged to <code>complement</code>
 * @return an empty automaton when <code>a1</code> is empty or both arguments
 *         are the same instance; <code>a1</code> itself when <code>a2</code>
 *         is empty; otherwise the intersection described above
 */
static public Automaton minus(Automaton a1, Automaton a2, int maxDeterminizedStates) {
  // Nothing to subtract from.
  if (Operations.isEmpty(a1)) {
    return Automata.makeEmpty();
  }
  // Subtracting an automaton from itself leaves nothing.
  if (a1 == a2) {
    return Automata.makeEmpty();
  }
  // Subtracting nothing leaves a1 as it is.
  if (Operations.isEmpty(a2)) {
    return a1;
  }
  Automaton notA2 = complement(a2, maxDeterminizedStates);
  return intersection(a1, notA2);
}
/**
 * Computes a language difference: the returned (deterministic) automaton
 * accepts the intersection of the language of <code>a1</code> and the
 * complement of the language of <code>a2</code>. As a side-effect, the
 * automata may be determinized, if not already deterministic.
 * <p>
 * Complexity: quadratic in number of states if a2 already deterministic and
 * exponential in number of a2's states otherwise.
 *
 * @param a1 automaton whose language is kept
 * @param a2 automaton whose language is removed from that of <code>a1</code>
 * @param maxDeterminizedStates forwarded unchanged to <code>complement</code>
 * @return an empty automaton when <code>a1</code> is empty or both arguments
 *         are the same instance; <code>a1</code> itself when <code>a2</code>
 *         is empty; otherwise the intersection described above
 */
static public Automaton minus(Automaton a1, Automaton a2, int maxDeterminizedStates) {
  // Nothing to subtract from.
  if (Operations.isEmpty(a1)) {
    return Automata.makeEmpty();
  }
  // Subtracting an automaton from itself leaves nothing.
  if (a1 == a2) {
    return Automata.makeEmpty();
  }
  // Subtracting nothing leaves a1 as it is.
  if (Operations.isEmpty(a2)) {
    return a1;
  }
  Automaton notA2 = complement(a2, maxDeterminizedStates);
  return intersection(a1, notA2);
}
/**
 * Creates a new (deterministic) automaton that accepts a single codepoint
 * whose value lies in the closed interval <code>[min, max]</code>.
 *
 * @param min lowest accepted codepoint (inclusive)
 * @param max highest accepted codepoint (inclusive)
 * @return a two-state automaton for the range, or the result of
 *         <code>makeEmpty()</code> when <code>min &gt; max</code>
 */
public static Automaton makeCharRange(int min, int max) {
  final boolean emptyInterval = min > max;
  if (emptyInterval) {
    return makeEmpty();
  }

  final Automaton range = new Automaton();
  final int start = range.createState();
  final int end = range.createState();
  range.setAccept(end, true);
  // Single transition labelled with the whole [min, max] interval.
  range.addTransition(start, end, min, max);
  range.finishState();
  return range;
}
/**
 * Creates a new (deterministic) automaton that accepts a single codepoint
 * whose value lies in the closed interval <code>[min, max]</code>.
 *
 * @param min lowest accepted codepoint (inclusive)
 * @param max highest accepted codepoint (inclusive)
 * @return a two-state automaton for the range, or the result of
 *         <code>makeEmpty()</code> when <code>min &gt; max</code>
 */
public static Automaton makeCharRange(int min, int max) {
  final boolean emptyInterval = min > max;
  if (emptyInterval) {
    return makeEmpty();
  }

  final Automaton range = new Automaton();
  final int start = range.createState();
  final int end = range.createState();
  range.setAccept(end, true);
  // Single transition labelled with the whole [min, max] interval.
  range.addTransition(start, end, min, max);
  range.finishState();
  return range;
}
/**
 * Creates a new (deterministic) automaton that accepts a single codepoint
 * whose value lies in the closed interval <code>[min, max]</code>.
 *
 * @param min lowest accepted codepoint (inclusive)
 * @param max highest accepted codepoint (inclusive)
 * @return a two-state automaton for the range, or the result of
 *         <code>makeEmpty()</code> when <code>min &gt; max</code>
 */
public static Automaton makeCharRange(int min, int max) {
  final boolean emptyInterval = min > max;
  if (emptyInterval) {
    return makeEmpty();
  }

  final Automaton range = new Automaton();
  final int start = range.createState();
  final int end = range.createState();
  range.setAccept(end, true);
  // Single transition labelled with the whole [min, max] interval.
  range.addTransition(start, end, min, max);
  range.finishState();
  return range;
}
/** * Returns a function that filters a document map based on the given include and exclude rules. * @see #filter(Map, String[], String[]) for details */ public static Function<Map<String, ?>, Map<String, Object>> filter(String[] includes, String[] excludes) { CharacterRunAutomaton matchAllAutomaton = new CharacterRunAutomaton(Automata.makeAnyString()); CharacterRunAutomaton include; if (includes == null || includes.length == 0) { include = matchAllAutomaton; } else { Automaton includeA = Regex.simpleMatchToAutomaton(includes); includeA = makeMatchDotsInFieldNames(includeA); include = new CharacterRunAutomaton(includeA); } Automaton excludeA; if (excludes == null || excludes.length == 0) { excludeA = Automata.makeEmpty(); } else { excludeA = Regex.simpleMatchToAutomaton(excludes); excludeA = makeMatchDotsInFieldNames(excludeA); } CharacterRunAutomaton exclude = new CharacterRunAutomaton(excludeA); // NOTE: We cannot use Operations.minus because of the special case that // we want all sub properties to match as soon as an object matches return (map) -> filter(map, include, 0, exclude, 0, matchAllAutomaton); }