/**
 * Tests whether the automaton accepts the given string. The automaton must be
 * deterministic (checked only via {@code assert}).
 * <p>
 * Complexity: linear in the length of the string.
 * <p>
 * <b>Note:</b> for full performance, use the {@link RunAutomaton} class.
 *
 * @param a the automaton to walk, starting from state 0
 * @param s the input, consumed one Unicode code point at a time
 * @return true if the state reached after the last code point is an accept state
 */
public static boolean run(Automaton a, String s) {
  assert a.isDeterministic();
  final int length = s.length();
  int current = 0;
  int pos = 0;
  while (pos < length) {
    final int codePoint = s.codePointAt(pos);
    current = a.step(current, codePoint);
    if (current == -1) {
      // step() reported no destination for this code point: reject.
      return false;
    }
    // Advance by one or two chars depending on whether this was a surrogate pair.
    pos += Character.charCount(codePoint);
  }
  return a.isAccept(current);
}
/**
 * Tests whether the automaton accepts the given sequence of Unicode code points.
 * The automaton must be deterministic (checked only via {@code assert}).
 * <p>
 * Complexity: linear in the length of the input.
 * <p>
 * <b>Note:</b> for full performance, use the {@link RunAutomaton} class.
 *
 * @param a the automaton to walk, starting from state 0
 * @param s the input; the slice {@code ints[offset .. offset+length)} is consumed
 * @return true if the state reached after the last code point is an accept state
 */
public static boolean run(Automaton a, IntsRef s) {
  assert a.isDeterministic();
  int current = 0;
  int consumed = 0;
  while (consumed < s.length) {
    current = a.step(current, s.ints[s.offset + consumed]);
    if (current == -1) {
      // step() reported no destination for this label: reject.
      return false;
    }
    consumed++;
  }
  return a.isAccept(current);
}
/**
 * Computes the longest string that is a prefix of all accepted strings and
 * visits each state at most once. The automaton must be deterministic.
 *
 * @param a the automaton to inspect, walked from state 0
 * @return common prefix, which can be an empty (length 0) String (never null)
 * @throws IllegalArgumentException if {@code a} is not deterministic
 */
public static String getCommonPrefix(Automaton a) {
  if (!a.isDeterministic()) {
    throw new IllegalArgumentException("input automaton must be deterministic");
  }
  final StringBuilder prefix = new StringBuilder();
  final HashSet<Integer> seen = new HashSet<>();
  final Transition scratch = new Transition();
  int state = 0;
  while (true) {
    seen.add(state);
    // The prefix ends at an accept state or wherever the path branches or dead-ends.
    if (a.isAccept(state) || a.getNumTransitions(state) != 1) {
      break;
    }
    a.getTransition(state, 0, scratch);
    // Only a single-label transition to a not-yet-visited state extends the prefix.
    if (scratch.min != scratch.max || seen.contains(scratch.dest)) {
      break;
    }
    prefix.appendCodePoint(scratch.min);
    state = scratch.dest;
  }
  return prefix.toString();
}
/**
 * Builds a new (deterministic) automaton that accepts exactly the given string.
 *
 * @param s the string to accept; one state is created per Unicode code point,
 *          plus the initial state
 * @return a linear chain automaton whose last state is the only accept state
 */
public static Automaton makeString(String s) {
  final Automaton automaton = new Automaton();
  int tail = automaton.createState();
  int index = 0;
  while (index < s.length()) {
    final int next = automaton.createState();
    final int codePoint = s.codePointAt(index);
    automaton.addTransition(tail, next, codePoint);
    tail = next;
    // Step over both chars of a surrogate pair when needed.
    index += Character.charCount(codePoint);
  }
  automaton.setAccept(tail, true);
  automaton.finishState();

  assert automaton.isDeterministic();
  assert Operations.hasDeadStates(automaton) == false;
  return automaton;
}
/** If this automaton accepts a single input, return it. Else, return null. * The automaton must be deterministic. */ public static IntsRef getSingleton(Automaton a) { if (a.isDeterministic() == false) { throw new IllegalArgumentException("input automaton must be deterministic"); } IntsRefBuilder builder = new IntsRefBuilder(); HashSet<Integer> visited = new HashSet<>(); int s = 0; Transition t = new Transition(); while (true) { visited.add(s); if (a.isAccept(s) == false) { if (a.getNumTransitions(s) == 1) { a.getTransition(s, 0, t); if (t.min == t.max && !visited.contains(t.dest)) { builder.append(t.min); s = t.dest; continue; } } } else if (a.getNumTransitions(s) == 0) { return builder.get(); } // Automaton accepts more than one string: return null; } }
if (a1.isDeterministic() == false) { throw new IllegalArgumentException("a1 must be deterministic"); if (a2.isDeterministic() == false) { throw new IllegalArgumentException("a2 must be deterministic");
/**
 * Build an automaton accepting all terms with the specified prefix.
 *
 * @param prefix the required leading bytes; each byte is used as an unsigned
 *               label in the range 0..255
 * @return a deterministic automaton: one chained state per prefix byte, ending in
 *         an accept state that loops to itself on every label 0..255
 */
public static Automaton toAutomaton(BytesRef prefix) {
  // One state per prefix byte plus the initial state; one transition per
  // prefix byte plus the final self-loop.
  final int capacity = prefix.length + 1;
  final Automaton result = new Automaton(capacity, capacity);

  int tail = result.createState();
  int consumed = 0;
  while (consumed < prefix.length) {
    final int next = result.createState();
    final int label = prefix.bytes[prefix.offset + consumed] & 0xff;
    result.addTransition(tail, next, label);
    tail = next;
    consumed++;
  }

  result.setAccept(tail, true);
  // After the prefix, any byte sequence is accepted.
  result.addTransition(tail, tail, 0, 255);
  result.finishState();

  assert result.isDeterministic();
  return result;
}
/**
 * Builds a new (deterministic) automaton that accepts exactly the given
 * binary term.
 *
 * @param term the bytes to accept; each byte is used as an unsigned label
 *             in the range 0..255
 * @return a linear chain automaton whose last state is the only accept state
 */
public static Automaton makeBinary(BytesRef term) {
  final Automaton automaton = new Automaton();
  int tail = automaton.createState();
  int consumed = 0;
  while (consumed < term.length) {
    final int next = automaton.createState();
    // Mask to treat the signed Java byte as an unsigned label.
    final int unsignedByte = term.bytes[term.offset + consumed] & 0xff;
    automaton.addTransition(tail, next, unsignedByte);
    tail = next;
    consumed++;
  }
  automaton.setAccept(tail, true);
  automaton.finishState();

  assert automaton.isDeterministic();
  assert Operations.hasDeadStates(automaton) == false;
  return automaton;
}
assert a.isDeterministic(); return a;
if (a.isDeterministic()) { assert result.isDeterministic(); return result;
/**
 * Runs a pre-built automaton.
 *
 * @param factory attribute factory handed to the superclass constructor
 * @param dfa the automaton to run; must already be deterministic
 * @throws IllegalArgumentException if {@code dfa} is not deterministic
 */
public SimplePatternTokenizer(AttributeFactory factory, Automaton dfa) {
  super(factory);

  // Determinizing is a possibly very costly operation, and callers may construct
  // this tokenizer frequently. Requiring it up front keeps this constructor from
  // being a hidden performance trap.
  if (!dfa.isDeterministic()) {
    throw new IllegalArgumentException("please determinize the incoming automaton first");
  }

  this.runDFA = new CharacterRunAutomaton(dfa, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
}
/** Runs a pre-built automaton. */ public SimplePatternSplitTokenizer(AttributeFactory factory, Automaton dfa) { super(factory); // we require user to do this up front because it is a possibly very costly operation, and user may be creating us frequently, not // realizing this ctor is otherwise trappy if (dfa.isDeterministic() == false) { throw new IllegalArgumentException("please determinize the incoming automaton first"); } runDFA = new CharacterRunAutomaton(dfa, Operations.DEFAULT_MAX_DETERMINIZED_STATES); }
assert a.isDeterministic(): a.toDot();
/**
 * Decides whether the given string is accepted by the automaton. The automaton
 * must be deterministic (checked only via {@code assert}).
 * <p>
 * Complexity: linear in the length of the string.
 * <p>
 * <b>Note:</b> for full performance, use the {@link RunAutomaton} class.
 *
 * @param a the automaton, walked from state 0
 * @param s the input string, read as Unicode code points
 * @return true if the walk ends in an accept state; false if it ends elsewhere
 *         or a step yields -1
 */
public static boolean run(Automaton a, String s) {
  assert a.isDeterministic();
  int state = 0;
  for (int offset = 0; offset < s.length(); ) {
    final int label = s.codePointAt(offset);
    final int target = a.step(state, label);
    if (target == -1) {
      return false;
    }
    state = target;
    // Supplementary code points occupy two chars.
    offset += Character.charCount(label);
  }
  return a.isAccept(state);
}
/**
 * Checks acceptance of a string by the automaton. The automaton must be
 * deterministic (checked only via {@code assert}).
 * <p>
 * Complexity: linear in the length of the string.
 * <p>
 * <b>Note:</b> for full performance, use the {@link RunAutomaton} class.
 *
 * @param a the automaton; the walk begins at state 0
 * @param s the string to test, processed code point by code point
 * @return whether the state reached at the end of the input is an accept state
 */
public static boolean run(Automaton a, String s) {
  assert a.isDeterministic();
  int here = 0;
  int charIndex = 0;
  int width;
  while (charIndex < s.length()) {
    final int codePoint = s.codePointAt(charIndex);
    width = Character.charCount(codePoint);
    final int there = a.step(here, codePoint);
    if (there != -1) {
      here = there;
      charIndex += width;
    } else {
      // No destination for this code point: the string is rejected.
      return false;
    }
  }
  return a.isAccept(here);
}
/**
 * Creates a new (deterministic) automaton that accepts the single given string.
 *
 * @param s the only string the result accepts; consumed as Unicode code points
 * @return the automaton: a chain of states with one transition per code point,
 *         the last state being the accept state
 */
public static Automaton makeString(String s) {
  final Automaton chain = new Automaton();
  int previous = chain.createState();
  for (int offset = 0; offset < s.length(); ) {
    final int created = chain.createState();
    final int label = s.codePointAt(offset);
    chain.addTransition(previous, created, label);
    previous = created;
    offset += Character.charCount(label);
  }
  chain.setAccept(previous, true);
  chain.finishState();
  assert chain.isDeterministic();
  assert Operations.hasDeadStates(chain) == false;
  return chain;
}
/**
 * Produces a new (deterministic) automaton whose language is exactly the
 * given string.
 *
 * @param s the string to accept, interpreted as a sequence of Unicode code points
 * @return an automaton with one state per code point plus the initial state;
 *         only the final state of the chain accepts
 */
public static Automaton makeString(String s) {
  final Automaton a = new Automaton();
  final int length = s.length();
  int from = a.createState();
  int charIndex = 0;
  int width;
  while (charIndex < length) {
    final int to = a.createState();
    final int codePoint = s.codePointAt(charIndex);
    width = Character.charCount(codePoint);
    a.addTransition(from, to, codePoint);
    from = to;
    charIndex += width;
  }
  // "from" now holds the end of the chain.
  a.setAccept(from, true);
  a.finishState();
  assert a.isDeterministic();
  assert Operations.hasDeadStates(a) == false;
  return a;
}
/**
 * Build an automaton accepting all terms with the specified prefix.
 *
 * @param prefix the required leading bytes; each byte is used as an unsigned
 *               label in the range 0..255
 * @return a deterministic automaton: one chained state per prefix byte, ending in
 *         an accept state that loops to itself on every label 0..255
 */
public static Automaton toAutomaton(BytesRef prefix) {
  // Exactly prefix.length + 1 states (initial state plus one per byte) and
  // prefix.length + 1 transitions (one per byte plus the final self-loop) are
  // created below, so pass those counts to the constructor up front.
  final int numStatesAndTransitions = prefix.length + 1;
  final Automaton automaton = new Automaton(numStatesAndTransitions, numStatesAndTransitions);
  int lastState = automaton.createState();
  for (int i = 0; i < prefix.length; i++) {
    int state = automaton.createState();
    // Mask to treat the signed Java byte as an unsigned label.
    automaton.addTransition(lastState, state, prefix.bytes[prefix.offset + i] & 0xff);
    lastState = state;
  }
  automaton.setAccept(lastState, true);
  // After the prefix, any byte sequence is accepted.
  automaton.addTransition(lastState, lastState, 0, 255);
  automaton.finishState();
  assert automaton.isDeterministic();
  return automaton;
}
/**
 * Build an automaton accepting all terms with the specified prefix.
 *
 * @param prefix the bytes every accepted term must start with; bytes are
 *               converted to unsigned labels (0..255)
 * @return a deterministic automaton consisting of a chain over the prefix bytes
 *         followed by an accept state with a 0..255 self-loop
 */
public static Automaton toAutomaton(BytesRef prefix) {
  // The same count serves for states and transitions: the chain has
  // prefix.length + 1 states, and prefix.length chain transitions plus one self-loop.
  final int expected = prefix.length + 1;
  final Automaton built = new Automaton(expected, expected);

  int from = built.createState();
  for (int done = 0; done < prefix.length; done++) {
    final int to = built.createState();
    final int unsignedByte = prefix.bytes[prefix.offset + done] & 0xff;
    built.addTransition(from, to, unsignedByte);
    from = to;
  }

  // "from" is now the end of the chain: accept here and on any continuation.
  built.setAccept(from, true);
  built.addTransition(from, from, 0, 255);
  built.finishState();
  assert built.isDeterministic();
  return built;
}
/**
 * Build an automaton accepting all terms with the specified prefix.
 *
 * @param prefix the bytes every accepted term must start with; bytes are
 *               converted to unsigned labels (0..255)
 * @return a deterministic automaton consisting of a chain over the prefix bytes
 *         followed by an accept state with a 0..255 self-loop
 */
public static Automaton toAutomaton(BytesRef prefix) {
  // The final size is known in advance: the initial state plus one state per
  // prefix byte, and one transition per prefix byte plus the trailing self-loop.
  // Handing both counts to the constructor avoids relying on default sizing.
  final int prefixLength = prefix.length;
  final Automaton automaton = new Automaton(prefixLength + 1, prefixLength + 1);

  int lastState = automaton.createState();
  for (int i = 0; i < prefixLength; i++) {
    final int state = automaton.createState();
    final int label = prefix.bytes[prefix.offset + i] & 0xff; // unsigned byte value
    automaton.addTransition(lastState, state, label);
    lastState = state;
  }

  automaton.setAccept(lastState, true);
  // Once the prefix has matched, accept any further bytes.
  automaton.addTransition(lastState, lastState, 0, 255);
  automaton.finishState();
  assert automaton.isDeterministic();
  return automaton;
}