/**
 * Appends a fresh outgoing transition labeled <code>label</code> to this
 * state and hands back the newly created target state of that transition.
 *
 * @param label label of the transition to add; when assertions are enabled
 *        it is checked that no transition with this label exists yet
 * @return the new state the added transition points to
 */
State newState(int label) {
  assert Arrays.binarySearch(labels, label) < 0 : "State already has transition labeled: " + label;

  // Request room for one more entry in each of the two arrays.
  labels = ArrayUtil.growExact(labels, labels.length + 1);
  states = ArrayUtil.growExact(states, states.length + 1);

  // The new transition occupies the last slot of both arrays.
  labels[labels.length - 1] = label;
  final State target = new State();
  states[states.length - 1] = target;
  return target;
}
/** * Add another character sequence to this automaton. The sequence must be * lexicographically larger or equal compared to any previous sequences added * to this automaton (the input must be sorted). */ public void add(CharsRef current) { if (current.length > MAX_TERM_LENGTH) { throw new IllegalArgumentException("This builder doesn't allow terms that are larger than 1,000 characters, got " + current); } assert stateRegistry != null : "Automaton already built."; assert previous == null || comparator.compare(previous, current) <= 0 : "Input must be in sorted UTF-8 order: " + previous + " >= " + current; assert setPrevious(current); // Descend in the automaton (find matching prefix). int pos = 0, max = current.length(); State next, state = root; while (pos < max && (next = state.lastChild(Character.codePointAt(current, pos))) != null) { state = next; // todo, optimize me pos += Character.charCount(Character.codePointAt(current, pos)); } if (state.hasChildren()) replaceOrRegister(state); addSuffix(state, current, pos); }
/**
 * Appends the tail of <code>current</code>, beginning at index
 * <code>fromIndex</code> (inclusive), as a chain of new states hanging off
 * <code>state</code>. One state is created per Unicode code point and the
 * last state reached is flagged as final.
 *
 * @param state state the suffix is attached to
 * @param current sequence supplying the suffix
 * @param fromIndex index of the first char of the suffix
 */
private void addSuffix(State state, CharSequence current, int fromIndex) {
  State tail = state;
  final int end = current.length();
  for (int i = fromIndex; i < end; ) {
    final int codePoint = Character.codePointAt(current, i);
    tail = tail.newState(codePoint);
    // Step over one or two chars depending on the code point's width.
    i += Character.charCount(codePoint);
  }
  tail.is_final = true;
}
}
/** * Add another character sequence to this automaton. The sequence must be * lexicographically larger or equal compared to any previous sequences added * to this automaton (the input must be sorted). */ public void add(CharsRef current) { if (current.length > MAX_TERM_LENGTH) { throw new IllegalArgumentException("This builder doesn't allow terms that are larger than 1,000 characters, got " + current); } assert stateRegistry != null : "Automaton already built."; assert previous == null || comparator.compare(previous, current) <= 0 : "Input must be in sorted UTF-8 order: " + previous + " >= " + current; assert setPrevious(current); // Descend in the automaton (find matching prefix). int pos = 0, max = current.length(); State next, state = root; while (pos < max && (next = state.lastChild(Character.codePointAt(current, pos))) != null) { state = next; // todo, optimize me pos += Character.charCount(Character.codePointAt(current, pos)); } if (state.hasChildren()) replaceOrRegister(state); addSuffix(state, current, pos); }
/** * Add another character sequence to this automaton. The sequence must be * lexicographically larger or equal compared to any previous sequences added * to this automaton (the input must be sorted). */ public void add(CharsRef current) { assert stateRegistry != null : "Automaton already built."; assert previous == null || comparator.compare(previous, current) <= 0 : "Input must be in sorted UTF-8 order: " + previous + " >= " + current; assert setPrevious(current); // Descend in the automaton (find matching prefix). int pos = 0, max = current.length(); State next, state = root; while (pos < max && (next = state.lastChild(Character.codePointAt(current, pos))) != null) { state = next; // todo, optimize me pos += Character.charCount(Character.codePointAt(current, pos)); } if (state.hasChildren()) replaceOrRegister(state); addSuffix(state, current, pos); }
/** * Add another character sequence to this automaton. The sequence must be * lexicographically larger or equal compared to any previous sequences added * to this automaton (the input must be sorted). */ public void add(CharsRef current) { assert stateRegistry != null : "Automaton already built."; assert previous == null || comparator.compare(previous, current) <= 0 : "Input must be in sorted UTF-8 order: " + previous + " >= " + current; assert setPrevious(current); // Descend in the automaton (find matching prefix). int pos = 0, max = current.length(); State next, state = root; while (pos < max && (next = state.lastChild(Character.codePointAt(current, pos))) != null) { state = next; // todo, optimize me pos += Character.charCount(Character.codePointAt(current, pos)); } if (state.hasChildren()) replaceOrRegister(state); addSuffix(state, current, pos); }
/**
 * Two states are equal if:
 * <ul>
 * <li>they agree on <code>is_final</code>,</li>
 * <li>they have an identical number of outgoing transitions, labeled with
 * the same labels</li>
 * <li>corresponding outgoing transitions lead to the same states (to states
 * with an identical right-language), as decided by
 * <code>referenceEquals</code>.</li>
 * </ul>
 *
 * @param obj object to compare against; may be <code>null</code> or of any type
 * @return <code>true</code> if <code>obj</code> is a <code>State</code>
 *         satisfying the conditions above, <code>false</code> otherwise
 */
@Override
public boolean equals(Object obj) {
  // Honor the Object.equals contract: null and foreign types compare unequal
  // instead of failing in the cast below.
  if (!(obj instanceof State)) {
    return false;
  }
  final State other = (State) obj;
  return is_final == other.is_final
      && Arrays.equals(this.labels, other.labels)
      && referenceEquals(this.states, other.states);
}
/**
 * Two states are equal if:
 * <ul>
 * <li>they agree on <code>is_final</code>,</li>
 * <li>they have an identical number of outgoing transitions, labeled with
 * the same labels</li>
 * <li>corresponding outgoing transitions lead to the same states (to states
 * with an identical right-language), as decided by
 * <code>referenceEquals</code>.</li>
 * </ul>
 *
 * @param obj object to compare against; may be <code>null</code> or of any type
 * @return <code>true</code> if <code>obj</code> is a <code>State</code>
 *         satisfying the conditions above, <code>false</code> otherwise
 */
@Override
public boolean equals(Object obj) {
  // Honor the Object.equals contract: null and foreign types compare unequal
  // instead of failing in the cast below.
  if (!(obj instanceof State)) {
    return false;
  }
  final State other = (State) obj;
  return is_final == other.is_final
      && Arrays.equals(this.labels, other.labels)
      && referenceEquals(this.states, other.states);
}
/**
 * Appends the tail of <code>current</code>, beginning at index
 * <code>fromIndex</code> (inclusive), as a chain of new states hanging off
 * <code>state</code>. One state is created per Unicode code point and the
 * last state reached is flagged as final.
 *
 * @param state state the suffix is attached to
 * @param current sequence supplying the suffix
 * @param fromIndex index of the first char of the suffix
 */
private void addSuffix(State state, CharSequence current, int fromIndex) {
  State tail = state;
  final int end = current.length();
  for (int i = fromIndex; i < end; ) {
    final int codePoint = Character.codePointAt(current, i);
    tail = tail.newState(codePoint);
    // Step over one or two chars depending on the code point's width.
    i += Character.charCount(codePoint);
  }
  tail.is_final = true;
}
}
/**
 * Looks only at the most recently added transition and returns its target
 * state when that transition carries <code>label</code>.
 *
 * @param label transition label to test against the last transition
 * @return the target of the last transition if its label matches, otherwise
 *         <code>null</code> (also when there are no transitions at all)
 */
State lastChild(int label) {
  final int last = labels.length - 1;
  final State match = (last >= 0 && labels[last] == label) ? states[last] : null;
  // Cross-check against the general lookup when assertions are enabled.
  assert match == getState(label);
  return match;
}
/**
 * Two states are equal if:
 * <ul>
 * <li>they agree on <code>is_final</code>,</li>
 * <li>they have an identical number of outgoing transitions, labeled with
 * the same labels</li>
 * <li>corresponding outgoing transitions lead to the same states (to states
 * with an identical right-language), as decided by
 * <code>referenceEquals</code>.</li>
 * </ul>
 *
 * @param obj object to compare against; may be <code>null</code> or of any type
 * @return <code>true</code> if <code>obj</code> is a <code>State</code>
 *         satisfying the conditions above, <code>false</code> otherwise
 */
@Override
public boolean equals(Object obj) {
  // Honor the Object.equals contract: null and foreign types compare unequal
  // instead of failing in the cast below.
  if (!(obj instanceof State)) {
    return false;
  }
  final State other = (State) obj;
  return is_final == other.is_final
      && Arrays.equals(this.labels, other.labels)
      && referenceEquals(this.states, other.states);
}
/**
 * Appends the tail of <code>current</code>, beginning at index
 * <code>fromIndex</code> (inclusive), as a chain of new states hanging off
 * <code>state</code>. One state is created per Unicode code point and the
 * last state reached is flagged as final.
 *
 * @param state state the suffix is attached to
 * @param current sequence supplying the suffix
 * @param fromIndex index of the first char of the suffix
 */
private void addSuffix(State state, CharSequence current, int fromIndex) {
  State tail = state;
  final int end = current.length();
  for (int i = fromIndex; i < end; ) {
    final int codePoint = Character.codePointAt(current, i);
    tail = tail.newState(codePoint);
    // Step over one or two chars depending on the code point's width.
    i += Character.charCount(codePoint);
  }
  tail.is_final = true;
}
}
/**
 * Appends the tail of <code>current</code>, beginning at index
 * <code>fromIndex</code> (inclusive), as a chain of new states hanging off
 * <code>state</code>. One state is created per Unicode code point and the
 * last state reached is flagged as final.
 *
 * @param state state the suffix is attached to
 * @param current sequence supplying the suffix
 * @param fromIndex index of the first char of the suffix
 */
private void addSuffix(State state, CharSequence current, int fromIndex) {
  State tail = state;
  final int end = current.length();
  for (int i = fromIndex; i < end; ) {
    final int codePoint = Character.codePointAt(current, i);
    tail = tail.newState(codePoint);
    // Step over one or two chars depending on the code point's width.
    i += Character.charCount(codePoint);
  }
  tail.is_final = true;
}
}
/**
 * Looks only at the most recently added transition and returns its target
 * state when that transition carries <code>label</code>.
 *
 * @param label transition label to test against the last transition
 * @return the target of the last transition if its label matches, otherwise
 *         <code>null</code> (also when there are no transitions at all)
 */
State lastChild(int label) {
  final int last = labels.length - 1;
  final State match = (last >= 0 && labels[last] == label) ? states[last] : null;
  // Cross-check against the general lookup when assertions are enabled.
  assert match == getState(label);
  return match;
}
/**
 * Looks only at the most recently added transition and returns its target
 * state when that transition carries <code>label</code>.
 *
 * @param label transition label to test against the last transition
 * @return the target of the last transition if its label matches, otherwise
 *         <code>null</code> (also when there are no transitions at all)
 */
State lastChild(int label) {
  final int last = labels.length - 1;
  final State match = (last >= 0 && labels[last] == label) ? states[last] : null;
  // Cross-check against the general lookup when assertions are enabled.
  assert match == getState(label);
  return match;
}
/**
 * Finishes construction and hands back the root state. The state registry
 * is dropped by this call, so the builder accepts no further strings
 * afterwards.
 *
 * @return Root automaton state.
 * @throws IllegalStateException if the registry has already been dropped,
 *         i.e. this method was called before
 */
public State complete() {
  if (this.stateRegistry == null) {
    throw new IllegalStateException();
  }

  // Process the still-pending last child chain below the root, if any.
  if (root.hasChildren()) {
    replaceOrRegister(root);
  }

  this.stateRegistry = null;
  return root;
}
/**
 * Redirects the most recently added outgoing transition so that it points
 * to <code>state</code>. The transition's label is left untouched.
 *
 * @param state new target for the last transition
 */
void replaceLastChild(State state) {
  assert hasChildren() : "No outgoing transitions.";
  final int lastIndex = states.length - 1;
  states[lastIndex] = state;
}
/**
 * Replace the last child of <code>state</code> with an equal state that is
 * already present in <code>stateRegistry</code>, or register the last child
 * if no such state exists yet. The child's own last child is processed
 * first (recursively) whenever the child has outgoing transitions.
 *
 * @param state state whose last child is replaced or registered
 */
private void replaceOrRegister(State state) {
  final State lastChild = state.lastChild();

  // Handle the deeper part of the chain before looking up this child.
  if (lastChild.hasChildren()) {
    replaceOrRegister(lastChild);
  }

  final State canonical = stateRegistry.get(lastChild);
  if (canonical == null) {
    // First state of its kind: it becomes the registered representative.
    stateRegistry.put(lastChild, lastChild);
    return;
  }
  state.replaceLastChild(canonical);
}
/**
 * Redirects the most recently added outgoing transition so that it points
 * to <code>state</code>. The transition's label is left untouched.
 *
 * @param state new target for the last transition
 */
void replaceLastChild(State state) {
  assert hasChildren() : "No outgoing transitions.";
  final int lastIndex = states.length - 1;
  states[lastIndex] = state;
}
/**
 * Finishes construction and hands back the root state. The state registry
 * is dropped by this call, so the builder accepts no further strings
 * afterwards.
 *
 * @return Root automaton state.
 * @throws IllegalStateException if the registry has already been dropped,
 *         i.e. this method was called before
 */
public State complete() {
  if (this.stateRegistry == null) {
    throw new IllegalStateException();
  }

  // Process the still-pending last child chain below the root, if any.
  if (root.hasChildren()) {
    replaceOrRegister(root);
  }

  this.stateRegistry = null;
  return root;
}