/**
 * Expert: creates a byte-level run automaton from {@code a}.
 *
 * <p>If {@code isBinary} is {@code true}, {@code a} is handed to the superclass untouched (the
 * caller asserts it is already byte-based); otherwise it is first converted with
 * {@link UTF32ToUTF8}.
 *
 * @param a the automaton to wrap
 * @param isBinary {@code true} if {@code a} is already byte-based and must not be converted
 * @param maxDeterminizedStates forwarded unchanged to the superclass constructor
 */
public ByteRunAutomaton(Automaton a, boolean isBinary, int maxDeterminizedStates) {
  super(
      isBinary ? a : new UTF32ToUTF8().convert(a),
      256, // NOTE(review): presumably the number of distinct byte values; confirm against super
      maxDeterminizedStates);
}
} else { binary = new UTF32ToUTF8().convert(automaton);
@Override protected Automaton convertAutomaton(Automaton a) { if (unicodeAware) { // FLORIAN EDIT: get converted Automaton from superclass Automaton utf8automaton = new UTF32ToUTF8().convert(super.convertAutomaton(a)); // This automaton should not blow up during determinize: utf8automaton = Operations.determinize(utf8automaton, Integer.MAX_VALUE); return utf8automaton; } else { return super.convertAutomaton(a); } }
/**
 * Expert: creates a byte-level run automaton from {@code a}.
 *
 * <p>If {@code isBinary} is {@code true}, {@code a} is handed to the superclass untouched (the
 * caller asserts it is already byte-based); otherwise it is first converted with
 * {@link UTF32ToUTF8}.
 *
 * @param a the automaton to wrap
 * @param isBinary {@code true} if {@code a} is already byte-based and must not be converted
 * @param maxDeterminizedStates forwarded unchanged to the superclass constructor
 */
public ByteRunAutomaton(Automaton a, boolean isBinary, int maxDeterminizedStates) {
  super(
      isBinary ? a : new UTF32ToUTF8().convert(a),
      256, // NOTE(review): presumably the number of distinct byte values; confirm against super
      maxDeterminizedStates);
}
/**
 * Expert: creates a byte-level run automaton from {@code a}.
 *
 * <p>If {@code isBinary} is {@code true}, {@code a} is handed to the superclass untouched (the
 * caller asserts it is already byte-based); otherwise it is first converted with
 * {@link UTF32ToUTF8}.
 *
 * @param a the automaton to wrap
 * @param isBinary {@code true} if {@code a} is already byte-based and must not be converted
 * @param maxDeterminizedStates forwarded unchanged to the superclass constructor
 */
public ByteRunAutomaton(Automaton a, boolean isBinary, int maxDeterminizedStates) {
  super(
      isBinary ? a : new UTF32ToUTF8().convert(a),
      256, // NOTE(review): presumably the number of distinct byte values; confirm against super
      true, // NOTE(review): meaning of this flag is defined by the superclass constructor; confirm
      maxDeterminizedStates);
}
/**
 * Expert: creates a byte-level run automaton from {@code a}.
 *
 * <p>If {@code isBinary} is {@code true}, {@code a} is handed to the superclass untouched (the
 * caller asserts it is already byte-based); otherwise it is first converted with
 * {@link UTF32ToUTF8}.
 *
 * @param a the automaton to wrap
 * @param isBinary {@code true} if {@code a} is already byte-based and must not be converted
 * @param maxDeterminizedStates forwarded unchanged to the superclass constructor
 */
public ByteRunAutomaton(Automaton a, boolean isBinary, int maxDeterminizedStates) {
  super(
      isBinary ? a : new UTF32ToUTF8().convert(a),
      256, // NOTE(review): presumably the number of distinct byte values; confirm against super
      true, // NOTE(review): meaning of this flag is defined by the superclass constructor; confirm
      maxDeterminizedStates);
}
/**
 * Returns {@code a} unchanged unless {@code unicodeAware} is set, in which case the automaton is
 * re-encoded with {@link UTF32ToUTF8} and then determinized.
 *
 * @param a the automaton to convert
 * @return {@code a} itself, or its determinized UTF-8 encoding when {@code unicodeAware} is
 *     {@code true}
 */
@Override
protected Automaton convertAutomaton(Automaton a) {
  if (!unicodeAware) {
    return a;
  }
  final Automaton byteBased = new UTF32ToUTF8().convert(a);
  // Determinization is capped at DEFAULT_MAX_DETERMINIZED_STATES.
  return Operations.determinize(byteBased, DEFAULT_MAX_DETERMINIZED_STATES);
}
@Override protected Automaton convertAutomaton(Automaton a) { if (unicodeAware) { // FLORIAN EDIT: get converted Automaton from superclass Automaton utf8automaton = new UTF32ToUTF8().convert(super.convertAutomaton(a)); // This automaton should not blow up during determinize: utf8automaton = Operations.determinize(utf8automaton, Integer.MAX_VALUE); return utf8automaton; } else { return super.convertAutomaton(a); } }
@Override protected Automaton convertAutomaton(Automaton a) { if (unicodeAware) { // FLORIAN EDIT: get converted Automaton from superclass Automaton utf8automaton = new UTF32ToUTF8().convert(super.convertAutomaton(a)); // This automaton should not blow up during determinize: utf8automaton = Operations.determinize(utf8automaton, Integer.MAX_VALUE); return utf8automaton; } else { return super.convertAutomaton(a); } }
@Override protected Automaton convertAutomaton(Automaton a) { if (unicodeAware) { // FLORIAN EDIT: get converted Automaton from superclass Automaton utf8automaton = new UTF32ToUTF8().convert(super.convertAutomaton(a)); // This automaton should not blow up during determinize: utf8automaton = Operations.determinize(utf8automaton, Integer.MAX_VALUE); return utf8automaton; } else { return super.convertAutomaton(a); } }
@Override protected Automaton convertAutomaton(Automaton a) { if (unicodeAware) { // FLORIAN EDIT: get converted Automaton from superclass Automaton utf8automaton = new UTF32ToUTF8().convert(super.convertAutomaton(a)); // This automaton should not blow up during determinize: utf8automaton = Operations.determinize(utf8automaton, Integer.MAX_VALUE); return utf8automaton; } else { return super.convertAutomaton(a); } }
@Override public Weight createWeight(IndexSearcher searcher, boolean needsScores) throws IOException { CompletionTokenStream stream = (CompletionTokenStream) analyzer.tokenStream(getField(), getTerm().text()); Set<IntsRef> refs = new HashSet<>(); Automaton automaton = toLevenshteinAutomata(stream.toAutomaton(unicodeAware), refs); if (unicodeAware) { Automaton utf8automaton = new UTF32ToUTF8().convert(automaton); utf8automaton = Operations.determinize(utf8automaton, maxDeterminizedStates); automaton = utf8automaton; } // TODO Accumulating all refs is bad, because the resulting set may be very big. // TODO Better iterate over automaton again inside FuzzyCompletionWeight? return new FuzzyCompletionWeight(this, automaton, refs); }
} else { binary = new UTF32ToUTF8().convert(automaton);
} else { binary = new UTF32ToUTF8().convert(automaton);
} else { binary = new UTF32ToUTF8().convert(automaton);