// Constructs a LevenshteinAutomata over the tail of termText: the string is built from the
// (termText.length - realPrefixLength) entries that start at index realPrefixLength, and the
// transpositions flag is forwarded as the second constructor argument.
// NOTE(review): termText is presumably an int[] of code points -- confirm at its declaration.
new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions);
// Constructs a LevenshteinAutomata from ints; the second constructor argument is
// Character.MAX_CODE_POINT when unicodeAware is true and 255 otherwise (presumably the largest
// alphabet symbol -- confirm against the constructor). The automaton for maxEdits is then
// requested with a prefix string built from nonFuzzyPrefix entries of string.ints starting at
// string.offset, and the result is appended to subs.
LevenshteinAutomata lev = new LevenshteinAutomata( ints, unicodeAware ? Character.MAX_CODE_POINT : 255, transpositions); subs.add(lev.toAutomaton(maxEdits, UnicodeUtil.newString(string.ints, string.offset, nonFuzzyPrefix)));
/**
 * Builds the Levenshtein automaton for this fuzzy condition.
 *
 * <p>The field's type is looked up in {@code schema}; a field with no registered properties is
 * treated as {@code Type.text}. Only {@code Type.string} and {@code Type.text} fields are
 * accepted: for those, the value is run through {@code analyze} with the schema's analyzer and
 * the automaton for {@code maxEdits} is built from the analyzed text.
 *
 * @param schema source of the field properties and of the analyzer handed to {@code analyze}
 * @return the automaton produced by {@code LevenshteinAutomata.toAutomaton(maxEdits)}
 * @throws UnsupportedOperationException if the field type is neither string nor text
 */
@Override
public Automaton getAutomaton(Options schema) {
  final Properties fieldProperties = schema.getProperties(field);

  // Fields without registered properties fall back to the text type.
  final Type type;
  if (fieldProperties == null) {
    type = Type.text;
  } else {
    type = fieldProperties.getType();
  }

  final boolean textual = type == Type.string || type == Type.text;
  if (!textual) {
    throw new UnsupportedOperationException(
        String.format("Fuzzy queries cannot be supported for field type %s", type));
  }

  final String term = analyze(field, value, schema.analyzer);
  return new LevenshteinAutomata(term, transpositions).toAutomaton(maxEdits);
}
/** initialize levenshtein DFAs up to maxDistance, if possible */ private List<CompiledAutomaton> initAutomata(int maxDistance) { final List<CompiledAutomaton> runAutomata = dfaAtt.automata(); //System.out.println("cached automata size: " + runAutomata.size()); if (runAutomata.size() <= maxDistance && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { LevenshteinAutomata builder = new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions); String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength); for (int i = runAutomata.size(); i <= maxDistance; i++) { Automaton a = builder.toAutomaton(i, prefix); //System.out.println("compute automaton n=" + i); runAutomata.add(new CompiledAutomaton(a, true, false)); } } return runAutomata; }
/** initialize levenshtein DFAs up to maxDistance, if possible */ private List<CompiledAutomaton> initAutomata(int maxDistance) { final List<CompiledAutomaton> runAutomata = dfaAtt.automata(); //System.out.println("cached automata size: " + runAutomata.size()); if (runAutomata.size() <= maxDistance && maxDistance <= LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { LevenshteinAutomata builder = new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions); String prefix = UnicodeUtil.newString(termText, 0, realPrefixLength); for (int i = runAutomata.size(); i <= maxDistance; i++) { Automaton a = builder.toAutomaton(i, prefix); //System.out.println("compute automaton n=" + i); runAutomata.add(new CompiledAutomaton(a, true, false)); } } return runAutomata; }
// Builds a LevenshteinAutomata from ints (second argument: Character.MAX_CODE_POINT when
// unicodeAware, else 255) and appends to subs the maxEdits automaton prefixed with the string
// made from nonFuzzyPrefix entries of string.ints starting at string.offset.
LevenshteinAutomata lev = new LevenshteinAutomata(ints, unicodeAware ? Character.MAX_CODE_POINT : 255, transpositions); subs.add(lev.toAutomaton(maxEdits, UnicodeUtil.newString(string.ints, string.offset, nonFuzzyPrefix)));
// Same construction and subs.add call as the statement pair above.
LevenshteinAutomata lev = new LevenshteinAutomata( ints, unicodeAware ? Character.MAX_CODE_POINT : 255, transpositions); subs.add(lev.toAutomaton(maxEdits, UnicodeUtil.newString(string.ints, string.offset, nonFuzzyPrefix)));
// Same construction and subs.add call as the statement pair above.
LevenshteinAutomata lev = new LevenshteinAutomata( ints, unicodeAware ? Character.MAX_CODE_POINT : 255, transpositions); subs.add(lev.toAutomaton(maxEdits, UnicodeUtil.newString(string.ints, string.offset, nonFuzzyPrefix)));
// Same construction and subs.add call as the statement pair above.
LevenshteinAutomata lev = new LevenshteinAutomata( ints, unicodeAware ? Character.MAX_CODE_POINT : 255, transpositions); subs.add(lev.toAutomaton(maxEdits, UnicodeUtil.newString(string.ints, string.offset, nonFuzzyPrefix)));
// Same construction and subs.add call as the statement pair above.
LevenshteinAutomata lev = new LevenshteinAutomata(ints, unicodeAware ? Character.MAX_CODE_POINT : 255, transpositions); subs.add(lev.toAutomaton(maxEdits, UnicodeUtil.newString(string.ints, string.offset, nonFuzzyPrefix)));
// Construction only: the second argument is Character.MAX_CODE_POINT when unicodeAware is true
// and 255 otherwise; no automaton is requested from lev in this statement.
LevenshteinAutomata lev = new LevenshteinAutomata(ints, unicodeAware ? Character.MAX_CODE_POINT : 255, transpositions);
// Constructs a LevenshteinAutomata over the entries of termText from index realPrefixLength to
// the end, forwarding the transpositions flag.
// NOTE(review): termText is presumably an int[] of code points -- confirm at its declaration.
new LevenshteinAutomata(UnicodeUtil.newString(termText, realPrefixLength, termText.length - realPrefixLength), transpositions);
// Caps the query's prefix length at termLength, splits termText at that index into a prefix
// string and a suffix string, builds a LevenshteinAutomata over the suffix using the query's
// transpositions setting, and requests the automaton for the query's max edits with the prefix
// string passed as the second argument.
int prefixLength = Math.min(fq.getPrefixLength(), termLength); String suffix = UnicodeUtil.newString(termText, prefixLength, termText.length - prefixLength); LevenshteinAutomata builder = new LevenshteinAutomata(suffix, fq.getTranspositions()); String prefix = UnicodeUtil.newString(termText, 0, prefixLength); Automaton automaton = builder.toAutomaton(fq.getMaxEdits(), prefix);
protected void flattenQuery(FuzzyQuery query, float pathBoost, Object sourceOverride, IndexReader reader, Callback callback) { float boost = pathBoost; if (query.getMaxEdits() == 0) { callback.flattened(query.getTerm().bytes(), boost, sourceOverride); } String term = query.getTerm().bytes().utf8ToString(); if (query.getPrefixLength() >= term.length()) { callback.flattened(query.getTerm().bytes(), boost, sourceOverride); return; } FuzzyQueryInfo key = new FuzzyQueryInfo(term, query); if (!sentAutomata.add(key)) { return; } // Make an effort to resolve the fuzzy query to an automata String fuzzed = term.substring(query.getPrefixLength()); int editDistance = query.getMaxEdits(); if (editDistance > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { editDistance = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE; } LevenshteinAutomata automata = new LevenshteinAutomata(fuzzed, query.getTranspositions()); Automaton automaton = automata.toAutomaton(editDistance); if (query.getPrefixLength() > 0) { Automaton prefix = Automata.makeString(term.substring(0, query.getPrefixLength())); automaton = Operations.concatenate(prefix, automaton); } Object source = sourceOverride == null ? key : sourceOverride; callback.flattened(automaton, boost, source.hashCode()); }