@Override protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths( List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths, Automaton lookupAutomaton, FST<PairOutputs.Pair<Long,BytesRef>> fst) throws IOException { // TODO: right now there's no penalty for fuzzy/edits, // ie a completion whose prefix matched exactly what the // user typed gets no boost over completions that // required an edit, which get no boost over completions // requiring two edits. I suspect a multiplicative // factor is appropriate (eg, say a fuzzy match must be at // least 2X better weight than the non-fuzzy match to // "compete") ... in which case I think the wFST needs // to be log weights or something ... Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton)); /* Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); w.write(levA.toDot()); w.close(); System.out.println("Wrote LevA to out.dot"); */ return FSTUtil.intersectPrefixPaths(levA, fst); }
@Override public Lookup getLookup(CompletionFieldMapper2x.CompletionFieldType fieldType, CompletionSuggestionContext suggestionContext) { AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(fieldType.name()); if (analyzingSuggestHolder == null) { return null; } int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0; final XAnalyzingSuggester suggester; final Automaton queryPrefix = fieldType.requiresContext() ? ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(), suggestionContext.getContextQueries()) : null; final FuzzyOptions fuzzyOptions = suggestionContext.getFuzzyOptions(); if (fuzzyOptions != null) { suggester = new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, fuzzyOptions.getEditDistance(), fuzzyOptions.isTranspositions(), fuzzyOptions.getFuzzyPrefixLength(), fuzzyOptions.getFuzzyMinLength(), fuzzyOptions.isUnicodeAware(), analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter); } else { suggester = new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter); } return suggester; }
@Override public Lookup getLookup(CompletionFieldMapper.CompletionFieldType fieldType, CompletionSuggestionContext suggestionContext) { AnalyzingSuggestHolder analyzingSuggestHolder = lookupMap.get(fieldType.names().indexName()); if (analyzingSuggestHolder == null) { return null; } int flags = analyzingSuggestHolder.getPreserveSeparator() ? XAnalyzingSuggester.PRESERVE_SEP : 0; final XAnalyzingSuggester suggester; final Automaton queryPrefix = fieldType.requiresContext() ? ContextQuery.toAutomaton(analyzingSuggestHolder.getPreserveSeparator(), suggestionContext.getContextQueries()) : null; if (suggestionContext.isFuzzy()) { suggester = new XFuzzySuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, suggestionContext.getFuzzyEditDistance(), suggestionContext.isFuzzyTranspositions(), suggestionContext.getFuzzyPrefixLength(), suggestionContext.getFuzzyMinLength(), suggestionContext.isFuzzyUnicodeAware(), analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter); } else { suggester = new XAnalyzingSuggester(fieldType.indexAnalyzer(), queryPrefix, fieldType.searchAnalyzer(), flags, analyzingSuggestHolder.maxSurfaceFormsPerAnalyzedForm, analyzingSuggestHolder.maxGraphExpansions, analyzingSuggestHolder.preservePositionIncrements, analyzingSuggestHolder.fst, analyzingSuggestHolder.hasPayloads, analyzingSuggestHolder.maxAnalyzedPathsForOneInput, analyzingSuggestHolder.sepLabel, analyzingSuggestHolder.payloadSep, analyzingSuggestHolder.endByte, analyzingSuggestHolder.holeCharacter); } return suggester; }
@Override protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths( List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths, Automaton lookupAutomaton, FST<PairOutputs.Pair<Long,BytesRef>> fst) throws IOException { // TODO: right now there's no penalty for fuzzy/edits, // ie a completion whose prefix matched exactly what the // user typed gets no boost over completions that // required an edit, which get no boost over completions // requiring two edits. I suspect a multiplicative // factor is appropriate (eg, say a fuzzy match must be at // least 2X better weight than the non-fuzzy match to // "compete") ... in which case I think the wFST needs // to be log weights or something ... Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton)); /* Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); w.write(levA.toDot()); w.close(); System.out.println("Wrote LevA to out.dot"); */ return FSTUtil.intersectPrefixPaths(levA, fst); }
@Override protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths( List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths, Automaton lookupAutomaton, FST<PairOutputs.Pair<Long,BytesRef>> fst) throws IOException { // TODO: right now there's no penalty for fuzzy/edits, // ie a completion whose prefix matched exactly what the // user typed gets no boost over completions that // required an edit, which get no boost over completions // requiring two edits. I suspect a multiplicative // factor is appropriate (eg, say a fuzzy match must be at // least 2X better weight than the non-fuzzy match to // "compete") ... in which case I think the wFST needs // to be log weights or something ... Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton)); /* Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); w.write(levA.toDot()); w.close(); System.out.println("Wrote LevA to out.dot"); */ return FSTUtil.intersectPrefixPaths(levA, fst); }
@Override protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths( List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths, Automaton lookupAutomaton, FST<PairOutputs.Pair<Long,BytesRef>> fst) throws IOException { // TODO: right now there's no penalty for fuzzy/edits, // ie a completion whose prefix matched exactly what the // user typed gets no boost over completions that // required an edit, which get no boost over completions // requiring two edits. I suspect a multiplicative // factor is appropriate (eg, say a fuzzy match must be at // least 2X better weight than the non-fuzzy match to // "compete") ... in which case I think the wFST needs // to be log weights or something ... Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton)); /* Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); w.write(levA.toDot()); w.close(); System.out.println("Wrote LevA to out.dot"); */ return FSTUtil.intersectPrefixPaths(levA, fst); }
@Override protected List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<PairOutputs.Pair<Long,BytesRef>>> prefixPaths, Automaton lookupAutomaton, FST<PairOutputs.Pair<Long,BytesRef>> fst) throws IOException { // TODO: right now there's no penalty for fuzzy/edits, // ie a completion whose prefix matched exactly what the // user typed gets no boost over completions that // required an edit, which get no boost over completions // requiring two edits. I suspect a multiplicative // factor is appropriate (eg, say a fuzzy match must be at // least 2X better weight than the non-fuzzy match to // "compete") ... in which case I think the wFST needs // to be log weights or something ... Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton)); /* Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), "UTF-8"); w.write(levA.toDot()); w.close(); System.out.println("Wrote LevA to out.dot"); */ return FSTUtil.intersectPrefixPaths(levA, fst); }