/**
 * Creates a {@link FuzzySuggester} for the given field type and builds it from the
 * dictionary cached under the field's name.
 *
 * @param indexAnalyzer analyzer handed to the suggester as its index-time analyzer
 * @param queryAnalyzer analyzer handed to the suggester as its query-time analyzer
 * @param fieldType     supplies the field name (dictionary cache key) and the
 *                      preserve-position-increments flag
 * @return the freshly built suggester
 * @throws Exception if constructing or building the suggester fails
 */
@Override
public FuzzySuggester getSuggester(Analyzer indexAnalyzer, Analyzer queryAnalyzer, ShardSuggestService.FieldType fieldType) throws Exception {
    // Positional arguments below follow Lucene's documented FuzzySuggester constructor.
    final int options = FuzzySuggester.EXACT_FIRST | FuzzySuggester.PRESERVE_SEP;
    final int maxSurfaceFormsPerAnalyzedForm = 256;
    final int maxGraphExpansions = -1; // -1 is documented by Lucene as "no limit"

    FuzzySuggester suggester = new FuzzySuggester(
            indexAnalyzer,
            queryAnalyzer,
            options,
            maxSurfaceFormsPerAnalyzedForm,
            maxGraphExpansions,
            fieldType.preservePositionIncrements(),
            FuzzySuggester.DEFAULT_MAX_EDITS,
            FuzzySuggester.DEFAULT_TRANSPOSITIONS,
            FuzzySuggester.DEFAULT_NON_FUZZY_PREFIX,
            FuzzySuggester.DEFAULT_MIN_FUZZY_LENGTH,
            FuzzySuggester.DEFAULT_UNICODE_AWARE);

    // The dictionary is looked up (and loaded on demand) by field name.
    suggester.build(dictCache.getUnchecked(fieldType.field()));
    return suggester;
}
}
/**
 * Looks up {@code searchQuery} in the fuzzy suggester and returns the keys of the hits
 * as strings, in the order the suggester returned them.
 *
 * @param searchQuery the text to find suggestions for
 * @return the string form of each result key; empty when nothing matched
 * @throws IOException if the underlying lookup fails
 */
private List<String> getUsingFuzzySuggester(String searchQuery) throws IOException {
    // Arguments: onlyMorePopular = false, at most 2 results requested.
    List<LookupResult> matches = fuzzySuggester.lookup(searchQuery, false, 2);

    List<String> keys = new ArrayList<>();
    for (LookupResult match : matches) {
        keys.add(match.key.toString());
    }
    return keys;
}
@Override protected List<FSTUtil.Path<Pair<Long,BytesRef>>> getFullPrefixPaths(List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths, Automaton lookupAutomaton, FST<Pair<Long,BytesRef>> fst) throws IOException { // TODO: right now there's no penalty for fuzzy/edits, // ie a completion whose prefix matched exactly what the // user typed gets no boost over completions that // required an edit, which get no boost over completions // requiring two edits. I suspect a multiplicative // factor is appropriate (eg, say a fuzzy match must be at // least 2X better weight than the non-fuzzy match to // "compete") ... in which case I think the wFST needs // to be log weights or something ... Automaton levA = convertAutomaton(toLevenshteinAutomata(lookupAutomaton)); /* Writer w = new OutputStreamWriter(new FileOutputStream("out.dot"), StandardCharsets.UTF_8); w.write(levA.toDot()); w.close(); System.out.println("Wrote LevA to out.dot"); */ return FSTUtil.intersectPrefixPaths(levA, fst); }
/**
 * Collects per-field memory statistics for every suggester currently held in the
 * analyzing and fuzzy suggester caches of this shard.
 *
 * <p>The key set of a cache is only a snapshot: an entry can be evicted or invalidated
 * between listing the key and fetching its value, in which case {@code getIfPresent}
 * returns {@code null}. Such entries are skipped instead of triggering a
 * {@link NullPointerException}.
 *
 * @return a response holding one {@code FstIndexShardStats} entry per cached suggester
 */
public ShardSuggestStatisticsResponse getStatistics() {
    ShardSuggestStatisticsResponse shardSuggestStatisticsResponse = new ShardSuggestStatisticsResponse(shardId());

    for (FieldType fieldType : analyzingSuggesterCache.asMap().keySet()) {
        AnalyzingSuggester suggester = analyzingSuggesterCache.getIfPresent(fieldType);
        if (suggester == null) {
            // Entry disappeared after the key was listed; nothing to report for it.
            continue;
        }
        long sizeInBytes = suggester.ramBytesUsed();
        FstStats.FstIndexShardStats fstIndexShardStats =
                new FstStats.FstIndexShardStats(shardId, "analyzingsuggester", fieldType, sizeInBytes);
        shardSuggestStatisticsResponse.getFstIndexShardStats().add(fstIndexShardStats);
    }

    for (FieldType fieldType : fuzzySuggesterCache.asMap().keySet()) {
        // NOTE(review): assumes the fuzzy cache's values are assignable to
        // AnalyzingSuggester - confirm against the cache declaration.
        AnalyzingSuggester suggester = fuzzySuggesterCache.getIfPresent(fieldType);
        if (suggester == null) {
            // Entry disappeared after the key was listed; nothing to report for it.
            continue;
        }
        long sizeInBytes = suggester.ramBytesUsed();
        FstStats.FstIndexShardStats fstIndexShardStats =
                new FstStats.FstIndexShardStats(shardId, "fuzzysuggester", fieldType, sizeInBytes);
        shardSuggestStatisticsResponse.getFstIndexShardStats().add(fstIndexShardStats);
    }

    return shardSuggestStatisticsResponse;
}
/**
 * Finishes the indexing phase: configures the spell checker, builds the fuzzy suggester
 * from the indexed {@code WORD_FIELD} values, closes the writer and opens a searcher on
 * the directory.
 *
 * @throws RuntimeException wrapping any {@link IOException} raised along the way
 */
@Override
public void indexingDone() {
    try {
        spellChecker = new DirectSpellChecker();
        spellChecker.setMaxEdits(2);
        spellChecker.setAccuracy(0.1f);
        spellChecker.setMinPrefix(0);

        // This reader is opened from the writer, so it must be created before the
        // writer is closed further down.
        reader = DirectoryReader.open(writer);
        fuzzySuggester = new FuzzySuggester(directory, "", writer.getAnalyzer());

        // Weight source that needs no scores and supplies no per-document values
        // (getValues returns null).
        LongValuesSource noWeights = new LongValuesSource() {
            @Override
            public boolean needsScores() {
                return false;
            }

            @Override
            public LongValues getValues(LeafReaderContext ctx, DoubleValues scores) throws IOException {
                return null;
            }
        };
        Dictionary wordDictionary = new DocumentValueSourceDictionary(reader, WORD_FIELD, noWeights);
        fuzzySuggester.build(wordDictionary);

        writer.close();

        // Searching uses a separate reader opened on the directory after the writer is closed.
        DirectoryReader searchReader = DirectoryReader.open(directory);
        searcher = new IndexSearcher(searchReader);
    } catch (IOException ioe) {
        throw new RuntimeException(ioe);
    }
}
/**
 * Produces suggestion strings for a shard-level request, choosing the lookup strategy
 * from the request's suggest type.
 *
 * <ul>
 *   <li>{@code "full"}: the cached analyzing suggester for the request's field type.</li>
 *   <li>{@code "fuzzy"}: the cached fuzzy suggester for the request's field type.</li>
 *   <li>anything else: the per-field lookup, asked for {@code size + 1} results with the
 *       "only more popular" flag set. When the requested similarity is below 1.0 and
 *       fewer suggestions than requested were found, the results of
 *       {@code getSimilarSuggestions} are appended.</li>
 * </ul>
 *
 * @param shardSuggestRequest the request carrying term, field, size, type and similarity
 * @return the suggestions as strings
 * @throws IOException if a lookup fails
 */
private Collection<String> getSuggestions(ShardSuggestRequest shardSuggestRequest) throws IOException {
    final String suggestType = shardSuggestRequest.suggestType();

    if ("full".equals(suggestType)) {
        AnalyzingSuggester suggester = analyzingSuggesterCache.getUnchecked(new FieldType(shardSuggestRequest));
        List<LookupResult> hits = Lists.newArrayList();
        hits.addAll(suggester.lookup(shardSuggestRequest.term(), false, shardSuggestRequest.size()));
        return Collections2.transform(hits, new LookupResultToStringFunction());
    }

    if ("fuzzy".equals(suggestType)) {
        List<LookupResult> hits = Lists.newArrayList();
        hits.addAll(fuzzySuggesterCache.getUnchecked(new FieldType(shardSuggestRequest))
                .lookup(shardSuggestRequest.term(), false, shardSuggestRequest.size()));
        return Collections2.transform(hits, new LookupResultToStringFunction());
    }

    // Default strategy: plain per-field lookup, optionally topped up with similar terms.
    List<LookupResult> hits = Lists.newArrayList();
    hits.addAll(lookupCache.getUnchecked(shardSuggestRequest.field())
            .lookup(shardSuggestRequest.term(), true, shardSuggestRequest.size() + 1));
    Collection<String> found = Collections2.transform(hits, new LookupResultToStringFunction());

    float similarity = shardSuggestRequest.similarity();
    boolean needsTopUp = similarity < 1.0f && found.size() < shardSuggestRequest.size();
    if (!needsTopUp) {
        return found;
    }

    // The transformed view is copied into a mutable list before the extras are appended.
    Collection<String> combined = Lists.newArrayList(found);
    combined.addAll(getSimilarSuggestions(shardSuggestRequest));
    return combined;
}