try { Automaton lookupAutomaton = toLookupAutomaton(key); List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths = FSTUtil.intersectPrefixPaths(convertAutomaton(lookupAutomaton), fst); if (sameSurfaceForm(utf8Key, output2)) { results.add(getLookupResult(completion.output.output1, output2, spare)); break; prefixPaths = getFullPrefixPaths(prefixPaths, lookupAutomaton, fst); LookupResult result = getLookupResult(completion.output.output1, completion.output.output2, spare);
private LookupResult getLookupResult(Long output1, BytesRef output2, CharsRefBuilder spare) { LookupResult result; if (hasPayloads) { int sepIndex = -1; for(int i=0;i<output2.length;i++) { if (output2.bytes[output2.offset+i] == payloadSep) { sepIndex = i; break; } } assert sepIndex != -1; final int payloadLen = output2.length - sepIndex - 1; spare.copyUTF8Bytes(output2.bytes, output2.offset, sepIndex); BytesRef payload = new BytesRef(payloadLen); System.arraycopy(output2.bytes, sepIndex+1, payload.bytes, 0, payloadLen); payload.length = payloadLen; result = new LookupResult(spare.toString(), decodeWeight(output1), payload); } else { spare.copyUTF8Bytes(output2); result = new LookupResult(spare.toString(), decodeWeight(output1)); } return result; }
public void addSurface(BytesRef surface, BytesRef payload, long cost) throws IOException { int surfaceIndex = -1; long encodedWeight = cost == -1 ? cost : encodeWeight(cost);
final Automaton toAutomaton(TokenStream ts, final TokenStreamToAutomaton ts2a) throws IOException { // Create corresponding automaton: labels are bytes // from each analyzed token, with byte 0 used as // separator between tokens: Automaton automaton = ts2a.toAutomaton(ts); automaton = replaceSep(automaton); automaton = convertAutomaton(automaton); // TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings // assert SpecialOperations.isFinite(automaton); // Get all paths from the automaton (there can be // more than one path, eg if the analyzer created a // graph using SynFilter or WDF): return automaton; }
final Automaton toLookupAutomaton(final CharSequence key) throws IOException { // TODO: is there a Reader from a CharSequence? // Turn tokenstream into automaton: Automaton automaton = null; try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) { automaton = getTokenStreamToAutomaton().toAutomaton(ts); } automaton = replaceSep(automaton); // TODO: we can optimize this somewhat by determinizing // while we convert // This automaton should not blow up during determinize: automaton = Operations.determinize(automaton, Integer.MAX_VALUE); return automaton; }
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException { final TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton(); Automaton automaton; try (TokenStream ts = stream) { automaton = toAutomaton(ts, ts2a); } LimitedFiniteStringsIterator finiteStrings = new LimitedFiniteStringsIterator(automaton, maxGraphExpansions); Set<IntsRef> set = new HashSet<>(); for (IntsRef string = finiteStrings.next(); string != null; string = finiteStrings.next()) { set.add(IntsRef.deepCopyOf(string)); } return Collections.unmodifiableSet(set); }
@Override public void build(InputIterator iterator) throws IOException { String prefix = getClass().getSimpleName(); Directory tempDir = getTempDir(); OfflineSorter sorter = new OfflineSorter(tempDir, prefix, new AnalyzingComparator(hasPayloads)); TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton(); String tempSortedFileName = null; new LimitedFiniteStringsIterator(toAutomaton(surfaceForm, ts2a), maxGraphExpansions); for (IntsRef string; (string = finiteStrings.next()) != null; count++) { Util.toBytesRef(string, scratch); output.writeInt(encodeWeight(iterator.weight()));
BytesRefBuilder scratch = new BytesRefBuilder(); TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton(); new LimitedFiniteStringsIterator(toAutomaton(surfaceForm, ts2a), maxGraphExpansions); for (IntsRef string; (string = finiteStrings.next()) != null; count++) { Util.toBytesRef(string, scratch); output.writeInt(encodeWeight(iterator.weight()));
@Override public TokenStreamToAutomaton getTokenStreamToAutomaton() { final TokenStreamToAutomaton tsta = super.getTokenStreamToAutomaton(); tsta.setUnicodeArcs(unicodeAware); return tsta; }
final Automaton toAutomaton(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException { try (TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString())) { return toAutomaton(ts, ts2a); } }
@Override protected Automaton convertAutomaton(Automaton a) { if (unicodeAware) { // FLORIAN EDIT: get converted Automaton from superclass Automaton utf8automaton = new UTF32ToUTF8().convert(super.convertAutomaton(a)); // This automaton should not blow up during determinize: utf8automaton = Operations.determinize(utf8automaton, Integer.MAX_VALUE); return utf8automaton; } else { return super.convertAutomaton(a); } }
@Override public void build(InputIterator iterator) throws IOException { String prefix = getClass().getSimpleName(); Directory tempDir = getTempDir(); OfflineSorter sorter = new OfflineSorter(tempDir, prefix, new AnalyzingComparator(hasPayloads)); TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton(); String tempSortedFileName = null; new LimitedFiniteStringsIterator(toAutomaton(surfaceForm, ts2a), maxGraphExpansions); for (IntsRef string; (string = finiteStrings.next()) != null; count++) { Util.toBytesRef(string, scratch); output.writeInt(encodeWeight(iterator.weight()));
public Set<IntsRef> toFiniteStrings(TokenStream stream) throws IOException { final TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton(); Automaton automaton; try (TokenStream ts = stream) { automaton = toAutomaton(ts, ts2a); } LimitedFiniteStringsIterator finiteStrings = new LimitedFiniteStringsIterator(automaton, maxGraphExpansions); Set<IntsRef> set = new HashSet<>(); for (IntsRef string = finiteStrings.next(); string != null; string = finiteStrings.next()) { set.add(IntsRef.deepCopyOf(string)); } return Collections.unmodifiableSet(set); }
final Automaton toAutomaton(TokenStream ts, final TokenStreamToAutomaton ts2a) throws IOException { // Create corresponding automaton: labels are bytes // from each analyzed token, with byte 0 used as // separator between tokens: Automaton automaton = ts2a.toAutomaton(ts); automaton = replaceSep(automaton); automaton = convertAutomaton(automaton); // TODO: LUCENE-5660 re-enable this once we disallow massive suggestion strings // assert SpecialOperations.isFinite(automaton); // Get all paths from the automaton (there can be // more than one path, eg if the analyzer created a // graph using SynFilter or WDF): return automaton; }
final Automaton toLookupAutomaton(final CharSequence key) throws IOException { // TODO: is there a Reader from a CharSequence? // Turn tokenstream into automaton: Automaton automaton = null; try (TokenStream ts = queryAnalyzer.tokenStream("", key.toString())) { automaton = getTokenStreamToAutomaton().toAutomaton(ts); } automaton = replaceSep(automaton); // TODO: we can optimize this somewhat by determinizing // while we convert // This automaton should not blow up during determinize: automaton = Operations.determinize(automaton, Integer.MAX_VALUE); return automaton; }
@Override public TokenStreamToAutomaton getTokenStreamToAutomaton() { final TokenStreamToAutomaton tsta = super.getTokenStreamToAutomaton(); tsta.setUnicodeArcs(unicodeAware); return tsta; }
final Automaton toAutomaton(final BytesRef surfaceForm, final TokenStreamToAutomaton ts2a) throws IOException { try (TokenStream ts = indexAnalyzer.tokenStream("", surfaceForm.utf8ToString())) { return toAutomaton(ts, ts2a); } }
@Override protected Automaton convertAutomaton(Automaton a) { if (unicodeAware) { // FLORIAN EDIT: get converted Automaton from superclass Automaton utf8automaton = new UTF32ToUTF8().convert(super.convertAutomaton(a)); // This automaton should not blow up during determinize: utf8automaton = Operations.determinize(utf8automaton, Integer.MAX_VALUE); return utf8automaton; } else { return super.convertAutomaton(a); } }
try { Automaton lookupAutomaton = toLookupAutomaton(key); List<FSTUtil.Path<Pair<Long,BytesRef>>> prefixPaths = FSTUtil.intersectPrefixPaths(convertAutomaton(lookupAutomaton), fst); if (sameSurfaceForm(utf8Key, output2)) { results.add(getLookupResult(completion.output.output1, output2, spare)); break; prefixPaths = getFullPrefixPaths(prefixPaths, lookupAutomaton, fst); LookupResult result = getLookupResult(completion.output.output1, completion.output.output2, spare);
@Override public void build(InputIterator iterator) throws IOException { String prefix = getClass().getSimpleName(); Directory tempDir = getTempDir(); OfflineSorter sorter = new OfflineSorter(tempDir, prefix, new AnalyzingComparator(hasPayloads)); TokenStreamToAutomaton ts2a = getTokenStreamToAutomaton(); String tempSortedFileName = null; new LimitedFiniteStringsIterator(toAutomaton(surfaceForm, ts2a), maxGraphExpansions); for (IntsRef string; (string = finiteStrings.next()) != null; count++) { Util.toBytesRef(string, scratch); output.writeInt(encodeWeight(iterator.weight()));