/** Starting from node, find the top N min cost * completions to a final node. */ public static <T> TopResults<T> shortestPaths(FST<T> fst, FST.Arc<T> fromNode, T startOutput, Comparator<T> comparator, int topN, boolean allowEmptyString) throws IOException { // All paths are kept, so we can pass topN for // maxQueueDepth and the pruning is admissible: TopNSearcher<T> searcher = new TopNSearcher<>(fst, topN, topN, comparator); // since this search is initialized with a single start node // it is okay to start with an empty input path here searcher.addStartPaths(fromNode, startOutput, allowEmptyString, new IntsRefBuilder()); return searcher.search(); }
/** If this automaton accepts a single input, return it. Else, return null. * The automaton must be deterministic. */ public static IntsRef getSingleton(Automaton a) { if (a.isDeterministic() == false) { throw new IllegalArgumentException("input automaton must be deterministic"); } IntsRefBuilder builder = new IntsRefBuilder(); HashSet<Integer> visited = new HashSet<>(); int s = 0; Transition t = new Transition(); while (true) { visited.add(s); if (a.isAccept(s) == false) { if (a.getNumTransitions(s) == 1) { a.getTransition(s, 0, t); if (t.min == t.max && !visited.contains(t.dest)) { builder.append(t.min); s = t.dest; continue; } } } else if (a.getNumTransitions(s) == 0) { return builder.get(); } // Automaton accepts more than one string: return null; } }
/** * Constructor. * * @param a Automaton to create finite string from. * @param startState The starting state for each path. * @param endState The state where each path should stop or -1 if only accepted states should be final. */ public FiniteStringsIterator(Automaton a, int startState, int endState) { this.a = a; this.endState = endState; this.nodes = new PathNode[16]; for (int i = 0, end = nodes.length; i < end; i++) { nodes[i] = new PathNode(); } this.string = new IntsRefBuilder(); this.pathStates = new BitSet(a.getNumStates()); this.string.setLength(0); this.emitEmptyString = a.isAccept(0); // Start iteration with node startState. if (a.getNumTransitions(startState) > 0) { pathStates.set(startState); nodes[0].resetState(a, startState); string.append(startState); } }
/** Reverse lookup (lookup by output instead of by input), * in the special case when your FSTs outputs are * strictly ascending. This locates the input/output * pair where the output is equal to the target, and will * return null if that output does not exist. * * <p>NOTE: this only works with {@code FST<Long>}, only * works when the outputs are ascending in order with * the inputs. * For example, simple ordinals (0, 1, * 2, ...), or file offets (when appending to a file) * fit this. */ public static IntsRef getByOutput(FST<Long> fst, long targetOutput) throws IOException { final BytesReader in = fst.getBytesReader(); // TODO: would be nice not to alloc this on every lookup FST.Arc<Long> arc = fst.getFirstArc(new FST.Arc<Long>()); FST.Arc<Long> scratchArc = new FST.Arc<>(); final IntsRefBuilder result = new IntsRefBuilder(); return getByOutput(fst, targetOutput, in, arc, scratchArc, result); }
// Build the extended input in a fresh builder: a copy of the ints of the current path's input, followed by the label of path.arc.
IntsRefBuilder newInput = new IntsRefBuilder(); newInput.copyInts(path.input.get()); newInput.append(path.arc.label);
/**
 * Reads {@code num} conversion lines from {@code reader} and compiles them
 * into an FST that maps each source string to its replacement.
 *
 * <p>Each line is split on whitespace and must yield exactly three parts;
 * the second part is the source string and the third is its replacement.
 * Mappings are collected in a {@link TreeMap}, so they are handed to the
 * FST builder in sorted key order.
 *
 * @param reader source of the conversion lines
 * @param num number of conversion lines to read
 * @return the result of {@code Builder.finish()} for the collected mappings
 * @throws ParseException if the input ends before {@code num} lines were
 *         read, or if a line does not split into exactly three parts
 * @throws IllegalStateException if the same source string is mapped twice
 * @throws IOException if reading from {@code reader} fails
 */
private FST<CharsRef> parseConversions(LineNumberReader reader, int num) throws IOException, ParseException {
  Map<String,String> mappings = new TreeMap<>();

  for (int i = 0; i < num; i++) {
    String line = reader.readLine();
    if (line == null) {
      // readLine() signals end of input with null; report it as a parse
      // error instead of failing with a NullPointerException below.
      throw new ParseException("unexpected end of input: expected " + num
          + " conversion lines but found only " + i, reader.getLineNumber());
    }

    String[] parts = line.split("\\s+");
    if (parts.length != 3) {
      throw new ParseException("invalid syntax: " + line, reader.getLineNumber());
    }
    if (mappings.put(parts[1], parts[2]) != null) {
      throw new IllegalStateException("duplicate mapping specified for: " + parts[1]);
    }
  }

  Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton();
  Builder<CharsRef> builder = new Builder<>(FST.INPUT_TYPE.BYTE2, outputs);
  IntsRefBuilder scratchInts = new IntsRefBuilder();
  for (Map.Entry<String,String> entry : mappings.entrySet()) {
    // scratchInts is reused for every key.
    Util.toUTF16(entry.getKey(), scratchInts);
    builder.add(scratchInts.get(), new CharsRef(entry.getValue()));
  }

  return builder.finish();
}
/**
 * Compiles the given affix table into an FST: each key becomes an input
 * (converted with {@code Util.toUTF32}) and its list of integers becomes
 * the int-sequence output.
 */
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
  final IntSequenceOutputs intSeqOutputs = IntSequenceOutputs.getSingleton();
  final Builder<IntsRef> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE4, intSeqOutputs);

  // Reused for every key.
  final IntsRefBuilder keyScratch = new IntsRefBuilder();

  for (Map.Entry<String,List<Integer>> affix : affixes.entrySet()) {
    Util.toUTF32(affix.getKey(), keyScratch);

    // Copy the boxed ids into a right-sized IntsRef.
    final List<Integer> ids = affix.getValue();
    final IntsRef packed = new IntsRef(ids.size());
    for (Integer id : ids) {
      packed.ints[packed.length++] = id;
    }

    fstBuilder.add(keyScratch.get(), packed);
  }

  return fstBuilder.finish();
}
/** Builds the NormalizeCharMap; call this once you * are done calling {@link #add}. */ public NormalizeCharMap build() { final FST<CharsRef> map; try { final Outputs<CharsRef> outputs = CharSequenceOutputs.getSingleton(); final org.apache.lucene.util.fst.Builder<CharsRef> builder = new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE2, outputs); final IntsRefBuilder scratch = new IntsRefBuilder(); for(Map.Entry<String,String> ent : pendingPairs.entrySet()) { builder.add(Util.toUTF16(ent.getKey(), scratch), new CharsRef(ent.getValue())); } map = builder.finish(); pendingPairs.clear(); } catch (IOException ioe) { // Bogus FST IOExceptions!! (will never happen) throw new RuntimeException(ioe); } return new NormalizeCharMap(map); } }
// Sort the keys with the comparator returned by CharsRef.getUTF16SortedAsUTF8Comparator() (by its name, UTF-16 text ordered as its UTF-8 form would be -- confirm), then allocate a scratch IntsRefBuilder.
Arrays.sort(sortedKeys, CharsRef.getUTF16SortedAsUTF8Comparator()); final IntsRefBuilder scratchIntsRef = new IntsRefBuilder();
IntsRefBuilder scratchInts = new IntsRefBuilder(); IntsRefBuilder currentOrds = new IntsRefBuilder(); currentOrds = new IntsRefBuilder(); // must be this way
/**
 * Builds a {@link StemmerOverrideMap} to be used with the
 * {@link StemmerOverrideFilter}, from the contents of {@code hash} and
 * {@code outputValues}.
 *
 * @return a {@link StemmerOverrideMap} to be used with the {@link StemmerOverrideFilter}
 * @throws IOException if an {@link IOException} occurs
 */
public StemmerOverrideMap build() throws IOException {
  final ByteSequenceOutputs byteOutputs = ByteSequenceOutputs.getSingleton();
  final org.apache.lucene.util.fst.Builder<BytesRef> fstBuilder =
      new org.apache.lucene.util.fst.Builder<>(FST.INPUT_TYPE.BYTE4, byteOutputs);

  // sort() is called before size(), matching the original call order.
  final int[] sortedIds = hash.sort();
  final IntsRefBuilder keyInts = new IntsRefBuilder();
  final int count = hash.size();
  final BytesRef keyBytes = new BytesRef();

  // Only the first `count` slots of sortedIds are consumed.
  int pos = 0;
  while (pos < count) {
    final int id = sortedIds[pos];
    keyInts.copyUTF8Bytes(hash.get(id, keyBytes));
    fstBuilder.add(keyInts.get(), new BytesRef(outputValues.get(id)));
    pos++;
  }

  return new StemmerOverrideMap(fstBuilder.finish(), ignoreCase);
}
// Working objects allocated up front; presumably reused by the code that follows (not visible here) -- confirm.
BytesRefBuilder analyzed = new BytesRefBuilder(); BytesRef surface = new BytesRef(); IntsRefBuilder scratchInts = new IntsRefBuilder(); ByteArrayDataInput input = new ByteArrayDataInput();
/** Starting from node, find the top N min cost * completions to a final node. */ public static <T> TopResults<T> shortestPaths(FST<T> fst, FST.Arc<T> fromNode, T startOutput, Comparator<T> comparator, int topN, boolean allowEmptyString) throws IOException { // All paths are kept, so we can pass topN for // maxQueueDepth and the pruning is admissible: TopNSearcher<T> searcher = new TopNSearcher<>(fst, topN, topN, comparator); // since this search is initialized with a single start node // it is okay to start with an empty input path here searcher.addStartPaths(fromNode, startOutput, allowEmptyString, new IntsRefBuilder()); return searcher.search(); }
/** Starting from node, find the top N min cost * completions to a final node. */ public static <T> TopResults<T> shortestPaths(FST<T> fst, FST.Arc<T> fromNode, T startOutput, Comparator<T> comparator, int topN, boolean allowEmptyString) throws IOException { // All paths are kept, so we can pass topN for // maxQueueDepth and the pruning is admissible: TopNSearcher<T> searcher = new TopNSearcher<>(fst, topN, topN, comparator); // since this search is initialized with a single start node // it is okay to start with an empty input path here searcher.addStartPaths(fromNode, startOutput, allowEmptyString, new IntsRefBuilder()); return searcher.search(); }
/** Starting from node, find the top N min cost * completions to a final node. */ public static <T> TopResults<T> shortestPaths(FST<T> fst, FST.Arc<T> fromNode, T startOutput, Comparator<T> comparator, int topN, boolean allowEmptyString) throws IOException { // All paths are kept, so we can pass topN for // maxQueueDepth and the pruning is admissible: TopNSearcher<T> searcher = new TopNSearcher<>(fst, topN, topN, comparator); // since this search is initialized with a single start node // it is okay to start with an empty input path here searcher.addStartPaths(fromNode, startOutput, allowEmptyString, new IntsRefBuilder()); return searcher.search(); }
/**
 * Rebuilds {@code fst} from the given weights: each key is the input and
 * the {@code long} value of its weight is the output.
 */
private void updateFST(SortedMap<String, Double> weights) throws IOException {
  final PositiveIntOutputs longOutputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> builder = new Builder<>(FST.INPUT_TYPE.BYTE1, longOutputs);

  // Scratch buffers reused for every entry.
  final BytesRefBuilder keyBytes = new BytesRefBuilder();
  final IntsRefBuilder keyInts = new IntsRefBuilder();

  for (Map.Entry<String, Double> weighted : weights.entrySet()) {
    keyBytes.copyChars(weighted.getKey());
    // longValue() drops any fractional part of the weight.
    builder.add(Util.toIntsRef(keyBytes.get(), keyInts), weighted.getValue().longValue());
  }

  fst = builder.finish();
}
/**
 * Compiles the given affix table into an FST: each key becomes an input
 * (converted with {@code Util.toUTF32}) and its list of integers becomes
 * the int-sequence output.
 */
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
  final IntSequenceOutputs intSeqOutputs = IntSequenceOutputs.getSingleton();
  final Builder<IntsRef> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE4, intSeqOutputs);

  // Reused for every key.
  final IntsRefBuilder keyScratch = new IntsRefBuilder();

  for (Map.Entry<String,List<Integer>> affix : affixes.entrySet()) {
    Util.toUTF32(affix.getKey(), keyScratch);

    // Copy the boxed ids into a right-sized IntsRef.
    final List<Integer> ids = affix.getValue();
    final IntsRef packed = new IntsRef(ids.size());
    for (Integer id : ids) {
      packed.ints[packed.length++] = id;
    }

    fstBuilder.add(keyScratch.get(), packed);
  }

  return fstBuilder.finish();
}
/**
 * Compiles the given affix table into an FST: each key becomes an input
 * (converted with {@code Util.toUTF32}) and its list of integers becomes
 * the int-sequence output.
 */
private FST<IntsRef> affixFST(TreeMap<String,List<Integer>> affixes) throws IOException {
  final IntSequenceOutputs intSeqOutputs = IntSequenceOutputs.getSingleton();
  final Builder<IntsRef> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE4, intSeqOutputs);

  // Reused for every key.
  final IntsRefBuilder keyScratch = new IntsRefBuilder();

  for (Map.Entry<String,List<Integer>> affix : affixes.entrySet()) {
    Util.toUTF32(affix.getKey(), keyScratch);

    // Copy the boxed ids into a right-sized IntsRef.
    final List<Integer> ids = affix.getValue();
    final IntsRef packed = new IntsRef(ids.size());
    for (Integer id : ids) {
      packed.ints[packed.length++] = id;
    }

    fstBuilder.add(keyScratch.get(), packed);
  }

  return fstBuilder.finish();
}
/**
 * Builds an FST whose inputs are derived from the given entries (each one
 * passed through {@code IntsRefBuilder.copyUTF8Bytes}) and saves it to
 * {@code file}. Every input is paired with the shared "no output" value.
 *
 * @param file path of the file to write the FST to
 * @param all entries to add, in the order given
 * @throws IOException if the FST cannot be built or written
 */
private static void serialize(String file, BytesRef [] all) throws IOException {
  final Object nothing = NoOutputs.getSingleton().getNoOutput();
  final Builder<Object> builder = new Builder<>(INPUT_TYPE.BYTE4, NoOutputs.getSingleton());

  // One scratch builder reused for every entry.
  final IntsRefBuilder intsRef = new IntsRefBuilder();
  for (BytesRef br : all) {
    intsRef.clear();
    intsRef.copyUTF8Bytes(br);
    builder.add(intsRef.get(), nothing);
  }

  final FST<Object> fst = builder.finish();

  // try-with-resources closes the file even when save() throws; the
  // original leaked the stream in that case. Both resources wrap the same
  // underlying file, so the second close is a harmless no-op.
  try (FileOutputStream fileOut = new FileOutputStream(file);
       OutputStreamDataOutput out = new OutputStreamDataOutput(fileOut)) {
    fst.save(out);
  }
}
}
/**
 * Writes the metadata entry for {@code field}, then builds an FST that maps
 * each value to its ordinal (its position in iteration order, starting at
 * 0) and saves it to {@code data}. The number of values is written to
 * {@code meta} last.
 */
private void writeFST(FieldInfo field, Iterable<BytesRef> values) throws IOException {
  // Metadata header: field number, format marker, and where the FST starts in data.
  meta.writeVInt(field.number);
  meta.writeByte(FST);
  meta.writeLong(data.getFilePointer());

  final PositiveIntOutputs ordOutputs = PositiveIntOutputs.getSingleton();
  final Builder<Long> fstBuilder = new Builder<>(INPUT_TYPE.BYTE1, ordOutputs);
  final IntsRefBuilder keyInts = new IntsRefBuilder();

  long count = 0;
  for (BytesRef value : values) {
    // The current count is the value's ordinal; it is bumped right after.
    fstBuilder.add(Util.toIntsRef(value, keyInts), count++);
  }

  // finish() can yield null, in which case nothing is written to data.
  final FST<Long> built = fstBuilder.finish();
  if (built != null) {
    built.save(data);
  }

  meta.writeVLong(count);
}