Refine search
final Builder<BytesRef> indexBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, false, Integer.MAX_VALUE, outputs, true, 15); assert bytes.length > 0; scratchBytes.writeTo(bytes, 0); indexBuilder.add(Util.toIntsRef(prefix, scratchIntsRef), new BytesRef(bytes, 0, bytes.length)); scratchBytes.reset(); index = indexBuilder.finish();
/** Returns final FST. NOTE: this will return null if * nothing is accepted by the FST. */ public FST<T> finish() throws IOException { final UnCompiledNode<T> root = frontier[0]; // minimize nodes in the last word's suffix freezeTail(0); if (root.inputCount < minSuffixCount1 || root.inputCount < minSuffixCount2 || root.numArcs == 0) { if (fst.emptyOutput == null) { return null; } else if (minSuffixCount1 > 0 || minSuffixCount2 > 0) { // empty string got pruned return null; } } else { if (minSuffixCount2 != 0) { compileAllTargets(root, lastInput.length()); } } //if (DEBUG) System.out.println(" builder.finish root.isFinal=" + root.isFinal + " root.output=" + root.output); fst.finish(compileNode(root, lastInput.length()).node); return fst; }
assert lastInput.length() == 0 || input.compareTo(lastInput.get()) >= 0: "inputs are added out of order lastInput=" + lastInput.get() + " vs input=" + input; assert validOutput(output); freezeTail(prefixLenPlus1); assert validOutput(lastOutput); assert validOutput(commonOutputPrefix); wordSuffix = fst.outputs.subtract(lastOutput, commonOutputPrefix); assert validOutput(wordSuffix); parentNode.setLastOutput(input.ints[input.offset + idx - 1], commonOutputPrefix); node.prependOutput(wordSuffix); assert validOutput(output);
private void freezeTail(int prefixLenPlus1) throws IOException { for(int idx=lastInput.length(); idx >= downTo; idx--) { parent.deleteLast(lastInput.intAt(idx-1), node); } else { compileAllTargets(node, lastInput.length()-idx); parent.replaceLast(lastInput.intAt(idx-1), compileNode(node, 1+lastInput.length()-idx), nextFinalOutput, isFinal);
Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, fstOutput); IntsRefBuilder scratch = new IntsRefBuilder(); long ord = 0; scratch.grow(token.length()); scratch.setLength(token.length()); for (int i = 0; i < token.length(); i++) { scratch.setIntAt(i, (int) token.charAt(i)); fstBuilder.add(scratch.get(), ord); segmentations.add(wordIdAndLength); ord++; this.fst = new TokenInfoFST(fstBuilder.finish(), false); this.data = data.toArray(new String[data.size()]); this.segmentations = segmentations.toArray(new int[segmentations.size()][]);
public void finishTerm(long defaultWeight) throws IOException { ArrayUtil.timSort(surfaceFormsAndPayload, 0, count); int deduplicator = 0; analyzed.append((byte) 0); analyzed.setLength(analyzed.length() + 1); analyzed.grow(analyzed.length()); for (int i = 0; i < count; i++) { analyzed.setByteAt(analyzed.length() - 1, (byte) deduplicator++); Util.toIntsRef(analyzed.get(), scratchInts); SurfaceFormAndPayload candiate = surfaceFormsAndPayload[i]; long cost = candiate.weight == -1 ? encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight)) : candiate.weight; builder.add(scratchInts.get(), outputs.newPair(cost, candiate.payload)); } seenSurfaceForms.clear(); count = 0; }
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException { final BytesRefFSTEnum<BytesRef> subIndexEnum = new BytesRefFSTEnum<>(subIndex); BytesRefFSTEnum.InputOutput<BytesRef> indexEnt; while((indexEnt = subIndexEnum.next()) != null) { //if (DEBUG) { // System.out.println(" add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output); //} builder.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), indexEnt.output); } } }
IntsRefBuilder scratchInts = new IntsRefBuilder(); IntsRefBuilder currentOrds = new IntsRefBuilder(); Util.toUTF32(currentEntry, scratchInts); words.add(scratchInts.get(), currentOrds.get()); Util.toUTF32(currentEntry, scratchInts); words.add(scratchInts.get(), currentOrds.get()); success2 = true; } finally {
public FST<Pair<Long, BytesRef>> build() throws IOException { return builder.finish(); }
public XBuilder(int maxSurfaceFormsPerAnalyzedForm, boolean hasPayloads, int payloadSep) { this.payloadSep = payloadSep; this.outputs = new PairOutputs<>(PositiveIntOutputs.getSingleton(), ByteSequenceOutputs.getSingleton()); this.builder = new Builder<>(FST.INPUT_TYPE.BYTE1, outputs); this.maxSurfaceFormsPerAnalyzedForm = maxSurfaceFormsPerAnalyzedForm; this.hasPayloads = hasPayloads; surfaceFormsAndPayload = new SurfaceFormAndPayload[maxSurfaceFormsPerAnalyzedForm]; } public void startTerm(BytesRef analyzed) {
public FSTFieldWriter(FieldInfo fieldInfo, long termsFilePointer) throws IOException { this.fieldInfo = fieldInfo; fstOutputs = PositiveIntOutputs.getSingleton(); fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE1, fstOutputs); indexStart = out.getFilePointer(); ////System.out.println("VGW: field=" + fieldInfo.name); // Always put empty string in fstBuilder.add(new IntsRef(), termsFilePointer); startTermsFilePointer = termsFilePointer; }
private void compileAllTargets(UnCompiledNode<T> node, int tailLength) throws IOException { for(int arcIdx=0;arcIdx<node.numArcs;arcIdx++) { final Arc<T> arc = node.arcs[arcIdx]; if (!arc.target.isCompiled()) { // not yet compiled @SuppressWarnings({"rawtypes","unchecked"}) final UnCompiledNode<T> n = (UnCompiledNode<T>) arc.target; if (n.numArcs == 0) { //System.out.println("seg=" + segment + " FORCE final arc=" + (char) arc.label); arc.isFinal = n.isFinal = true; } arc.target = compileNode(n, tailLength-1); } } }
Builder<Long> fstBuilder = new Builder<>(FST.INPUT_TYPE.BYTE2, fstOutput); IntsRefBuilder scratch = new IntsRefBuilder(); scratch.grow(token.length()); scratch.setLength(token.length()); for (int i = 0; i < token.length(); i++) { scratch.setIntAt(i, (int) token.charAt(i)); fstBuilder.add(scratch.get(), ord); lastToken = token; ord ++; this.fst = new TokenInfoFST(fstBuilder.finish()); this.segmentations = segmentations.toArray(new int[segmentations.size()][]); this.rightIds = new short[rightIds.size()];
public void finishTerm(long defaultWeight) throws IOException { ArrayUtil.timSort(surfaceFormsAndPayload, 0, count); int deduplicator = 0; analyzed.append((byte) 0); analyzed.setLength(analyzed.length() + 1); analyzed.grow(analyzed.length()); for (int i = 0; i < count; i++) { analyzed.setByteAt(analyzed.length() - 1, (byte) deduplicator++); Util.toIntsRef(analyzed.get(), scratchInts); SurfaceFormAndPayload candiate = surfaceFormsAndPayload[i]; long cost = candiate.weight == -1 ? encodeWeight(Math.min(Integer.MAX_VALUE, defaultWeight)) : candiate.weight; builder.add(scratchInts.get(), outputs.newPair(cost, candiate.payload)); } seenSurfaceForms.clear(); count = 0; }
private void append(Builder<BytesRef> builder, FST<BytesRef> subIndex, IntsRefBuilder scratchIntsRef) throws IOException { final BytesRefFSTEnum<BytesRef> subIndexEnum = new BytesRefFSTEnum<>(subIndex); BytesRefFSTEnum.InputOutput<BytesRef> indexEnt; while((indexEnt = subIndexEnum.next()) != null) { //if (DEBUG) { // System.out.println(" add sub=" + indexEnt.input + " " + indexEnt.input + " output=" + indexEnt.output); //} builder.add(Util.toIntsRef(indexEnt.input, scratchIntsRef), indexEnt.output); } } }
public FST<Pair<Long, BytesRef>> build() throws IOException { return builder.finish(); }
public TermsWriter(IndexOutput out, FieldInfo field) { this.out = out; this.field = field; builder = new Builder<>(FST.INPUT_TYPE.BYTE1, 0, 0, true, true, Integer.MAX_VALUE, outputs, true, 15); }