/**
 * Prints a textual snapshot of this forest to {@code out}: a header line, the
 * base id, the value dividers and offset dividers, and finally the dump of
 * every tree in index order.
 *
 * @param out stream that receives the dump
 */
@Override
public void dump(PrintStream out) {
    out.println("TrieDictionaryForest");
    out.println("baseId:" + baseId);

    // Both divider lists share one buffer so they are emitted by a single println.
    StringBuilder dividers = new StringBuilder("value divide:");
    for (ByteArray divider : valueDivide) {
        dividers.append(bytesConvert.convertFromBytes(divider.array(), 0, divider.length())).append(" ");
    }
    dividers.append("\noffset divide:");
    for (Integer boundary : accuOffset) {
        dividers.append(boundary).append(" ");
    }
    out.println(dividers.toString());

    for (int treeNo = 0; treeNo < trees.size(); treeNo++) {
        out.println("----tree " + treeNo + "--------");
        trees.get(treeNo).dump(out);
    }
}
/**
 * Round-trips {@code dict} through its binary form: writes it to an in-memory
 * buffer with {@code write}, then populates a fresh {@code TrieDictionary}
 * from those bytes with {@code readFields}.
 *
 * Both data streams are managed by try-with-resources so they are closed even
 * when writing or reading fails part-way.
 *
 * @param dict dictionary to serialize
 * @return a new dictionary read back from the serialized bytes
 * @throws RuntimeException wrapping any {@link IOException} raised while writing or reading
 */
private static TrieDictionary<String> testSerialize(TrieDictionary<String> dict) {
    try {
        ByteArrayOutputStream bout = new ByteArrayOutputStream();
        // The output stream must be closed before the buffer is snapshotted below.
        try (DataOutputStream dataout = new DataOutputStream(bout)) {
            dict.write(dataout);
        }

        try (DataInputStream datain = new DataInputStream(new ByteArrayInputStream(bout.toByteArray()))) {
            TrieDictionary<String> r = new TrieDictionary<String>();
            r.readFields(datain);
            return r;
        }
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
int times = Math.max(10 * 1000 * 1000 / n, 1); // run 10 million lookups int keep = 0; // make sure JIT don't OPT OUT function calls under test byte[] valueBytes = new byte[dict.getSizeOfValue()]; long start; for (int i = 0; i < times; i++) { for (int j = 0; j < n; j++) { keep |= dict.getIdFromValueBytesWithoutCache(array[j], 0, array[j].length, 0); for (int i = 0; i < times; i++) { for (int j = 0; j < n; j++) { keep |= dict.getValueBytesFromIdWithoutCache(j).length;
/**
 * Looks up the value bytes for {@code id}: the id is translated to a sequence
 * number with {@code calcSeqNoFromId} and the lookup is delegated to
 * {@code lookupValueFromSeqNo}, starting at {@code headSize}.
 *
 * @param id          dictionary id to resolve
 * @param returnValue buffer handed to {@code lookupValueFromSeqNo}
 * @param offset      position in {@code returnValue} handed to {@code lookupValueFromSeqNo}
 * @return the result of {@code lookupValueFromSeqNo} (presumably the number of
 *         value bytes written -- confirm against that method)
 */
protected int getValueBytesFromIdImpl(int id, byte[] returnValue, int offset) {
    return lookupValueFromSeqNo(headSize, calcSeqNoFromId(id), returnValue, offset);
}
/**
 * Prints the number of values followed by one line per value, in sequence
 * order, formatted as {@code id (hex id): value}.
 *
 * @param out stream that receives the dump
 */
@Override
public void dump(PrintStream out) {
    out.println("Total " + nValues + " values");
    for (int seqNo = 0; seqNo < nValues; seqNo++) {
        int id = calcIdFromSeqNo(seqNo);
        T decoded = getValueFromId(id);

        StringBuilder line = new StringBuilder();
        line.append(id).append(" (").append(Integer.toHexString(id)).append("): ").append(decoded);
        out.println(line.toString());
    }
}
/**
 * Resolves the id of the value stored in {@code value[offset, offset + len)}.
 * The bytes are looked up with {@code lookupSeqNoFromValue} and the resulting
 * sequence number is mapped to an id with {@code calcIdFromSeqNo}.
 *
 * @param value        buffer holding the value bytes
 * @param offset       start of the value inside {@code value}
 * @param len          number of value bytes
 * @param roundingFlag forwarded unchanged to {@code lookupSeqNoFromValue}
 * @return the id produced by {@code calcIdFromSeqNo}; a negative id is logged
 *         at debug level as an invalid value and still returned to the caller
 */
@Override
protected int getIdFromValueBytesWithoutCache(byte[] value, int offset, int len, int roundingFlag) {
    final int seqNo = lookupSeqNoFromValue(headSize, value, offset, offset + len, roundingFlag);
    final int resolvedId = calcIdFromSeqNo(seqNo);
    if (resolvedId >= 0) {
        return resolvedId;
    }
    logger.debug("Not a valid value: " + bytesConvert.convertFromBytes(value, offset, len));
    return resolvedId;
}
/**
 * Builds a plain {@code TrieDictionary} from the non-null entries of
 * {@code strs} and asserts that {@code dict} reports the same max id, min id,
 * size, id size and value size.
 *
 * @param dict   forest dictionary under test
 * @param strs   values used to build the reference trie; null entries are skipped
 * @param baseId base id passed to the reference trie's {@code build}
 */
private void assertSameBehaviorAsTrie(TrieDictionaryForest<String> dict, ArrayList<String> strs, int baseId) {
    TrieDictionaryBuilder<String> referenceBuilder = new TrieDictionaryBuilder<>(new StringBytesConverter());
    for (String candidate : strs) {
        if (candidate == null) {
            continue;
        }
        referenceBuilder.addValue(candidate);
    }
    TrieDictionary<String> reference = referenceBuilder.build(baseId);

    assertEquals(reference.getMaxId(), dict.getMaxId());
    assertEquals(reference.getMinId(), dict.getMinId());
    assertEquals(reference.getSize(), dict.getSize());
    assertEquals(reference.getSizeOfId(), dict.getSizeOfId());
    assertEquals(reference.getSizeOfValue(), dict.getSizeOfValue());
}
assertEquals(id, dict.getIdFromValue(value)); assertEquals(value, dict.getValueFromId(id)); dict.getIdFromValue(s); fail("For not found value '" + s + "', IllegalArgumentException is expected"); } catch (IllegalArgumentException e) { int nullId = dict.getIdFromValue(null); assertNull(dict.getValueFromId(nullId));
assertEquals(id, dict.getIdFromValue(value)); assertEquals(value, dict.getValueFromId(id)); dict.getIdFromValue(s); fail("For not found value '" + s + "', IllegalArgumentException is expected"); } catch (IllegalArgumentException e) { int nullId = dict.getIdFromValue(null); assertNull(dict.getValueFromId(nullId)); int nullId2 = dict.getIdFromValueBytes(null, 0, 0); assertEquals(dict.getValueBytesFromId(nullId2, null, 0), -1); assertEquals(nullId, nullId2);
@Override final protected T getValueFromIdImpl(int id) { if (enableCache) { Object[] cache = idToValueCache.get(); // SoftReference to skip // cache gracefully when // short of memory if (cache != null) { int seq = calcSeqNoFromId(id); if (seq < 0 || seq >= nValues) throw new IllegalArgumentException("Not a valid ID: " + id); if (cache[seq] != null) return (T) cache[seq]; byte[] value = new byte[getSizeOfValue()]; int length = getValueBytesFromId(id, value, 0); T result = bytesConvert.convertFromBytes(value, 0, length); cache[seq] = result; return result; } } byte[] value = new byte[getSizeOfValue()]; int length = getValueBytesFromId(id, value, 0); return bytesConvert.convertFromBytes(value, 0, length); }
/**
 * Flattens the trie into a byte array to minimize its memory footprint while
 * keeping lookups fast. The price is that the result can no longer be
 * modified (it becomes immutable).
 * <p>
 * The flattened layout is HEAD + NODEs. Each node consists of:
 * <ul>
 * <li>o bytes: offset to the child node, o = stats.mbpn_sizeChildOffset
 *   <ul>
 *   <li>1 bit: isLastChild flag, the 1st MSB of o</li>
 *   <li>1 bit: isEndOfValue flag, the 2nd MSB of o</li>
 *   </ul>
 * </li>
 * <li>c bytes: number of values beneath, c = stats.mbpn_sizeNoValueBeneath</li>
 * <li>1 byte: number of value bytes</li>
 * <li>n bytes: value bytes</li>
 * </ul>
 *
 * @param baseId forwarded to {@code buildTrieBytes}
 * @return a {@code TrieDictionary} constructed over the flattened bytes
 */
public TrieDictionary<T> build(int baseId) {
    return new TrieDictionary<T>(buildTrieBytes(baseId));
}
ArrayList<TrieDictionary<T>> trees = new ArrayList<>(); for (int i = 0; i < treeSize; i++) { TrieDictionary<T> dict = new TrieDictionary<>(); dict.readFields(in); trees.add(dict);
private void initMaxValueForEachTrie() { //init max value this.maxValue = new ArrayList<>(); if (this.trees == null || trees.isEmpty()) { return; } for (int i = 0; i < trees.size(); i++) { T curTreeMax = trees.get(i).getValueFromId(trees.get(i).getMaxId()); byte[] b1 = bytesConvert.convertToBytes(curTreeMax); ByteArray ba1 = new ByteArray(b1, 0, b1.length); this.maxValue.add(ba1); } }
int times = 10 * 1000 * 1000 / n; // run 10 million lookups int keep = 0; // make sure JIT don't OPT OUT function calls under test byte[] valueBytes = new byte[dict.getSizeOfValue()]; long start; for (int i = 0; i < times; i++) { for (int j = 0; j < n; j++) { keep |= dict.getIdFromValueBytes(array[j], 0, array[j].length); for (int i = 0; i < times; i++) { for (int j = 0; j < n; j++) { keep |= dict.getValueBytesFromId(j, valueBytes, 0);
return checkFlag(headSize, BIT_IS_END_OF_VALUE) ? 0 : roundSeqNo(roundingFlag, -1, -1, 0); seq += BytesUtil.readUnsigned(trieBytes, n + sizeChildOffset, sizeNoValuesBeneath); return roundSeqNo(roundingFlag, seq - 1, -1, seq); // mismatch boolean isEndOfValue = checkFlag(n, BIT_IS_END_OF_VALUE); if (o == inpEnd) { return p == end && isEndOfValue ? seq : roundSeqNo(roundingFlag, seq - 1, -1, seq); // input all matched int c = getChildOffset(n); if (c == headSize) // has no children return roundSeqNo(roundingFlag, seq - 1, -1, seq); // input only partially matched byte inpByte = inp[o]; int comp; } else if (comp < 0) { // try next child seq += BytesUtil.readUnsigned(trieBytes, c + sizeChildOffset, sizeNoValuesBeneath); if (checkFlag(c, BIT_IS_LAST_CHILD)) return roundSeqNo(roundingFlag, seq - 1, -1, seq); // no child can match the next byte of input c = p + BytesUtil.readUnsigned(trieBytes, p - 1, 1); } else { // children are ordered by their first value byte return roundSeqNo(roundingFlag, seq - 1, -1, seq); // no child can match the next byte of input
/**
 * Builds a dictionary whose only value is the empty string and checks that it
 * has size 1 and maps the empty string to id 0.
 */
@Test
public void testAllNullValue() {
    ArrayList<String> onlyEmpty = new ArrayList<String>();
    onlyEmpty.add("");

    TrieDictionary<String> dict = newDictBuilder(onlyEmpty).build(0);

    assertEquals(1, dict.getSize());
    assertEquals(0, dict.getIdFromValue(""));
}
/**
 * Manual smoke run: adds a fixed list of sample strings to a builder, printing
 * the builder after every insertion, then builds a dictionary with base id 0,
 * dumps it to standard output and performs one zero-length byte lookup.
 *
 * @param args unused
 */
public static void main(String[] args) throws Exception {
    TrieDictionaryBuilder<String> builder = new TrieDictionaryBuilder<String>(new StringBytesConverter());

    // "part" appears twice on purpose, mirroring the original insertion sequence.
    String[] samples = { "", "part", "part", "par", "partition", "party", "parties", "paint" };
    for (String sample : samples) {
        builder.addValue(sample);
        builder.print();
    }

    TrieDictionary<String> dict = builder.build(0);
    dict.dump(System.out);
    dict.getIdFromValueBytes(new byte[10], 0, 0);
}
} // end of the enclosing type (its header is outside this view)
/**
 * Sets {@code sizeOfValue} to the largest {@code getSizeOfValue()} reported by
 * any tree, or to 0 when there are no trees.
 */
private void initSizeOfValue() {
    int widest = 0;
    for (TrieDictionary<T> tree : trees) {
        int candidate = tree.getSizeOfValue();
        if (candidate > widest) {
            widest = candidate;
        }
    }
    this.sizeOfValue = widest;
}
boolean isEndOfValue = checkFlag(n, BIT_IS_END_OF_VALUE); if (isEndOfValue) { T curNodeValue = bytesConvert.convertFromBytes(returnValue, 0, o); int c = getChildOffset(n); if (c == headSize) // has no children return; while (true) { visitNode(c, returnValue, o, result); if (checkFlag(c, BIT_IS_LAST_CHILD)) return;
return checkFlag(headSize, BIT_IS_END_OF_VALUE) ? 0 : roundSeqNo(roundingFlag, -1, -1, 0); seq += BytesUtil.readUnsigned(trieBytes, n + sizeChildOffset, sizeNoValuesBeneath); return roundSeqNo(roundingFlag, seq - 1, -1, seq); // mismatch boolean isEndOfValue = checkFlag(n, BIT_IS_END_OF_VALUE); if (o == inpEnd) { return p == end && isEndOfValue ? seq : roundSeqNo(roundingFlag, seq - 1, -1, seq); // input return roundSeqNo(roundingFlag, seq - 1, -1, seq); // input only } else if (comp < 0) { // try next child seq += BytesUtil.readUnsigned(trieBytes, c + sizeChildOffset, sizeNoValuesBeneath); if (checkFlag(c, BIT_IS_LAST_CHILD)) return roundSeqNo(roundingFlag, seq - 1, -1, seq); // no return roundSeqNo(roundingFlag, seq - 1, -1, seq); // no