public TrieDictionaryForestBuilder(BytesConverter<T> bytesConverter, int baseId, int maxTrieTreeSizeMB) { this.bytesConverter = bytesConverter; this.trieBuilder = new TrieDictionaryBuilder<T>(bytesConverter); this.baseId = baseId; this.curOffset = 0; this.maxTrieTreeSize = maxTrieTreeSizeMB * 1024 * 1024; }
/** Replaces the current trie builder with a new, empty one and sets {@code curTreeSize} back to 0. */
private void reset() {
    trieBuilder = new TrieDictionaryBuilder<>(bytesConverter);
    curTreeSize = 0;
}
/**
 * Prepares this builder for a new dictionary: records the base id and creates a fresh
 * String trie builder.
 *
 * @param info    not used by this implementation
 * @param baseId  base id stored on this instance
 * @param hdfsDir not used by this implementation
 * @throws IOException declared by the overridden method; nothing in this body throws it
 */
@Override
public void init(DictionaryInfo info, int baseId, String hdfsDir) throws IOException {
    this.baseId = baseId;
    // Diamond instead of the raw type, so the builder's element type is checked by the compiler.
    this.builder = new TrieDictionaryBuilder<>(new StringBytesConverter());
}
// New String-typed trie builder, constructed with a StringBytesConverter.
TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter());
// Manual driver: creates a String trie builder, adds the value "part" and then calls print() on the builder.
public static void main(String[] args) throws Exception { TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter()); b.addValue("part"); b.print();
/**
 * Creates a String trie builder and adds every given value to it.
 *
 * @param str values to add, in iteration order
 * @return the populated builder; {@code build} has not been called on it
 */
private static TrieDictionaryBuilder<String> newDictBuilder(Iterable<String> str) {
    final TrieDictionaryBuilder<String> builder = new TrieDictionaryBuilder<>(new StringBytesConverter());
    for (final String value : str) {
        builder.addValue(value);
    }
    return builder;
}
// New String-typed trie builder, constructed with a StringBytesConverter.
TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter());
// Captures the current wall-clock time in startTime, then adds every entry of testData to a new
// String trie builder. `keep` starts at 0; nothing on this line reads it.
int keep = 0; long startTime = System.currentTimeMillis(); TrieDictionaryBuilder<String> oldTrieBuilder = new TrieDictionaryBuilder<>(new StringBytesConverter()); for (String str : testData) oldTrieBuilder.addValue(str);
/**
 * Builds a dictionary from the given strings.
 *
 * @param strs values to add, in the collection's iteration order
 * @return the result of {@code build(0)} on a String trie builder holding all values
 */
@SuppressWarnings("rawtypes")
private static Dictionary strsToDict(Collection<String> strs) {
    final TrieDictionaryBuilder<String> trieBuilder =
            new TrieDictionaryBuilder<String>(new StringBytesConverter());
    for (final String value : strs) {
        trieBuilder.addValue(value);
    }
    final int baseId = 0;
    return trieBuilder.build(baseId);
}
// Appends "paint" to strList, then creates a String trie builder and starts adding each element of strList to it.
strList.add("paint"); TrieDictionaryBuilder<String> dictBuilder = new TrieDictionaryBuilder<>(new StringBytesConverter()); for (String str : strList) { dictBuilder.addValue(str);
/**
 * Adds values of 203, 200 and 5000 characters, builds and dumps the dictionary, then adds a
 * 35000-character value and expects the next {@code build(0)} call to throw.
 */
@Test
public void testSuperLongStringValue() {
    final String longPrefix = "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" + "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789";
    final String strLen200 = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghid";

    final TrieDictionaryBuilder<String> builder = new TrieDictionaryBuilder<>(new StringBytesConverter());
    builder.addValue(longPrefix + "xyz");
    builder.addValue(strLen200);

    // 25 x 200 characters = 5000 characters.
    final StringBuilder len5000 = new StringBuilder();
    for (int copies = 25; copies > 0; copies--) {
        len5000.append(strLen200);
    }
    final String strLen5000 = len5000.toString();
    builder.addValue(strLen5000);

    // With values up to 5000 characters the build is expected to succeed.
    final TrieDictionary<String> dict = builder.build(0);
    dict.dump(System.out);

    // 7 x 5000 characters = 35000 characters.
    final StringBuilder len35000 = new StringBuilder();
    for (int copies = 7; copies > 0; copies--) {
        len35000.append(strLen5000);
    }
    builder.addValue(len35000.toString());

    Exception caught = null;
    try {
        builder.build(0);
    } catch (Exception e) {
        caught = e;
    }
    Assert.assertNotNull(caught);
}
/**
 * Test fixture: generates the raw string data set and builds it twice, once with a single
 * trie builder ({@code oldDict}) and once with a forest builder ({@code newDict}), then
 * prints how many trees the forest contains.
 */
@Before
public void before() {
    final int dataSize = 100 * 10000;
    final TrieDictionaryBuilder<String> trieBuilder =
            new TrieDictionaryBuilder<String>(new StringBytesConverter());
    final TrieDictionaryForestBuilder<String> forestBuilder =
            new TrieDictionaryForestBuilder<>(new StringBytesConverter(), 0, 5);

    this.rawData = genStringDataSet(dataSize);
    for (String value : this.rawData) {
        // Both builders receive exactly the same values in the same order.
        trieBuilder.addValue(value);
        forestBuilder.addValue(value);
    }

    this.oldDict = trieBuilder.build(0);
    this.newDict = forestBuilder.build();

    final TrieDictionaryForest<String> forest = (TrieDictionaryForest<String>) newDict;
    System.out.println("new dict split tree size : " + forest.getTrees().size());
}
/**
 * Builds a dictionary over a fixed list of ten names.
 *
 * @return the result of {@code build(0)} on a String trie builder holding the names
 */
private static Dictionary newDictionaryOfString() {
    final String[] names = {
        "Dong", "George", "Jason", "Kejia", "Luke",
        "Mahone", "Qianhao", "Shaofeng", "Xu", "Yang"
    };
    final TrieDictionaryBuilder<String> trieBuilder =
            new TrieDictionaryBuilder<String>(new StringBytesConverter());
    for (final String name : names) {
        trieBuilder.addValue(name);
    }
    return trieBuilder.build(0);
}
/**
 * Builds a String dictionary from byte-encoded values and collects a few distinct sample values.
 *
 * @param values   byte arrays; each is converted with {@code Bytes.toString} before being added
 * @param baseId   base id passed to {@code build}
 * @param nSamples a value is only added to {@code samples} while the list holds fewer than this many entries
 * @param samples  output list; receives each converted value not already present, until the
 *                 limit is reached (kept as a raw type so existing callers still compile)
 * @return the dictionary produced by the trie builder
 */
private static Dictionary buildStringDict(List<byte[]> values, int baseId, int nSamples, ArrayList samples) {
    // Parameterised builder instead of the raw type, so addValue is type-checked.
    TrieDictionaryBuilder<String> builder = new TrieDictionaryBuilder<>(new StringBytesConverter());
    for (byte[] value : values) {
        String v = Bytes.toString(value);
        builder.addValue(v);
        if (samples.size() < nSamples && !samples.contains(v)) {
            samples.add(v);
        }
    }
    return builder.build(baseId);
}
/**
 * Builds a single trie dictionary from the non-null entries of {@code strs} and asserts that
 * {@code dict} reports the same max id, min id, size, id size and value size.
 *
 * @param dict   forest dictionary under test
 * @param strs   source values; null entries are skipped
 * @param baseId base id used to build the reference trie
 */
private void assertSameBehaviorAsTrie(TrieDictionaryForest<String> dict, ArrayList<String> strs, int baseId) {
    final TrieDictionaryBuilder<String> referenceBuilder =
            new TrieDictionaryBuilder<String>(new StringBytesConverter());
    for (final String value : strs) {
        if (value == null) {
            continue;
        }
        referenceBuilder.addValue(value);
    }
    final TrieDictionary<String> expected = referenceBuilder.build(baseId);

    assertEquals(expected.getMaxId(), dict.getMaxId());
    assertEquals(expected.getMinId(), dict.getMinId());
    assertEquals(expected.getSize(), dict.getSize());
    assertEquals(expected.getSizeOfId(), dict.getSizeOfId());
    assertEquals(expected.getSizeOfValue(), dict.getSizeOfValue());
}
/**
 * Ad-hoc driver: builds a String trie dictionary (base id 0) from "-000000.41", "0000101.81" and
 * "6779331", then prints getIdFromValue for "0000001.6131" (a value that was not added), once with
 * second argument -1 and once with 1. The earlier addValue/print experiments are commented out.
 * NOTE(review): the second argument presumably selects how a missing value is rounded - confirm
 * against TrieDictionary.getIdFromValue.
 */
public static void main(String[] args) throws Exception { TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter()); // b.addValue("part"); // b.print(); // b.addValue("part"); // b.print(); // b.addValue("par"); // b.print(); // b.addValue("partition"); // b.print(); // b.addValue("party"); // b.print(); // b.addValue("parties"); // b.print(); // b.addValue("paint"); // b.print(); b.addValue("-000000.41"); b.addValue("0000101.81"); b.addValue("6779331"); String t = "0000001.6131"; TrieDictionary<String> dict = b.build(0); System.out.println(dict.getIdFromValue(t, -1)); System.out.println(dict.getIdFromValue(t, 1)); } }
/**
 * Creates a String trie builder and adds every element of the list to it.
 *
 * @param str values to add, in list order
 * @return the populated builder; {@code build} has not been called on it
 */
private static TrieDictionaryBuilder<String> newDictBuilder(ArrayList<String> str) {
    final TrieDictionaryBuilder<String> builder = new TrieDictionaryBuilder<>(new StringBytesConverter());
    for (final String value : str) {
        builder.addValue(value);
    }
    return builder;
}
public TrieDictionaryForestBuilder(BytesConverter<T> bytesConverter, int baseId, int maxTrieTreeSizeMB) { this.bytesConverter = bytesConverter; this.trieBuilder = new TrieDictionaryBuilder<T>(bytesConverter); this.baseId = baseId; this.curOffset = 0; this.maxTrieTreeSize = maxTrieTreeSizeMB * 1024 * 1024; }
/**
 * Builds and dumps a dictionary holding two long values that share a 200-character prefix.
 * The test makes no explicit assertions; it passes as long as nothing throws.
 */
@Test
public void testSuperLongStringValue() {
    final String longPrefix = "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" + "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789";
    final String longSuffixValue = longPrefix + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
    final String shortSuffixValue = longPrefix + "xyz";

    final TrieDictionaryBuilder<String> builder = new TrieDictionaryBuilder<>(new StringBytesConverter());
    builder.addValue(longSuffixValue);
    builder.addValue(shortSuffixValue);

    final TrieDictionary<String> dict = builder.build(0);
    dict.dump(System.out);
}
/**
 * Prepares this builder for a new dictionary: records the base id and creates a fresh
 * String trie builder.
 *
 * @param info    not used by this implementation
 * @param baseId  base id stored on this instance
 * @param hdfsDir not used by this implementation
 * @throws IOException declared by the overridden method; nothing in this body throws it
 */
@Override
public void init(DictionaryInfo info, int baseId, String hdfsDir) throws IOException {
    this.baseId = baseId;
    // Diamond instead of the raw type, so the builder's element type is checked by the compiler.
    this.builder = new TrieDictionaryBuilder<>(new StringBytesConverter());
}