/**
 * Prepares this dictionary builder by creating a fresh forest builder for strings.
 *
 * <p>Any previously assigned {@code builder} is replaced. The builder is created with
 * an explicit {@code <String>} type argument rather than as a raw type, so the compiler
 * can check the converter/value types.
 *
 * @param info    not used by this implementation
 * @param baseId  base id handed to the {@code TrieDictionaryForestBuilder} constructor
 * @param hdfsDir not used by this implementation
 * @throws IOException declared by the overridden signature
 */
@Override
public void init(DictionaryInfo info, int baseId, String hdfsDir) throws IOException {
    builder = new TrieDictionaryForestBuilder<String>(new StringBytesConverter(), baseId);
}
/**
 * Creates a string forest-dictionary builder and feeds it every value of an iterator.
 *
 * <p>The iterator is consumed completely. The returned builder has not been built yet.
 *
 * @param strs     values to add, in iteration order
 * @param baseId   base id passed to the builder constructor
 * @param treeSize value passed to {@code setMaxTrieTreeSize} before any value is added
 * @return the populated builder
 */
public static TrieDictionaryForestBuilder<String> newDictBuilder(Iterator<String> strs, int baseId, int treeSize) {
    final StringBytesConverter converter = new StringBytesConverter();
    final TrieDictionaryForestBuilder<String> forestBuilder =
            new TrieDictionaryForestBuilder<String>(converter, baseId);
    forestBuilder.setMaxTrieTreeSize(treeSize);

    while (strs.hasNext()) {
        final String value = strs.next();
        forestBuilder.addValue(value);
    }
    return forestBuilder;
}
/**
 * Creates a string forest-dictionary builder and feeds it every value of an iterable.
 *
 * <p>No maximum tree size is set here, so the builder keeps whatever size its
 * constructor chose. The returned builder has not been built yet.
 *
 * @param strs   values to add, in iteration order
 * @param baseId base id passed to the builder constructor
 * @return the populated builder
 */
public static TrieDictionaryForestBuilder<String> newDictBuilder(Iterable<String> strs, int baseId) {
    final StringBytesConverter converter = new StringBytesConverter();
    final TrieDictionaryForestBuilder<String> forestBuilder =
            new TrieDictionaryForestBuilder<String>(converter, baseId);

    for (final String value : strs) {
        forestBuilder.addValue(value);
    }
    return forestBuilder;
}
/**
 * Creates a string forest-dictionary builder with an explicit maximum tree size and
 * feeds it every value of an iterable.
 *
 * <p>The returned builder has not been built yet.
 *
 * @param strs     values to add, in iteration order
 * @param baseId   base id passed to the builder constructor
 * @param treeSize value passed to {@code setMaxTrieTreeSize} before any value is added
 * @return the populated builder
 */
public static TrieDictionaryForestBuilder<String> newDictBuilder(Iterable<String> strs, int baseId, int treeSize) {
    final StringBytesConverter converter = new StringBytesConverter();
    final TrieDictionaryForestBuilder<String> forestBuilder =
            new TrieDictionaryForestBuilder<String>(converter, baseId);
    forestBuilder.setMaxTrieTreeSize(treeSize);

    for (final String value : strs) {
        forestBuilder.addValue(value);
    }
    return forestBuilder;
}
// Setup fragment (the enclosing method is outside this view):
//  - records the current wall-clock time in milliseconds in startTime,
//  - creates a String forest builder with a StringBytesConverter and base id 0,
//  - adds every element of testData to that builder.
// NOTE(review): startTime is not read in the visible statements; presumably it is used
// later to report elapsed time - confirm against the rest of the method.
long startTime = System.currentTimeMillis(); BytesConverter<String> converter = new StringBytesConverter(); TrieDictionaryForestBuilder<String> newTrieBuilder = new TrieDictionaryForestBuilder<String>(converter, 0); for (String str : testData) newTrieBuilder.addValue(str);
System.out.println("data size:" + totalSize / 1024 + "KB max tree size:" + maxTreeSize / 1024 + "KB"); TrieDictionaryForestBuilder<String> builder = new TrieDictionaryForestBuilder<String>(converter); builder.setMaxTrieTreeSize(maxTreeSize); for (String str : strs) { strs.add("f"); strs.add("a"); builder = new TrieDictionaryForestBuilder<String>(converter); builder.setMaxTrieTreeSize(maxTreeSize); try {
/**
 * Test fixture: builds two dictionaries from the same generated data set.
 *
 * <p>{@code rawData} receives the generated strings, {@code oldDict} is built by a
 * {@code TrieDictionaryBuilder} and {@code newDict} by a
 * {@code TrieDictionaryForestBuilder}. The number of trees in the forest dictionary
 * is printed to standard output.
 */
@Before
public void before() {
    int dataSize = 100 * 10000;

    TrieDictionaryBuilder<String> trieBuilder = new TrieDictionaryBuilder<>(new StringBytesConverter());
    // Base id 0; the third argument (5) presumably caps the per-tree size - TODO confirm unit.
    TrieDictionaryForestBuilder<String> forestBuilder =
            new TrieDictionaryForestBuilder<String>(new StringBytesConverter(), 0, 5);

    this.rawData = genStringDataSet(dataSize);
    for (String value : this.rawData) {
        // Both builders receive identical input in identical order.
        trieBuilder.addValue(value);
        forestBuilder.addValue(value);
    }

    this.oldDict = trieBuilder.build(0);
    this.newDict = forestBuilder.build();

    TrieDictionaryForest<String> forest = (TrieDictionaryForest<String>) newDict;
    System.out.println("new dict split tree size : " + forest.getTrees().size());
}
@Test public void emptyDictTest() throws Exception { TrieDictionaryForestBuilder<String> b = new TrieDictionaryForestBuilder<String>(new StringBytesConverter()); TrieDictionaryForest<String> dict = b.build(); try { int id = dict.getIdFromValue("123", 0); fail("id should not exist"); } catch (IllegalArgumentException e) { //right } try { String value = dict.getValueFromIdImpl(123); fail("value should not exist"); } catch (IllegalArgumentException e) { //right } }
/**
 * Prepares this dictionary builder by creating a fresh forest builder for strings.
 *
 * <p>Any previously assigned {@code builder} is replaced. The builder is created with
 * an explicit {@code <String>} type argument rather than as a raw type, so the compiler
 * can check the converter/value types.
 *
 * @param info    not used by this implementation
 * @param baseId  base id handed to the {@code TrieDictionaryForestBuilder} constructor
 * @param hdfsDir not used by this implementation
 * @throws IOException declared by the overridden signature
 */
@Override
public void init(DictionaryInfo info, int baseId, String hdfsDir) throws IOException {
    builder = new TrieDictionaryForestBuilder<String>(new StringBytesConverter(), baseId);
}