/**
 * Builds a small dictionary containing the integer strings "10", "20", ... "100".
 *
 * @return whatever {@code NumberDictionaryForestBuilder#build()} produces for those ten values
 */
private static Dictionary newDictionaryOfInteger() {
    NumberDictionaryForestBuilder forestBuilder = new NumberDictionaryForestBuilder();
    // Multiples of ten from 10 through 100 inclusive, added in ascending order.
    for (int number = 10; number <= 100; number += 10) {
        forestBuilder.addValue(String.valueOf(number));
    }
    return forestBuilder.build();
}
NumberDictionaryForestBuilder builder = new NumberDictionaryForestBuilder(0); builder.setMaxTrieTreeSize(size / treeNum); Iterator<String> it = set.iterator(); while (it.hasNext()) { set.remove(str); } else { builder.addValue(str); TrieDictionaryForest<String> dict = builder.build(); System.out.println("tree size:" + dict.getTrees().size()); System.out.println("--------------dict-----------------");
/**
 * Manual memory benchmark (ignored by default): keeps adding consecutive integers,
 * rendered as strings, to a {@code NumberDictionaryForestBuilder} and prints a
 * progress line every 100000 entries. The loop never terminates on its own.
 *
 * @throws Exception if the initial pause is interrupted or the builder fails
 */
@Ignore
@Test
public void memoryUsageBenchmarkNewDictForestTest() throws Exception {
    System.out.println("max memory:" + Runtime.getRuntime().maxMemory());
    System.gc();
    // sleep is a static method that always pauses the calling thread, so it is
    // invoked on the class rather than through Thread.currentThread().
    Thread.sleep(3000);
    // NOTE(review): the meaning of the two constructor arguments is not visible here;
    // presumably (baseId, maxTrieSizeMB) -- confirm against the builder.
    NumberDictionaryForestBuilder b = new NumberDictionaryForestBuilder(0, 0);
    /*
     * Results recorded by the original author (kept verbatim):
     * memory:1908932608(1800MB)
     * maxTrieSize:500M entry:17500000
     * maxTrieSize:180M entry:47100000
     * maxTrieSize:100M entry:83800000
     * maxTrieSize:50M entry:128400000
     * maxTrieSize:25M entry:148100000
     * maxTrieSize:0M entry: 5000000 5-8
     */
    int k = 0;
    while (true) {
        b.addValue(String.valueOf(k));
        if (k % 100000 == 0) {
            System.out.println(k);
        }
        k++;
    }
}
/**
 * Resets the wrapped builder to a new {@code NumberDictionaryForestBuilder}
 * constructed with {@code baseId}.
 *
 * @param info    not read by this implementation
 * @param baseId  value handed to the {@code NumberDictionaryForestBuilder} constructor
 * @param hdfsDir not read by this implementation
 * @throws IOException declared by the overridden method; nothing here throws it directly
 */
@Override
public void init(DictionaryInfo info, int baseId, String hdfsDir) throws IOException {
    NumberDictionaryForestBuilder fresh = new NumberDictionaryForestBuilder(baseId);
    builder = fresh;
}
@Override public boolean addValue(String value) { if (StringUtils.isBlank(value)) // empty string is treated as null return false; builder.addValue(value); return true; }
/**
 * Finishes construction by delegating to the wrapped builder.
 *
 * @return the dictionary returned by the wrapped builder's {@code build()}
 * @throws IOException declared by the overridden method
 */
@Override
public Dictionary<String> build() throws IOException {
    Dictionary<String> result = builder.build();
    return result;
}
TrieDictionaryForestBuilder builder = new NumberDictionaryForestBuilder(0); builder.setMaxTrieTreeSize(size / treeNum); Iterator<String> it = set.iterator();
@Override public boolean addValue(String value) { if (StringUtils.isBlank(value)) // empty string is treated as null return false; builder.addValue(value); return true; }
/**
 * Produces the dictionary by delegating to the wrapped builder.
 *
 * @return the result of the wrapped builder's {@code build()}
 * @throws IOException declared by the overridden method
 */
@Override
public Dictionary<String> build() throws IOException {
    Dictionary<String> result = builder.build();
    return result;
}
@Test public void testDecimalsWithBeginZero() { List<String> testData = new ArrayList<>(); testData.add("000000000000000000000000000.4868"); testData.add("00000000000000000000000000000000000000"); NumberDictionaryForestBuilder b = new NumberDictionaryForestBuilder(); for (String str : testData) b.addValue(str); TrieDictionaryForest<String> dict = b.build(); //dict.dump(System.out); }
/**
 * Replaces the wrapped builder with a new {@code NumberDictionaryForestBuilder}
 * created from {@code baseId}.
 *
 * @param info    not read by this implementation
 * @param baseId  value handed to the {@code NumberDictionaryForestBuilder} constructor
 * @param hdfsDir not read by this implementation
 * @throws IOException declared by the overridden method; nothing here throws it directly
 */
@Override
public void init(DictionaryInfo info, int baseId, String hdfsDir) throws IOException {
    NumberDictionaryForestBuilder fresh = new NumberDictionaryForestBuilder(baseId);
    builder = fresh;
}
@Test public void testVerySmallDouble() { List<String> testData = new ArrayList<>(); testData.add(-1.0 + ""); testData.add(Double.MIN_VALUE + ""); testData.add("1.01"); testData.add("2.0"); NumberDictionaryForestBuilder b = new NumberDictionaryForestBuilder(); for (String str : testData) b.addValue(str); TrieDictionaryForest<String> dict = b.build(); //dict.dump(System.out); NumberDictionaryBuilder b2 = new NumberDictionaryBuilder(); for (String str : testData) b2.addValue(str); NumberDictionary<String> dict2 = b2.build(0); //dict2.dump(System.out); }
@Test public void serializeTest() { List<String> testData = new ArrayList<>(); testData.add("1"); testData.add("2"); testData.add("100"); //TrieDictionaryForestBuilder.MaxTrieTreeSize = 0; NumberDictionaryForestBuilder b = new NumberDictionaryForestBuilder(); for (String str : testData) b.addValue(str); TrieDictionaryForest<String> dict = b.build(); dict = testSerialize(dict); //dict.dump(System.out); for (String str : testData) { assertEquals(str, dict.getValueFromId(dict.getIdFromValue(str))); } }
@Test public void testMerge() { // mimic the logic as in MergeCuboidMapper NumberDictionaryForestBuilder b1 = new NumberDictionaryForestBuilder(); b1.addValue("0"); b1.addValue("3"); b1.addValue("23"); TrieDictionaryForest<String> dict1 = b1.build(); NumberDictionaryForestBuilder b2 = new NumberDictionaryForestBuilder(); b2.addValue("0"); b2.addValue("2"); b2.addValue("3"); b2.addValue("15"); b2.addValue("23"); TrieDictionaryForest<String> dict2 = b2.build(); assertTrue(dict1.getSizeOfId() == dict2.getSizeOfId()); assertTrue(dict1.getSizeOfValue() == dict2.getSizeOfValue()); { int newId = dict2.getIdFromValue(dict1.getValueFromId(0)); assertTrue(newId == 0); } { int newId = dict2.getIdFromValue(dict1.getValueFromId(1)); assertTrue(newId == 2); } { int newId = dict2.getIdFromValue(dict1.getValueFromId(2)); assertTrue(newId == 4); } }
private void testData(List<String> humanList, List<String> expectedList, SelfDefineSortableKey.TypeFlag flag) { //stimulate map-reduce job ArrayList<SelfDefineSortableKey> keyList = createKeyList(humanList, (byte) flag.ordinal()); Collections.sort(keyList); //build tree NumberDictionaryForestBuilder b = new NumberDictionaryForestBuilder(0, 0); expectedList = numberSort(expectedList); for (String value : expectedList) { b.addValue(value); } TrieDictionaryForest<String> dict = b.build(); //dict.dump(System.out); ArrayList<Integer> resultIds = new ArrayList<>(); for (int i = 0; i < keyList.size(); i++) { SelfDefineSortableKey key = keyList.get(i); String fieldValue = getFieldValue(key); resultIds.add(dict.getIdFromValue(fieldValue)); assertEquals(expectedList.get(i), dict.getValueFromId(dict.getIdFromValue(fieldValue))); } assertTrue(isIncreasedOrder(resultIds, new Comparator<Integer>() { @Override public int compare(Integer o1, Integer o2) { return o1.compareTo(o2); } })); }