/*
 * Benchmark helper: obtains a builder from newDictBuilder(str), prints the builder's stats,
 * builds a TrieDictionary with build(0), then calls benchmark(...) twice - first labelled
 * "Warm up", then "Benchmark" - with the same arguments.
 *
 * NOTE(review): this fragment is truncated in the visible text. The variables set, map,
 * strArray and array are used but their declarations are not shown, and the closing brace
 * of the method is missing - confirm against the full file.
 */
private static void benchmarkStringDictionary(Iterable<String> str) throws IOException { TrieDictionaryBuilder<String> b = newDictBuilder(str); b.stats().print(); TrieDictionary<String> dict = b.build(0); benchmark("Warm up", dict, set, map, strArray, array); benchmark("Benchmark", dict, set, map, strArray, array);
/**
 * Benchmark entry point: computes a count as {@code Integer.MAX_VALUE * 0.8 / 64} truncated to
 * an int, and benchmarks a {@code RandomStrings} instance constructed with that count.
 *
 * @param args ignored
 * @throws Exception whatever {@code benchmarkStringDictionary} propagates
 */
public static void main(String[] args) throws Exception {
    // The arithmetic is done in double and only then narrowed, exactly as a single cast would.
    final double scaled = Integer.MAX_VALUE * 0.8 / 64;
    final int count = (int) scaled;
    benchmarkStringDictionary(new RandomStrings(count));
}
/**
 * Loads the SCOWL English word list and passes it to {@code testStringDictionary} with no
 * "not found" list.
 *
 * @throws Exception if the word list cannot be opened, read or closed
 */
@Test
public void englishWordsTest() throws Exception {
    InputStream is = new FileInputStream("src/test/resources/dict/english-words.80 (scowl-2015.05.18).txt");
    ArrayList<String> str;
    try {
        str = loadStrings(is);
    } finally {
        // This method opened the stream, so it closes it; closing an already-closed stream has no effect.
        is.close();
    }
    testStringDictionary(str, null);
}
/**
 * Builds a dictionary from the strings in {@code file} and checks that
 * {@code enumeratorValuesByParent()} and {@code enumeratorValues()} yield the same values.
 * The two results are compared as hash sets, so ordering and duplicates are not checked.
 * The dictionary size and the elapsed time of each enumeration are printed to stdout.
 *
 * @param file path of the text file to load the dictionary values from
 * @throws Exception if the file cannot be opened, read or closed
 */
private void testEnumeratorValues(String file) throws Exception {
    InputStream is = new FileInputStream(file);
    ArrayList<String> str;
    try {
        str = loadStrings(is);
    } finally {
        // This method opened the stream, so it closes it; closing an already-closed stream has no effect.
        is.close();
    }

    TrieDictionaryBuilder<String> b = newDictBuilder(str);
    TrieDictionary<String> dict = b.build(0);
    System.out.println("Dictionary size for file " + file + " is " + dict.getSize());

    Stopwatch sw = new Stopwatch();
    sw.start();
    List<String> values1 = dict.enumeratorValuesByParent();
    System.out.println("By iterating id visit the time cost " + sw.elapsed(TimeUnit.MILLISECONDS) + " ms");

    // Restart the stopwatch so the second measurement does not include the first enumeration.
    sw.reset();
    sw.start();
    List<String> values2 = dict.enumeratorValues();
    System.out.println("By pre-order visit the time cost " + sw.elapsed(TimeUnit.MILLISECONDS) + " ms");
    sw.stop();

    assertEquals(Sets.newHashSet(values1), Sets.newHashSet(values2));
}
TrieDictionaryBuilder<String> b = newDictBuilder(str); TrieDictionary<String> dict = b.build(0); dict = testSerialize(dict);
public static void main(String[] args) throws Exception { InputStream is = new FileInputStream("src/test/resources/dict/dw_category_grouping_names.dat"); // InputStream is = // Util.getPackageResourceAsStream(TrieDictionaryTest.class, // "eng_com.dic"); ArrayList<String> str = loadStrings(is); benchmarkStringDictionary(str); }
/**
 * Runs {@code testStringDictionary} on a small word list whose first entry is the empty string,
 * with no "not found" list.
 */
@Test
public void emptyValueTest() {
    final String[] words = { "", "part", "par", "partition", "party", "parties", "paint" };
    ArrayList<String> str = new ArrayList<String>();
    for (String word : words) {
        str.add(word);
    }
    testStringDictionary(str, null);
}
@Test public void dictionaryContainTest() { ArrayList<String> str = new ArrayList<String>(); str.add("part"); str.add("part"); // meant to be dup str.add("par"); str.add("partition"); str.add("party"); str.add("parties"); str.add("paint"); TrieDictionaryBuilder<String> b = newDictBuilder(str); int baseId = new Random().nextInt(100); TrieDictionary<String> dict = b.build(baseId); str.add("py"); b = newDictBuilder(str); baseId = new Random().nextInt(100); TrieDictionary<String> dict2 = b.build(baseId); assertEquals(true, dict2.contains(dict)); assertEquals(false, dict.contains(dict2)); }
/**
 * Runs the per-file enumerator check on the English word list and then on the category
 * grouping names file, in that order.
 *
 * @throws Exception whatever the per-file check propagates
 */
@Test
public void testEnumeratorValues() throws Exception {
    final String[] files = {
        "src/test/resources/dict/english-words.80 (scowl-2015.05.18).txt",
        "src/test/resources/dict/dw_category_grouping_names.dat"
    };
    for (String file : files) {
        testEnumeratorValues(file);
    }
}
/*
 * Shared test helper: obtains a builder from newDictBuilder(str), builds a TrieDictionary with
 * a random argument in [0, 100) (held in the local named baseId), and replaces the dictionary
 * with the result of testSerialize(dict).
 *
 * NOTE(review): this fragment is truncated in the visible text. The notFound parameter is not
 * used in the part shown and the closing brace of the method is missing - confirm the rest of
 * the body against the full file.
 */
private static void testStringDictionary(ArrayList<String> str, ArrayList<String> notFound) { TrieDictionaryBuilder<String> b = newDictBuilder(str); int baseId = new Random().nextInt(100); TrieDictionary<String> dict = b.build(baseId); dict = testSerialize(dict);
@Test public void simpleTrieTest() { ArrayList<String> str = new ArrayList<String>(); str.add("part"); str.add("part"); // meant to be dup str.add("par"); str.add("partition"); str.add("party"); str.add("parties"); str.add("paint"); ArrayList<String> notFound = new ArrayList<String>(); notFound.add(""); notFound.add("p"); notFound.add("pa"); notFound.add("pb"); notFound.add("parti"); notFound.add("partz"); notFound.add("partyz"); testStringDictionary(str, notFound); }
/**
 * Builds a dictionary (with {@code build(0)}) whose only value is the empty string and asserts
 * that its size is 1 and that the id returned for the empty string is 0.
 */
@Test
public void testAllNullValue() {
    ArrayList<String> onlyEmpty = new ArrayList<String>();
    onlyEmpty.add("");

    TrieDictionary<String> dict = newDictBuilder(onlyEmpty).build(0);

    assertEquals(1, dict.getSize());
    assertEquals(0, dict.getIdFromValue(""));
}
/**
 * Loads the category grouping names file and passes its strings to
 * {@code testStringDictionary} with no "not found" list.
 *
 * @throws Exception if the data file cannot be opened, read or closed
 */
@Test
public void categoryNamesTest() throws Exception {
    InputStream is = new FileInputStream("src/test/resources/dict/dw_category_grouping_names.dat");
    ArrayList<String> str;
    try {
        str = loadStrings(is);
    } finally {
        // This method opened the stream, so it closes it; closing an already-closed stream has no effect.
        is.close();
    }
    testStringDictionary(str, null);
}
TrieDictionaryBuilder<String> b = newDictBuilder(str); TrieDictionary<String> dict = b.build(0); dict = testSerialize(dict);
/*
 * Benchmark helper: obtains a builder from newDictBuilder(str), prints the builder's stats,
 * builds a TrieDictionary with build(0), then calls benchmark(...) twice - first labelled
 * "Warm up", then "Benchmark" - with the same arguments.
 *
 * NOTE(review): this fragment is truncated in the visible text. The variables set, map,
 * strArray and array are used but their declarations are not shown, and the closing brace
 * of the method is missing - confirm against the full file.
 */
private static void benchmarkStringDictionary(ArrayList<String> str) throws UnsupportedEncodingException { TrieDictionaryBuilder<String> b = newDictBuilder(str); b.stats().print(); TrieDictionary<String> dict = b.build(0); benchmark("Warm up", dict, set, map, strArray, array); benchmark("Benchmark", dict, set, map, strArray, array);
/**
 * Runs {@code testStringDictionary} on a small word list whose first entry is the empty string,
 * with no "not found" list.
 */
@Test
public void emptyValueTest() {
    final String[] words = { "", "part", "par", "partition", "party", "parties", "paint" };
    ArrayList<String> str = new ArrayList<String>();
    for (String word : words) {
        str.add(word);
    }
    testStringDictionary(str, null);
}
/**
 * Loads {@code eng_com.dic} and passes its strings to {@code testStringDictionary} with no
 * "not found" list.
 *
 * @throws Exception if the dictionary file cannot be opened, read or closed
 */
@Test
public void englishWordsTest() throws Exception {
    InputStream is = new FileInputStream("src/test/resources/dict/eng_com.dic");
    ArrayList<String> str;
    try {
        str = loadStrings(is);
    } finally {
        // This method opened the stream, so it closes it; closing an already-closed stream has no effect.
        is.close();
    }
    testStringDictionary(str, null);
}
/*
 * Shared test helper: obtains a builder from newDictBuilder(str), builds a TrieDictionary with
 * a random argument in [0, 100) (held in the local named baseId), and replaces the dictionary
 * with the result of testSerialize(dict).
 *
 * NOTE(review): this fragment is truncated in the visible text. The notFound parameter is not
 * used in the part shown and the closing brace of the method is missing - confirm the rest of
 * the body against the full file.
 */
private static void testStringDictionary(ArrayList<String> str, ArrayList<String> notFound) { TrieDictionaryBuilder<String> b = newDictBuilder(str); int baseId = new Random().nextInt(100); TrieDictionary<String> dict = b.build(baseId); dict = testSerialize(dict);
@Test public void simpleTrieTest() { ArrayList<String> str = new ArrayList<String>(); str.add("part"); str.add("part"); // meant to be dup str.add("par"); str.add("partition"); str.add("party"); str.add("parties"); str.add("paint"); ArrayList<String> notFound = new ArrayList<String>(); notFound.add(""); notFound.add("p"); notFound.add("pa"); notFound.add("pb"); notFound.add("parti"); notFound.add("partz"); notFound.add("partyz"); testStringDictionary(str, notFound); }
/**
 * Loads the category grouping names file and passes its strings to
 * {@code testStringDictionary} with no "not found" list.
 *
 * @throws Exception if the data file cannot be opened, read or closed
 */
@Test
public void categoryNamesTest() throws Exception {
    InputStream is = new FileInputStream("src/test/resources/dict/dw_category_grouping_names.dat");
    ArrayList<String> str;
    try {
        str = loadStrings(is);
    } finally {
        // This method opened the stream, so it closes it; closing an already-closed stream has no effect.
        is.close();
    }
    testStringDictionary(str, null);
}