/**
 * Prepares this instance for building a dictionary: records the base id and
 * installs a fresh {@link TrieDictionaryBuilder} constructed with a
 * {@link StringBytesConverter}.
 *
 * @param info    dictionary descriptor; not read by this method
 * @param baseId  value stored in {@code this.baseId}
 * @param hdfsDir not read by this method
 * @throws IOException declared by the overridden signature
 */
@Override
public void init(DictionaryInfo info, int baseId, String hdfsDir) throws IOException {
    this.baseId = baseId;

    final StringBytesConverter stringConverter = new StringBytesConverter();
    this.builder = new TrieDictionaryBuilder(stringConverter);
}
// Fragment: walks the strings produced by RandomStrings(10 * 10000), converts each one to bytes with
// StringBytesConverter, and adds the length of every non-null result to totalSize.
// The rest of the loop body lies outside this view.
Iterator<String> it = new RandomStrings(10 * 10000).iterator(); int totalSize = 0; final StringBytesConverter converter = new StringBytesConverter(); while (it.hasNext()) { String str = it.next(); byte[] data = converter.convertToBytes(str); if (data != null) { totalSize += data.length;
// Fragment: walks the strings produced by RandomStrings(10000), converts each one to bytes with
// StringBytesConverter, and adds the length of every non-null result to totalSize.
// The rest of the loop body lies outside this view.
Iterator<String> it = new RandomStrings(10000).iterator(); int totalSize = 0; final StringBytesConverter converter = new StringBytesConverter(); while (it.hasNext()) { String str = it.next(); byte[] data = converter.convertToBytes(str); if (data != null) { totalSize += data.length;
/**
 * Installs a fresh {@link TrieDictionaryForestBuilder}, constructed with a
 * {@link StringBytesConverter} and the supplied base id, as this instance's builder.
 *
 * @param info    dictionary descriptor; not read by this method
 * @param baseId  forwarded to the forest builder's constructor
 * @param hdfsDir not read by this method
 * @throws IOException declared by the overridden signature
 */
@Override
public void init(DictionaryInfo info, int baseId, String hdfsDir) throws IOException {
    final StringBytesConverter stringConverter = new StringBytesConverter();
    builder = new TrieDictionaryForestBuilder(stringConverter, baseId);
}
// Fragment: runs `num` iterations; each one renders the int counter k (starting at -48481, stepped by 1) as a
// string and adds the converted byte length of the constant `basic` to size.
// NOTE(review): size is accumulated from `basic`, not from `value`; the remainder of the loop body is
// outside this view — confirm that is intended.
int k = -48481; int size = 0; StringBytesConverter converter = new StringBytesConverter(); for (int i = 0; i < num; i++) { String value = k + ""; k += 1; String basic = "-9999999999999952517"; size += converter.convertToBytes(basic).length;
// Picks a String comparator according to the data type: anything outside the number family is compared through
// a ByteComparator built on StringBytesConverter; the integer-family branch begins an anonymous Comparator
// whose body (and any remaining branches) continue beyond this view.
private Comparator<String> getComparatorByType(DataType type) { Comparator<String> comparator; if (!type.isNumberFamily()) { comparator = new ByteComparator<>(new StringBytesConverter()); } else if (type.isIntegerFamily()) { comparator = new Comparator<String>() {
// Fragment: runs `num` iterations; each one renders the double counter k (starting at -0.0, stepped by 1.55) as a
// string and adds the converted byte length of the constant `basic` to size.
// NOTE(review): size is accumulated from `basic`, not from `value`; the remainder of the loop body is
// outside this view — confirm that is intended.
double k = -0.0; int size = 0; StringBytesConverter converter = new StringBytesConverter(); for (int i = 0; i < num; i++) { String value = k + ""; k += 1.55; String basic = "-9999999999999952517"; size += converter.convertToBytes(basic).length;
/**
 * Creates a string {@link TrieDictionaryForestBuilder} and adds every element
 * of {@code strs} to it, in iteration order.
 *
 * @param strs   values to add
 * @param baseId forwarded to the builder's constructor
 * @return the populated builder (nothing has been built yet)
 */
public static TrieDictionaryForestBuilder<String> newDictBuilder(Iterable<String> strs, int baseId) {
    final StringBytesConverter stringConverter = new StringBytesConverter();
    final TrieDictionaryForestBuilder<String> forestBuilder =
            new TrieDictionaryForestBuilder<String>(stringConverter, baseId);

    for (String value : strs) {
        forestBuilder.addValue(value);
    }
    return forestBuilder;
}
/**
 * Creates a string {@link TrieDictionaryBuilder} and adds every element of
 * {@code str} to it, in iteration order.
 *
 * @param str values to add
 * @return the populated builder (nothing has been built yet)
 */
private static TrieDictionaryBuilder<String> newDictBuilder(Iterable<String> str) {
    final StringBytesConverter stringConverter = new StringBytesConverter();
    final TrieDictionaryBuilder<String> trieBuilder = new TrieDictionaryBuilder<String>(stringConverter);

    for (String value : str) {
        trieBuilder.addValue(value);
    }
    return trieBuilder;
}
// String trie dictionary builder, constructed with a StringBytesConverter.
TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter());
/**
 * Creates a string {@link TrieDictionaryForestBuilder}, applies the given maximum
 * trie tree size, and drains {@code strs} into it.
 *
 * @param strs     iterator supplying the values; it is consumed to exhaustion
 * @param baseId   forwarded to the builder's constructor
 * @param treeSize passed to {@code setMaxTrieTreeSize}
 * @return the populated builder (nothing has been built yet)
 */
public static TrieDictionaryForestBuilder<String> newDictBuilder(Iterator<String> strs, int baseId, int treeSize) {
    final StringBytesConverter stringConverter = new StringBytesConverter();
    final TrieDictionaryForestBuilder<String> forestBuilder =
            new TrieDictionaryForestBuilder<String>(stringConverter, baseId);
    forestBuilder.setMaxTrieTreeSize(treeSize);

    while (strs.hasNext()) {
        final String value = strs.next();
        forestBuilder.addValue(value);
    }
    return forestBuilder;
}
public synchronized void init() throws IOException { this.store = new GlobalDictHDFSStore(baseDir); store.prepareForWrite(workingDir, isAppendDictGlobal); Long[] versions = store.listAllVersions(); if (versions.length == 0 || !isAppendDictGlobal) { // build dict for the first time this.maxId = 0; this.maxValueLength = 0; this.nValues = 0; this.bytesConverter = new StringBytesConverter(); } else { // append values to last version GlobalDictMetadata metadata = store.getMetadata(versions[versions.length - 1]); this.maxId = metadata.maxId; this.maxValueLength = metadata.maxValueLength; this.nValues = metadata.nValues; this.bytesConverter = metadata.bytesConverter; this.sliceFileMap = new TreeMap<>(metadata.sliceFileMap); } }
/**
 * Creates a string {@link TrieDictionaryForestBuilder}, applies the given maximum
 * trie tree size, and adds every element of {@code strs} in iteration order.
 *
 * @param strs     values to add
 * @param baseId   forwarded to the builder's constructor
 * @param treeSize passed to {@code setMaxTrieTreeSize}
 * @return the populated builder (nothing has been built yet)
 */
public static TrieDictionaryForestBuilder<String> newDictBuilder(Iterable<String> strs, int baseId, int treeSize) {
    final StringBytesConverter stringConverter = new StringBytesConverter();
    final TrieDictionaryForestBuilder<String> forestBuilder =
            new TrieDictionaryForestBuilder<String>(stringConverter, baseId);
    forestBuilder.setMaxTrieTreeSize(treeSize);

    for (String value : strs) {
        forestBuilder.addValue(value);
    }
    return forestBuilder;
}
/**
 * Loads the category grouping names fixture, sorts it with a
 * {@link ByteComparator} over {@link StringBytesConverter}, and runs the
 * string-dictionary check with no "not found" list.
 *
 * @throws Exception if the fixture cannot be opened or read, or the check fails
 */
@Test
public void categoryNamesTest() throws Exception {
    ArrayList<String> str;
    // try-with-resources releases the file handle even when loading throws;
    // a second close by loadStrings (if it closes the stream itself) is a no-op.
    try (InputStream is = new FileInputStream("src/test/resources/dict/dw_category_grouping_names.dat")) {
        str = loadStrings(is);
    }
    Collections.sort(str, new ByteComparator<String>(new StringBytesConverter()));
    testStringDictionary(str, null);
}
/**
 * Collects everything produced by {@code new RandomStrings(count)} into a list,
 * sorts it with a {@link ByteComparator} over {@link StringBytesConverter},
 * passes the sorted list to {@code evaluateDataSize}, and returns it.
 *
 * @param count argument handed to the {@code RandomStrings} constructor
 * @return the sorted list of generated strings
 */
private ArrayList<String> getTestData(int count) {
    final RandomStrings generator = new RandomStrings(count);
    final Iterator<String> cursor = generator.iterator();

    final ArrayList<String> samples = new ArrayList<>();
    while (cursor.hasNext()) {
        final String sample = cursor.next();
        samples.add(sample);
    }

    final ByteComparator<String> byteOrder = new ByteComparator<String>(new StringBytesConverter());
    Collections.sort(samples, byteOrder);

    evaluateDataSize(samples);
    return samples;
}
/**
 * Builds a dictionary from the given strings: each one is added to a string
 * {@link TrieDictionaryBuilder}, which is then built with argument {@code 0}.
 *
 * @param strs values to add, in the collection's iteration order
 * @return the dictionary returned by {@code build(0)}
 */
@SuppressWarnings("rawtypes")
private static Dictionary strsToDict(Collection<String> strs) {
    final StringBytesConverter stringConverter = new StringBytesConverter();
    final TrieDictionaryBuilder<String> trieBuilder = new TrieDictionaryBuilder<>(stringConverter);

    for (String value : strs) {
        trieBuilder.addValue(value);
    }

    return trieBuilder.build(0);
}
/**
 * Loads the English words fixture, sorts it with a {@link ByteComparator}
 * over {@link StringBytesConverter}, and runs the string-dictionary check
 * with no "not found" list.
 *
 * @throws Exception if the fixture cannot be opened or read, or the check fails
 */
@Test
public void englishWordsTest() throws Exception {
    ArrayList<String> str;
    // try-with-resources releases the file handle even when loading throws;
    // a second close by loadStrings (if it closes the stream itself) is a no-op.
    try (InputStream is = new FileInputStream("src/test/resources/dict/english-words.80 (scowl-2015.05.18).txt")) {
        str = loadStrings(is);
    }
    Collections.sort(str, new ByteComparator<String>(new StringBytesConverter()));
    testStringDictionary(str, null);
}
/**
 * Runs the string-dictionary check on a small set of words sharing the "pa"
 * prefix, together with a list of strings that are absent from that set
 * (including the empty string and several prefixes/extensions of the words).
 */
@Test
public void notFoundTest() {
    final ArrayList<String> words = new ArrayList<String>();
    Collections.addAll(words, "part", "par", "partition", "party", "parties", "paint");

    final ByteComparator<String> byteOrder = new ByteComparator<String>(new StringBytesConverter());
    Collections.sort(words, byteOrder);

    final ArrayList<String> absent = new ArrayList<String>();
    Collections.addAll(absent, "", "p", "pa", "pb", "parti", "partz", "partyz");

    testStringDictionary(words, absent);
}
// Ad-hoc entry point: creates a string trie builder, adds the single value "part", and calls b.print().
// The method's closing brace (and any further statements) lie outside this view.
public static void main(String[] args) throws Exception { TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter()); b.addValue("part"); b.print();
/**
 * Fixture setup: generates the raw data set, feeds every value into both a
 * {@link TrieDictionaryBuilder} and a {@link TrieDictionaryForestBuilder},
 * stores the two resulting dictionaries in {@code oldDict} / {@code newDict},
 * and prints how many trees the forest dictionary holds.
 */
@Before
public void before() {
    final int dataSize = 100 * 10000;

    final TrieDictionaryBuilder<String> trieBuilder =
            new TrieDictionaryBuilder<>(new StringBytesConverter());
    final TrieDictionaryForestBuilder<String> forestBuilder =
            new TrieDictionaryForestBuilder<String>(new StringBytesConverter(), 0, 5);

    this.rawData = genStringDataSet(dataSize);
    for (String value : this.rawData) {
        trieBuilder.addValue(value);
        forestBuilder.addValue(value);
    }

    this.oldDict = trieBuilder.build(0);
    this.newDict = forestBuilder.build();

    final TrieDictionaryForest<String> forest = (TrieDictionaryForest<String>) newDict;
    System.out.println("new dict split tree size : " + forest.getTrees().size());
}