/**
 * Populates a dictionary descriptor from a freshly built dictionary.
 *
 * <p>Writes three things onto {@code newDictInfo}: the dictionary's size (as cardinality),
 * the dictionary instance itself, and the fully qualified name of its runtime class.
 *
 * @param newDict     the dictionary whose properties are copied
 * @param newDictInfo the descriptor that receives them
 */
private void initDictInfo(Dictionary<String> newDict, DictionaryInfo newDictInfo) {
    // Runtime class name, so the concrete implementation is recorded rather than the declared type.
    final String dictClassName = newDict.getClass().getName();

    newDictInfo.setCardinality(newDict.getSize());
    newDictInfo.setDictionaryObject(newDict);
    newDictInfo.setDictionaryClass(dictClassName);
}
public static Dictionary<?> buildDictionaryFromValueList(DictionaryInfo info, List<byte[]> values) { info.setCardinality(values.size()); Dictionary dict = null; int baseId = 0; // always 0 for now int nSamples = 5; ArrayList samples = new ArrayList(); // build dict, case by data type DataType dataType = DataType.getInstance(info.getDataType()); if (dataType.isDateTimeFamily()) dict = buildDateStrDict(values, baseId, nSamples, samples); else if (dataType.isNumberFamily()) dict = buildNumberDict(values, baseId, nSamples, samples); else dict = buildStringDict(values, baseId, nSamples, samples); // log a few samples StringBuilder buf = new StringBuilder(); for (Object s : samples) { if (buf.length() > 0) buf.append(", "); buf.append(s.toString()).append("=>").append(dict.getIdFromValue(s)); } logger.info("Dictionary value samples: " + buf.toString()); logger.info("Dictionary cardinality " + info.getCardinality()); if (values.size() > DICT_MAX_CARDINALITY) throw new IllegalArgumentException("Too high cardinality is not suitable for dictionary -- " + info.getSourceTable() + "." + info.getSourceColumn() + " cardinality: " + values.size()); return dict; }
/**
 * Transfers the properties of a newly built dictionary onto its descriptor.
 *
 * <p>Sets the descriptor's cardinality to the dictionary's size, attaches the dictionary
 * instance, and records the fully qualified name of the dictionary's runtime class.
 *
 * @param newDict     the dictionary to read from
 * @param newDictInfo the descriptor to update
 */
private void initDictInfo(Dictionary<String> newDict, DictionaryInfo newDictInfo) {
    // Name of the concrete implementation class, taken from the instance itself.
    final String implementationName = newDict.getClass().getName();

    newDictInfo.setCardinality(newDict.getSize());
    newDictInfo.setDictionaryObject(newDict);
    newDictInfo.setDictionaryClass(implementationName);
}
/**
 * Saves a new dictionary unless one with identical content is already known.
 *
 * <p>{@code checkDupByContent} is consulted first. If it returns a non-null path, that existing
 * dictionary's info is looked up and returned, and nothing is saved. Otherwise
 * {@code newDictInfo} is filled in from {@code newDict} (cardinality, dictionary object, class
 * name), saved, put into {@code dictCache} under its resource path, and returned.
 *
 * @param newDict     the dictionary to save
 * @param newDictInfo the descriptor for {@code newDict}
 * @return the info of the reused dictionary, or {@code newDictInfo} after it has been saved
 * @throws IOException declared by the method; thrown by the calls it makes
 */
public DictionaryInfo trySaveNewDict(Dictionary<?> newDict, DictionaryInfo newDictInfo) throws IOException {
    final String existingPath = checkDupByContent(newDictInfo, newDict);

    if (existingPath == null) {
        // No duplicate reported: complete the descriptor, save it, and cache it.
        newDictInfo.setCardinality(newDict.getSize());
        newDictInfo.setDictionaryObject(newDict);
        newDictInfo.setDictionaryClass(newDict.getClass().getName());

        save(newDictInfo);
        dictCache.put(newDictInfo.getResourcePath(), newDictInfo);
        return newDictInfo;
    }

    // A duplicate was reported: hand back the existing entry instead of saving again.
    logger.info("Identical dictionary content " + newDict + ", reuse existing dictionary at " + existingPath);
    return getDictionaryInfo(existingPath);
}