/**
 * Picks the dictionary with the highest cardinality among all dictionaries
 * stored under the resource directory of the given dictionary.
 *
 * <p>When several stored dictionaries share the highest cardinality, the one
 * that appears first in the store's listing is kept, because a candidate only
 * replaces the current pick when its cardinality is strictly greater.
 *
 * @param dictInfo dictionary whose resource directory is scanned
 * @return the stored dictionary with the greatest cardinality, or {@code null}
 *         when the store lists no dictionaries for that directory
 * @throws IOException if reading from the resource store fails
 */
private DictionaryInfo findLargestDictInfo(DictionaryInfo dictInfo) throws IOException {
    final ResourceStore store = getStore();
    final String resourceDir = dictInfo.getResourceDir();
    final List<DictionaryInfo> candidates = store.getAllResources(resourceDir, DictionaryInfoSerializer.INFO_SERIALIZER);

    DictionaryInfo best = null;
    for (DictionaryInfo candidate : candidates) {
        // The first candidate is always taken; later ones must be strictly larger.
        if (best == null || best.getCardinality() < candidate.getCardinality()) {
            best = candidate;
        }
    }
    return best;
}
public static Dictionary<?> buildDictionaryFromValueList(DictionaryInfo info, List<byte[]> values) { info.setCardinality(values.size()); Dictionary dict = null; int baseId = 0; // always 0 for now int nSamples = 5; ArrayList samples = new ArrayList(); // build dict, case by data type DataType dataType = DataType.getInstance(info.getDataType()); if (dataType.isDateTimeFamily()) dict = buildDateStrDict(values, baseId, nSamples, samples); else if (dataType.isNumberFamily()) dict = buildNumberDict(values, baseId, nSamples, samples); else dict = buildStringDict(values, baseId, nSamples, samples); // log a few samples StringBuilder buf = new StringBuilder(); for (Object s : samples) { if (buf.length() > 0) buf.append(", "); buf.append(s.toString()).append("=>").append(dict.getIdFromValue(s)); } logger.info("Dictionary value samples: " + buf.toString()); logger.info("Dictionary cardinality " + info.getCardinality()); if (values.size() > DICT_MAX_CARDINALITY) throw new IllegalArgumentException("Too high cardinality is not suitable for dictionary -- " + info.getSourceTable() + "." + info.getSourceColumn() + " cardinality: " + values.size()); return dict; }
/**
 * Reads the value for every id from 0 up to (but excluding) the dictionary's
 * cardinality and asserts that the number of distinct values read equals that
 * cardinality.
 *
 * @param info1 dictionary descriptor whose dictionary object is walked
 */
@SuppressWarnings("unchecked")
private void touchDictValues(DictionaryInfo info1) {
    Dictionary<String> dictionary = (Dictionary<String>) info1.getDictionaryObject();
    HashSet<String> distinctValues = new HashSet<String>();

    // The upper bound is read once, before any value is fetched.
    final int idCount = info1.getCardinality();
    int id = 0;
    while (id < idCount) {
        distinctValues.add(dictionary.getValueFromId(id));
        id++;
    }

    assertEquals(info1.getCardinality(), distinctValues.size());
}
}
/**
 * Returns the highest-cardinality dictionary found in the resource store
 * under the same resource directory as {@code dictInfo}.
 *
 * <p>A later entry replaces the current winner only when its cardinality is
 * strictly greater, so on a tie the earlier-listed entry is returned.
 *
 * @param dictInfo dictionary that identifies the resource directory to scan
 * @return the entry with the largest cardinality, or {@code null} if the
 *         store returns no entries for that directory
 * @throws IOException if the resource store cannot be read
 */
private DictionaryInfo findLargestDictInfo(DictionaryInfo dictInfo) throws IOException {
    final ResourceStore store = getStore();
    final List<DictionaryInfo> stored = store.getAllResources(dictInfo.getResourceDir(), DictionaryInfoSerializer.INFO_SERIALIZER);

    DictionaryInfo winner = null;
    for (DictionaryInfo current : stored) {
        // Nothing chosen yet, or the current entry beats the winner outright.
        final boolean takeCurrent = (winner == null) || (winner.getCardinality() < current.getCardinality());
        if (takeCurrent) {
            winner = current;
        }
    }
    return winner;
}