return dicts.get(0); } else { Dictionary<String> newDict = DictionaryGenerator.mergeDictionaries(DataType.getType(newDictInfo.getDataType()), dicts); return trySaveNewDict(newDict, newDictInfo);
/**
 * Builds the dictionary for one column by enumerating that column's values
 * out of a readable table.
 *
 * <p>When {@code builderClass} is {@code null} the dictionary is built from the
 * data type recorded in {@code dictInfo}; otherwise {@code builderClass} is
 * instantiated through {@code ClassUtil.newInstance} and used as the builder.
 *
 * @param inpTable     table whose reader supplies the column values
 * @param dictInfo     supplies the source column index and the data type
 * @param builderClass class name of a custom {@code IDictionaryBuilder}, or {@code null}
 * @param col          column reference, used only in the failure message
 * @return the dictionary produced by {@code DictionaryGenerator}
 * @throws RuntimeException wrapping any exception raised while opening the reader or building
 * @throws IOException      presumably only from closing the enumerator, since build-time
 *                          failures are wrapped -- confirm against the enumerator's contract
 */
private Dictionary<String> buildDictFromReadableTable(IReadableTable inpTable, DictionaryInfo dictInfo, String builderClass, TblColRef col) throws IOException {
    IDictionaryValueEnumerator valueEnumerator = null;
    try {
        valueEnumerator = new TableColumnValueEnumerator(inpTable.getReader(), dictInfo.getSourceColumnIndex());

        if (builderClass != null) {
            // A custom builder was requested: instantiate it reflectively.
            IDictionaryBuilder customBuilder = (IDictionaryBuilder) ClassUtil.newInstance(builderClass);
            return DictionaryGenerator.buildDictionary(customBuilder, dictInfo, valueEnumerator);
        }

        // No custom builder: build from the column's declared data type.
        return DictionaryGenerator.buildDictionary(DataType.getType(dictInfo.getDataType()), valueEnumerator);
    } catch (Exception ex) {
        throw new RuntimeException("Failed to create dictionary on " + col, ex);
    } finally {
        // Runs before the value is handed back, on both success and failure paths.
        if (valueEnumerator != null) {
            valueEnumerator.close();
        }
    }
}
public static Dictionary<?> buildDictionaryFromValueList(DictionaryInfo info, List<byte[]> values) { info.setCardinality(values.size()); Dictionary dict = null; int baseId = 0; // always 0 for now int nSamples = 5; ArrayList samples = new ArrayList(); // build dict, case by data type DataType dataType = DataType.getInstance(info.getDataType()); if (dataType.isDateTimeFamily()) dict = buildDateStrDict(values, baseId, nSamples, samples); else if (dataType.isNumberFamily()) dict = buildNumberDict(values, baseId, nSamples, samples); else dict = buildStringDict(values, baseId, nSamples, samples); // log a few samples StringBuilder buf = new StringBuilder(); for (Object s : samples) { if (buf.length() > 0) buf.append(", "); buf.append(s.toString()).append("=>").append(dict.getIdFromValue(s)); } logger.info("Dictionary value samples: " + buf.toString()); logger.info("Dictionary cardinality " + info.getCardinality()); if (values.size() > DICT_MAX_CARDINALITY) throw new IllegalArgumentException("Too high cardinality is not suitable for dictionary -- " + info.getSourceTable() + "." + info.getSourceColumn() + " cardinality: " + values.size()); return dict; }
public static org.apache.kylin.common.util.Dictionary<?> buildDictionaryFromValueEnumerator(DictionaryInfo info, IDictionaryValueEnumerator valueEnumerator) throws IOException{ org.apache.kylin.common.util.Dictionary dict = null; int baseId = 0; // always 0 for now final int nSamples = 5; ArrayList samples = Lists.newArrayListWithCapacity(nSamples); // build dict, case by data type DataType dataType = DataType.getInstance(info.getDataType()); if (dataType.isDateTimeFamily()) dict = buildDateStrDict(valueEnumerator, baseId, nSamples, samples); else if (dataType.isNumberFamily()) dict = buildNumberDict(valueEnumerator, baseId, nSamples, samples); else dict = buildStringDict(valueEnumerator, baseId, nSamples, samples); // log a few samples StringBuilder buf = new StringBuilder(); for (Object s : samples) { if (buf.length() > 0) buf.append(", "); buf.append(s.toString()).append("=>").append(dict.getIdFromValue(s)); } logger.info("Dictionary value samples: " + buf.toString()); logger.info("Dictionary cardinality: " + dict.getSize()); if (dict instanceof TrieDictionary && dict.getSize() > DICT_MAX_CARDINALITY) throw new IllegalArgumentException("Too high cardinality is not suitable for dictionary -- " + info.getSourceTable() + "." + info.getSourceColumn() + " cardinality: " + dict.getSize()); return dict; }
return dicts.get(0); } else { Dictionary<String> newDict = DictionaryGenerator.mergeDictionaries(DataType.getType(newDictInfo.getDataType()), dicts); return trySaveNewDict(newDict, newDictInfo);
/**
 * Builds the dictionary for one column by enumerating that column's values
 * out of a readable table.
 *
 * <p>When {@code builderClass} is {@code null} the dictionary is built from the
 * data type recorded in {@code dictInfo}; otherwise {@code builderClass} is
 * instantiated through {@code ClassUtil.newInstance} and used as the builder.
 *
 * @param inpTable     table whose reader supplies the column values
 * @param dictInfo     supplies the source column index and the data type
 * @param builderClass class name of a custom {@code IDictionaryBuilder}, or {@code null}
 * @param col          column reference, used only in the failure message
 * @return the dictionary produced by {@code DictionaryGenerator}
 * @throws RuntimeException wrapping any exception raised while opening the reader or building
 * @throws IOException      presumably only from closing the enumerator, since build-time
 *                          failures are wrapped -- confirm against the enumerator's contract
 */
private Dictionary<String> buildDictFromReadableTable(IReadableTable inpTable, DictionaryInfo dictInfo, String builderClass, TblColRef col) throws IOException {
    IDictionaryValueEnumerator valueEnumerator = null;
    try {
        valueEnumerator = new TableColumnValueEnumerator(inpTable.getReader(), dictInfo.getSourceColumnIndex());

        if (builderClass != null) {
            // A custom builder was requested: instantiate it reflectively.
            IDictionaryBuilder customBuilder = (IDictionaryBuilder) ClassUtil.newInstance(builderClass);
            return DictionaryGenerator.buildDictionary(customBuilder, dictInfo, valueEnumerator);
        }

        // No custom builder: build from the column's declared data type.
        return DictionaryGenerator.buildDictionary(DataType.getType(dictInfo.getDataType()), valueEnumerator);
    } catch (Exception ex) {
        throw new RuntimeException("Failed to create dictionary on " + col, ex);
    } finally {
        // Runs before the value is handed back, on both success and failure paths.
        if (valueEnumerator != null) {
            valueEnumerator.close();
        }
    }
}