private DictionaryInfo createDictionaryInfo(TblColRef col, IReadableTable inpTable) throws IOException { TableSignature inputSig = inpTable.getSignature(); if (inputSig == null) // table does not exists throw new IllegalStateException("Input table does not exist: " + inpTable); DictionaryInfo dictInfo = new DictionaryInfo(col.getColumnDesc(), col.getDatatype(), inputSig); return dictInfo; }
/**
 * Wraps {@code values} in a {@code MockDictionary} and returns a {@link DictionaryInfo} that holds it
 * as its dictionary object.
 *
 * @param values array assigned to the mock dictionary's {@code values} field (not copied)
 * @return a new DictionaryInfo whose dictionary object is the mock
 */
private static DictionaryInfo createDictInfo(String[] values) {
    final MockDictionary dictionary = new MockDictionary();
    dictionary.values = values;

    final DictionaryInfo result = new DictionaryInfo();
    result.setDictionaryObject(dictionary);
    return result;
}
/**
 * Saves every dictionary in {@code dictionaryMap} through the {@code DictionaryManager} and registers
 * the saved resource path on {@code cubeSegment}.
 *
 * <p>For each column a table signature is created with the current time as last-modified time,
 * the path {@code streaming_<startOffset>_<endOffset>} and the size {@code endOffset - startOffset}.
 *
 * @param cubeSegment   segment that receives the dictionary resource paths; its cube descriptor's
 *                      config is used to obtain the DictionaryManager
 * @param dictionaryMap dictionaries to save, keyed by column
 * @param startOffset   start offset used in the signature path and size
 * @param endOffset     end offset used in the signature path and size
 * @return a new map from column to the dictionary object of the info returned by {@code trySaveNewDict}
 * @throws RuntimeException wrapping the {@link IOException} if saving a dictionary fails
 */
@SuppressWarnings("unchecked")
public static Map<TblColRef, Dictionary<String>> writeDictionary(CubeSegment cubeSegment,
        Map<TblColRef, Dictionary<String>> dictionaryMap, long startOffset, long endOffset) {
    final Map<TblColRef, Dictionary<String>> savedDicts = Maps.newHashMap();

    for (Map.Entry<TblColRef, Dictionary<String>> each : dictionaryMap.entrySet()) {
        final TblColRef column = each.getKey();
        final Dictionary<String> builtDict = each.getValue();

        // Synthetic signature describing the streaming offset range.
        final IReadableTable.TableSignature sig = new IReadableTable.TableSignature();
        sig.setLastModifiedTime(System.currentTimeMillis());
        sig.setPath(String.format(Locale.ROOT, "streaming_%s_%s", startOffset, endOffset));
        sig.setSize(endOffset - startOffset);
        final DictionaryInfo pendingInfo = new DictionaryInfo(column.getColumnDesc(), column.getDatatype(), sig);

        logger.info("writing dictionary for TblColRef:" + column.toString());
        final DictionaryManager manager = DictionaryManager.getInstance(cubeSegment.getCubeDesc().getConfig());

        try {
            final DictionaryInfo savedInfo = manager.trySaveNewDict(builtDict, pendingInfo);
            cubeSegment.putDictResPath(column, savedInfo.getResourcePath());
            // The returned info's dictionary object is what callers get back, not the input dictionary.
            savedDicts.put(column, (Dictionary<String>) savedInfo.getDictionaryObject());
        } catch (IOException e) {
            throw new RuntimeException("error save dictionary for column:" + column, e);
        }
    }

    return savedDicts;
}
// Copy-construct a DictionaryInfo from firstDictInfo, then set totalSize on the signature returned by getInput().
// NOTE(review): if the copy constructor reuses firstDictInfo's signature object, setSize() also changes firstDictInfo - confirm.
DictionaryInfo newDictInfo = new DictionaryInfo(firstDictInfo); TableSignature signature = newDictInfo.getInput(); signature.setSize(totalSize);
/**
 * Removes every dictionary resource listed under the resource directory derived from the given
 * source table and column.
 *
 * @param srcTable source table name set on the probe info
 * @param srcCol   source column name set on the probe info
 * @throws IOException declared by this method; presumably raised by the store or by removeDictionary
 */
public void removeDictionaries(String srcTable, String srcCol) throws IOException {
    // A throw-away info is used only to compute the resource directory.
    final DictionaryInfo probe = new DictionaryInfo();
    probe.setSourceTable(srcTable);
    probe.setSourceColumn(srcCol);

    final NavigableSet<String> paths = getStore().listResources(probe.getResourceDir());
    if (paths != null) {
        for (String path : paths) {
            removeDictionary(path);
        }
    }
}
@Override protected void doSetup(Context context) throws IOException { super.bindCurrentConfiguration(context.getConfiguration()); Configuration conf = context.getConfiguration(); mos = new MultipleOutputs(context); KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(); String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME); CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName); CubeDesc cubeDesc = cube.getDescriptor(); List<TblColRef> uhcColumns = cubeDesc.getAllUHCColumns(); int taskId = context.getTaskAttemptID().getTaskID().getId(); col = uhcColumns.get(taskId); logger.info("column name: " + col.getIdentity()); if (cube.getDescriptor().getShardByColumns().contains(col)) { //for ShardByColumns builder = DictionaryGenerator.newDictionaryBuilder(col.getType()); builder.init(null, 0, null); } else { //for GlobalDictionaryColumns String hdfsDir = conf.get(BatchConstants.CFG_GLOBAL_DICT_BASE_DIR); DictionaryInfo dictionaryInfo = new DictionaryInfo(col.getColumnDesc(), col.getDatatype()); String builderClass = cubeDesc.getDictionaryBuilderClass(col); builder = (IDictionaryBuilder) ClassUtil.newInstance(builderClass); builder.init(dictionaryInfo, 0, hdfsDir); } }
/**
 * Builds (or reuses) the dictionary for {@code col}.
 *
 * <p>The source table, column, column index and readable table come from {@code decideSourceData}.
 * If {@code checkDupByInfo} reports an existing dictionary for the same info, that one is loaded and
 * returned; otherwise a dictionary is generated from the table and saved.
 *
 * @param model           data model passed through to {@code decideSourceData}
 * @param dict            dictionary setting passed through to {@code decideSourceData}
 * @param col             column to build the dictionary for
 * @param factColumnsPath path passed through to {@code decideSourceData}
 * @return the info of the reused or newly saved dictionary
 * @throws IOException declared by this method; presumably raised by the helpers it calls
 */
public DictionaryInfo buildDictionary(DataModelDesc model, String dict, TblColRef col, String factColumnsPath) throws IOException {
    logger.info("building dictionary for " + col);

    // decideSourceData returns a positional tuple: [table name, column name, column index, table].
    final Object[] source = decideSourceData(model, dict, col, factColumnsPath);
    final String tableName = (String) source[0];
    final String columnName = (String) source[1];
    final int columnIndex = (Integer) source[2];
    final ReadableTable table = (ReadableTable) source[3];

    final DictionaryInfo candidate = new DictionaryInfo(tableName, columnName, columnIndex, col.getDatatype(),
            table.getSignature(), table.getColumnDelimeter());

    final String existingPath = checkDupByInfo(candidate);
    if (existingPath == null) {
        final Dictionary<?> built = DictionaryGenerator.buildDictionary(candidate, table);
        return trySaveNewDict(built, candidate);
    }

    logger.info("Identical dictionary input " + candidate.getInput() + ", reuse existing dictionary at " + existingPath);
    return getDictionaryInfo(existingPath);
}
/**
 * Removes every dictionary resource listed under the resource directory derived from the given
 * source table and column, using the store obtained from the MetadataManager.
 *
 * @param srcTable source table name set on the probe info
 * @param srcCol   source column name set on the probe info
 * @throws IOException declared by this method; presumably raised by the store or by removeDictionary
 */
public void removeDictionaries(String srcTable, String srcCol) throws IOException {
    // A throw-away info is used only to compute the resource directory.
    final DictionaryInfo probe = new DictionaryInfo();
    probe.setSourceTable(srcTable);
    probe.setSourceColumn(srcCol);

    final ArrayList<String> paths = MetadataManager.getInstance(config).getStore().listResources(probe.getResourceDir());
    if (paths != null) {
        for (String path : paths) {
            removeDictionary(path);
        }
    }
}
public DictionaryInfo mergeDictionary(List<DictionaryInfo> dicts) throws IOException { DictionaryInfo firstDictInfo = null; int totalSize = 0; for (DictionaryInfo info : dicts) { // check if (firstDictInfo == null) { firstDictInfo = info; } else { if (!firstDictInfo.isDictOnSameColumn(info)) { throw new IllegalArgumentException("Merging dictionaries are not structurally equal(regardless of signature)."); } } totalSize += info.getInput().getSize(); } if (firstDictInfo == null) { throw new IllegalArgumentException("DictionaryManager.mergeDictionary input cannot be null"); } DictionaryInfo newDictInfo = new DictionaryInfo(firstDictInfo); TableSignature signature = newDictInfo.getInput(); signature.setSize(totalSize); signature.setLastModifiedTime(System.currentTimeMillis()); signature.setPath("merged_with_no_original_path"); String dupDict = checkDupByInfo(newDictInfo); if (dupDict != null) { logger.info("Identical dictionary input " + newDictInfo.getInput() + ", reuse existing dictionary at " + dupDict); return getDictionaryInfo(dupDict); } Dictionary<?> newDict = DictionaryGenerator.mergeDictionaries(newDictInfo, dicts); return trySaveNewDict(newDict, newDictInfo); }
/**
 * Builds a small two-entry dictionary under a fake signature, saves it through the dictionary
 * manager, dumps it to standard output and returns the info it was built with.
 *
 * @return the DictionaryInfo passed to {@code trySaveNewDict} (not the info that call returns)
 * @throws IOException declared by this method; presumably raised while building or saving
 */
private DictionaryInfo makeSharedDict() throws IOException {
    // Two three-byte values: "eee" and "fff" in ASCII.
    final List<byte[]> entries = new ArrayList<byte[]>();
    entries.add(new byte[] { 101, 101, 101 });
    entries.add(new byte[] { 102, 102, 102 });

    final TableSignature fakeSignature = new TableSignature();
    fakeSignature.setSize(100);
    fakeSignature.setLastModifiedTime(System.currentTimeMillis());
    fakeSignature.setPath("fake_common_dict");
    final DictionaryInfo sharedInfo = new DictionaryInfo("", "", 0, "string", fakeSignature, "");

    final Dictionary<?> sharedDict = DictionaryGenerator.buildDictionaryFromValueList(sharedInfo, entries);
    dictionaryManager.trySaveNewDict(sharedDict, sharedInfo);
    ((TrieDictionary) sharedDict).dump(System.out);

    return sharedInfo;
}
// The signature path is the literal prefix followed by lfn.getName() and segment.getName(), with no separator.
// The info is then created from lfn's table, column name and zero-based column index, the literal type "string",
// the signature, and an empty last argument (presumably a column delimiter - confirm against the constructor).
signature.setPath("fake_dict_for" + lfn.getName() + segment.getName()); DictionaryInfo newDictInfo = new DictionaryInfo(lfn.getTable(), lfn.getColumn().getName(), lfn.getColumn().getZeroBasedIndex(), "string", signature, "");
private DictionaryInfo createDictionaryInfo(TblColRef col, IReadableTable inpTable) throws IOException { TableSignature inputSig = inpTable.getSignature(); if (inputSig == null) // table does not exists throw new IllegalStateException("Input table does not exist: " + inpTable); DictionaryInfo dictInfo = new DictionaryInfo(col.getColumnDesc(), col.getDatatype(), inputSig); return dictInfo; }
/**
 * Saves every dictionary in {@code dictionaryMap} through the {@code DictionaryManager} and registers
 * the saved resource path on {@code cubeSegment}.
 *
 * <p>For each column a table signature is created with the current time as last-modified time,
 * the path {@code streaming_<startOffset>_<endOffset>} and the size {@code endOffset - startOffset}.
 *
 * @param cubeSegment   segment that receives the dictionary resource paths; its cube descriptor's
 *                      config is used to obtain the DictionaryManager
 * @param dictionaryMap dictionaries to save, keyed by column
 * @param startOffset   start offset used in the signature path and size
 * @param endOffset     end offset used in the signature path and size
 * @return a new map from column to the dictionary object of the info returned by {@code trySaveNewDict}
 * @throws RuntimeException wrapping the {@link IOException} if saving a dictionary fails
 */
@SuppressWarnings("unchecked")
public static Map<TblColRef, Dictionary<String>> writeDictionary(CubeSegment cubeSegment,
        Map<TblColRef, Dictionary<String>> dictionaryMap, long startOffset, long endOffset) {
    final Map<TblColRef, Dictionary<String>> savedDicts = Maps.newHashMap();

    for (Map.Entry<TblColRef, Dictionary<String>> each : dictionaryMap.entrySet()) {
        final TblColRef column = each.getKey();
        final Dictionary<String> builtDict = each.getValue();

        // Synthetic signature describing the streaming offset range.
        final IReadableTable.TableSignature sig = new IReadableTable.TableSignature();
        sig.setLastModifiedTime(System.currentTimeMillis());
        sig.setPath(String.format(Locale.ROOT, "streaming_%s_%s", startOffset, endOffset));
        sig.setSize(endOffset - startOffset);
        final DictionaryInfo pendingInfo = new DictionaryInfo(column.getColumnDesc(), column.getDatatype(), sig);

        logger.info("writing dictionary for TblColRef:" + column.toString());
        final DictionaryManager manager = DictionaryManager.getInstance(cubeSegment.getCubeDesc().getConfig());

        try {
            final DictionaryInfo savedInfo = manager.trySaveNewDict(builtDict, pendingInfo);
            cubeSegment.putDictResPath(column, savedInfo.getResourcePath());
            // The returned info's dictionary object is what callers get back, not the input dictionary.
            savedDicts.put(column, (Dictionary<String>) savedInfo.getDictionaryObject());
        } catch (IOException e) {
            throw new RuntimeException("error save dictionary for column:" + column, e);
        }
    }

    return savedDicts;
}
// Copy-construct a DictionaryInfo from firstDictInfo, then set totalSize on the signature returned by getInput().
// NOTE(review): if the copy constructor reuses firstDictInfo's signature object, setSize() also changes firstDictInfo - confirm.
DictionaryInfo newDictInfo = new DictionaryInfo(firstDictInfo); TableSignature signature = newDictInfo.getInput(); signature.setSize(totalSize);
/**
 * Removes every dictionary resource listed under the resource directory derived from the given
 * source table and column.
 *
 * @param srcTable source table name set on the probe info
 * @param srcCol   source column name set on the probe info
 * @throws IOException declared by this method; presumably raised by the store or by removeDictionary
 */
public void removeDictionaries(String srcTable, String srcCol) throws IOException {
    // A throw-away info is used only to compute the resource directory.
    final DictionaryInfo probe = new DictionaryInfo();
    probe.setSourceTable(srcTable);
    probe.setSourceColumn(srcCol);

    final NavigableSet<String> paths = getStore().listResources(probe.getResourceDir());
    if (paths != null) {
        for (String path : paths) {
            removeDictionary(path);
        }
    }
}
// Copy-construct a DictionaryInfo from firstDictInfo, then set totalSize on the signature returned by getInput().
// NOTE(review): if the copy constructor reuses firstDictInfo's signature object, setSize() also changes firstDictInfo - confirm.
DictionaryInfo newDictInfo = new DictionaryInfo(firstDictInfo); TableSignature signature = newDictInfo.getInput(); signature.setSize(totalSize);
/**
 * Builds (or reuses) the dictionary for {@code col}.
 *
 * <p>The source table, column, column index and readable table come from {@code decideSourceData}.
 * If the table does not exist, nothing is built. If {@code checkDupByInfo} reports an existing
 * dictionary for the same info, that one is loaded and returned; otherwise a dictionary is generated
 * from the table and saved.
 *
 * @param model           data model passed through to {@code decideSourceData}
 * @param dict            dictionary setting passed through to {@code decideSourceData}
 * @param col             column to build the dictionary for
 * @param factColumnsPath path passed through to {@code decideSourceData}
 * @return the info of the reused or newly saved dictionary, or {@code null} if the source table
 *         reports that it does not exist
 * @throws IOException declared by this method; presumably raised by the helpers it calls
 */
public DictionaryInfo buildDictionary(DataModelDesc model, String dict, TblColRef col, String factColumnsPath) throws IOException {
    logger.info("building dictionary for " + col);

    // decideSourceData returns a positional tuple: [table name, column name, column index, table].
    final Object[] source = decideSourceData(model, dict, col, factColumnsPath);
    final String tableName = (String) source[0];
    final String columnName = (String) source[1];
    final int columnIndex = (Integer) source[2];
    final ReadableTable table = (ReadableTable) source[3];

    if (!table.exists()) {
        return null;
    }

    final DictionaryInfo candidate = new DictionaryInfo(tableName, columnName, columnIndex, col.getDatatype(),
            table.getSignature());

    final String existingPath = checkDupByInfo(candidate);
    if (existingPath == null) {
        final Dictionary<?> built = DictionaryGenerator.buildDictionary(candidate, table);
        return trySaveNewDict(built, candidate);
    }

    logger.info("Identical dictionary input " + candidate.getInput() + ", reuse existing dictionary at " + existingPath);
    return getDictionaryInfo(existingPath);
}
/**
 * Removes every dictionary resource listed under the resource directory derived from the given
 * source table and column, using the store obtained from the MetadataManager.
 *
 * @param srcTable source table name set on the probe info
 * @param srcCol   source column name set on the probe info
 * @throws IOException declared by this method; presumably raised by the store or by removeDictionary
 */
public void removeDictionaries(String srcTable, String srcCol) throws IOException {
    // A throw-away info is used only to compute the resource directory.
    final DictionaryInfo probe = new DictionaryInfo();
    probe.setSourceTable(srcTable);
    probe.setSourceColumn(srcCol);

    final ArrayList<String> paths = MetadataManager.getInstance(config).getStore().listResources(probe.getResourceDir());
    if (paths != null) {
        for (String path : paths) {
            removeDictionary(path);
        }
    }
}
@Override protected void doSetup(Context context) throws IOException { super.bindCurrentConfiguration(context.getConfiguration()); Configuration conf = context.getConfiguration(); mos = new MultipleOutputs(context); KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(); String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME); CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName); CubeDesc cubeDesc = cube.getDescriptor(); List<TblColRef> uhcColumns = cubeDesc.getAllUHCColumns(); int taskId = context.getTaskAttemptID().getTaskID().getId(); col = uhcColumns.get(taskId); logger.info("column name: " + col.getIdentity()); if (cube.getDescriptor().getShardByColumns().contains(col)) { //for ShardByColumns builder = DictionaryGenerator.newDictionaryBuilder(col.getType()); builder.init(null, 0, null); } else { //for GlobalDictionaryColumns String hdfsDir = conf.get(BatchConstants.CFG_GLOBAL_DICT_BASE_DIR); DictionaryInfo dictionaryInfo = new DictionaryInfo(col.getColumnDesc(), col.getDatatype()); String builderClass = cubeDesc.getDictionaryBuilderClass(col); builder = (IDictionaryBuilder) ClassUtil.newInstance(builderClass); builder.init(dictionaryInfo, 0, hdfsDir); } }