private Boolean checkNeedMerging(TblColRef col) throws IOException { Boolean ret = dictsNeedMerging.get(col); if (ret != null) return ret; else { ret = cubeDesc.getRowkey().isUseDictionary(col); if (ret) { String dictTable = (String) DictionaryManager.getInstance(config).decideSourceData(cubeDesc.getModel(), cubeDesc.getRowkey().getDictionary(col), col, null)[0]; ret = cubeDesc.getFactTable().equalsIgnoreCase(dictTable); } dictsNeedMerging.put(col, ret); return ret; } }
@Override protected void setup(Context context) throws IOException { super.publishConfiguration(context.getConfiguration()); Configuration conf = context.getConfiguration(); KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf); cubeName = conf.get(BatchConstants.CFG_CUBE_NAME); cube = CubeManager.getInstance(config).getCube(cubeName); cubeDesc = cube.getDescriptor(); intermediateTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, null); long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc); Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId); List<TblColRef> columns = baseCuboid.getColumns(); ArrayList<Integer> factDictCols = new ArrayList<Integer>(); RowKeyDesc rowkey = cubeDesc.getRowkey(); DictionaryManager dictMgr = DictionaryManager.getInstance(config); for (int i = 0; i < columns.size(); i++) { TblColRef col = columns.get(i); if (rowkey.isUseDictionary(col) == false) continue; String scanTable = (String) dictMgr.decideSourceData(cubeDesc.getModel(), cubeDesc.getRowkey().getDictionary(col), col, null)[0]; if (cubeDesc.getModel().isFactTable(scanTable)) { factDictCols.add(i); } } this.factDictCols = new int[factDictCols.size()]; for (int i = 0; i < factDictCols.size(); i++) this.factDictCols[i] = factDictCols.get(i); schema = HCatInputFormat.getTableSchema(context.getConfiguration()); }
public DictionaryInfo buildDictionary(DataModelDesc model, String dict, TblColRef col, String factColumnsPath) throws IOException { logger.info("building dictionary for " + col); Object[] tmp = decideSourceData(model, dict, col, factColumnsPath); String srcTable = (String) tmp[0]; String srcCol = (String) tmp[1]; int srcColIdx = (Integer) tmp[2]; ReadableTable inpTable = (ReadableTable) tmp[3]; DictionaryInfo dictInfo = new DictionaryInfo(srcTable, srcCol, srcColIdx, col.getDatatype(), inpTable.getSignature(), inpTable.getColumnDelimeter()); String dupDict = checkDupByInfo(dictInfo); if (dupDict != null) { logger.info("Identical dictionary input " + dictInfo.getInput() + ", reuse existing dictionary at " + dupDict); return getDictionaryInfo(dupDict); } Dictionary<?> dictionary = DictionaryGenerator.buildDictionary(dictInfo, inpTable); return trySaveNewDict(dictionary, dictInfo); }
for (TblColRef col : dim.getColumnRefs()) { if (newSeg.getCubeDesc().getRowkey().isUseDictionary(col)) { String dictTable = (String) dictMgr.decideSourceData(cubeDesc.getModel(), cubeDesc.getRowkey().getDictionary(col), col, null)[0]; if (cubeDesc.getFactTable().equalsIgnoreCase(dictTable)) { colsNeedMeringDict.add(col);
private Boolean checkNeedMerging(TblColRef col) throws IOException { Boolean ret = dictsNeedMerging.get(col); if (ret != null) return ret; else { ret = cubeDesc.getRowkey().isUseDictionary(col); if (ret) { String dictTable = (String) DictionaryManager.getInstance(config).decideSourceData(cubeDesc.getModel(), cubeDesc.getRowkey().getDictionary(col), col, null)[0]; ret = cubeDesc.getFactTable().equalsIgnoreCase(dictTable); } dictsNeedMerging.put(col, ret); return ret; } }
@Override protected void setup(Context context) throws IOException { super.publishConfiguration(context.getConfiguration()); Configuration conf = context.getConfiguration(); KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(conf); cubeName = conf.get(BatchConstants.CFG_CUBE_NAME); cube = CubeManager.getInstance(config).getCube(cubeName); cubeDesc = cube.getDescriptor(); intermediateTableDesc = new CubeJoinedFlatTableDesc(cubeDesc, null); long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc); Cuboid baseCuboid = Cuboid.findById(cubeDesc, baseCuboidId); List<TblColRef> columns = baseCuboid.getColumns(); ArrayList<Integer> factDictCols = new ArrayList<Integer>(); RowKeyDesc rowkey = cubeDesc.getRowkey(); DictionaryManager dictMgr = DictionaryManager.getInstance(config); for (int i = 0; i < columns.size(); i++) { TblColRef col = columns.get(i); if (rowkey.isUseDictionary(col) == false) continue; String scanTable = (String) dictMgr.decideSourceData(cubeDesc.getModel(), cubeDesc.getRowkey().getDictionary(col), col, null)[0]; if (cubeDesc.getModel().isFactTable(scanTable)) { factDictCols.add(i); } } this.factDictCols = new int[factDictCols.size()]; for (int i = 0; i < factDictCols.size(); i++) this.factDictCols[i] = factDictCols.get(i); schema = HCatInputFormat.getTableSchema(context.getConfiguration()); }
for (TblColRef col : dim.getColumnRefs()) { if (newSeg.getCubeDesc().getRowkey().isUseDictionary(col)) { String dictTable = (String) dictMgr.decideSourceData(cubeDesc.getModel(), cubeDesc.getRowkey().getDictionary(col), col, null)[0]; if (cubeDesc.getFactTable().equalsIgnoreCase(dictTable)) { colsNeedMeringDict.add(col);
public DictionaryInfo buildDictionary(DataModelDesc model, String dict, TblColRef col, String factColumnsPath) throws IOException { logger.info("building dictionary for " + col); Object[] tmp = decideSourceData(model, dict, col, factColumnsPath); String srcTable = (String) tmp[0]; String srcCol = (String) tmp[1]; int srcColIdx = (Integer) tmp[2]; ReadableTable inpTable = (ReadableTable) tmp[3]; if (!inpTable.exists()) return null; DictionaryInfo dictInfo = new DictionaryInfo(srcTable, srcCol, srcColIdx, col.getDatatype(), inpTable.getSignature()); String dupDict = checkDupByInfo(dictInfo); if (dupDict != null) { logger.info("Identical dictionary input " + dictInfo.getInput() + ", reuse existing dictionary at " + dupDict); return getDictionaryInfo(dupDict); } Dictionary<?> dictionary = DictionaryGenerator.buildDictionary(dictInfo, inpTable); return trySaveNewDict(dictionary, dictInfo); }