@Override protected void doSetup(Context context) throws IOException { super.bindCurrentConfiguration(context.getConfiguration()); Configuration conf = context.getConfiguration(); mos = new MultipleOutputs(context); KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(); String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME); CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName); CubeDesc cubeDesc = cube.getDescriptor(); List<TblColRef> uhcColumns = cubeDesc.getAllUHCColumns(); int taskId = context.getTaskAttemptID().getTaskID().getId(); col = uhcColumns.get(taskId); logger.info("column name: " + col.getIdentity()); if (cube.getDescriptor().getShardByColumns().contains(col)) { //for ShardByColumns builder = DictionaryGenerator.newDictionaryBuilder(col.getType()); builder.init(null, 0, null); } else { //for GlobalDictionaryColumns String hdfsDir = conf.get(BatchConstants.CFG_GLOBAL_DICT_BASE_DIR); DictionaryInfo dictionaryInfo = new DictionaryInfo(col.getColumnDesc(), col.getDatatype()); String builderClass = cubeDesc.getDictionaryBuilderClass(col); builder = (IDictionaryBuilder) ClassUtil.newInstance(builderClass); builder.init(dictionaryInfo, 0, hdfsDir); } }
/**
 * Feeds one key into the dictionary builder.
 *
 * <p>The key's text is decoded starting at byte offset 1, i.e. the first byte
 * of the key is dropped and the remaining {@code getLength() - 1} bytes become
 * the value. Neither {@code values} nor {@code context} is read.
 *
 * @param skey    sortable key whose text carries the value after its first byte
 * @param values  unused
 * @param context unused
 */
@Override
public void doReduce(SelfDefineSortableKey skey, Iterable<NullWritable> values, Context context)
        throws IOException, InterruptedException {
    final Text keyText = skey.getText();
    final byte[] backingBytes = keyText.getBytes();
    // Skip the leading byte; only the remainder is the value.
    final int valueLength = keyText.getLength() - 1;
    builder.addValue(Bytes.toString(backingBytes, 1, valueLength));
}
/**
 * Finishes the task by building the dictionary from the values added so far
 * and handing it, together with the current column, to {@code outputDict}.
 *
 * @param context unused
 */
@Override
protected void doCleanup(Context context) throws IOException, InterruptedException {
    final Dictionary<String> builtDictionary = builder.build();
    outputDict(col, builtDictionary);
}
// Initializes the builder, passing null as the first and third arguments and 0 as the second.
// NOTE(review): the enclosing method is not visible in this excerpt; confirm which column path this call belongs to.
builder.init(null, 0, null);
builder.addValue(value); } else { byte[] keyBytes = Bytes.copy(key.getBytes(), 1, key.getLength() - 1);
@Override protected void doCleanup(Context context) throws IOException, InterruptedException { if (isStatistics) { //output the hll info; List<Long> allCuboids = Lists.newArrayList(); allCuboids.addAll(cuboidHLLMap.keySet()); Collections.sort(allCuboids); logMapperAndCuboidStatistics(allCuboids); // for human check outputStatistics(allCuboids); } else { //dimension col if (cubeDesc.listDimensionColumnsExcludingDerived(true).contains(col)) { outputDimRangeInfo(); } // dic col if (buildDictInReducer) { Dictionary<String> dict = builder.build(); outputDict(col, dict); } } mos.close(); }
// Initializes the builder, passing null as the first and third arguments and 0 as the second.
// NOTE(review): the enclosing method is not visible in this excerpt; confirm which column path this call belongs to.
builder.init(null, 0, null);
/**
 * Feeds one key into the dictionary builder.
 *
 * <p>The key's text is decoded starting at byte offset 1, i.e. the first byte
 * of the key is dropped and the remaining {@code getLength() - 1} bytes become
 * the value. Neither {@code values} nor {@code context} is read.
 *
 * @param skey    sortable key whose text carries the value after its first byte
 * @param values  unused
 * @param context unused
 */
@Override
public void doReduce(SelfDefineSortableKey skey, Iterable<NullWritable> values, Context context)
        throws IOException, InterruptedException {
    final Text keyText = skey.getText();
    final byte[] backingBytes = keyText.getBytes();
    // Skip the leading byte; only the remainder is the value.
    final int valueLength = keyText.getLength() - 1;
    builder.addValue(Bytes.toString(backingBytes, 1, valueLength));
}
/**
 * Finishes the task by building the dictionary from the values added so far
 * and handing it, together with the current column, to {@code outputDict}.
 *
 * @param context unused
 */
@Override
protected void doCleanup(Context context) throws IOException, InterruptedException {
    final Dictionary<String> builtDictionary = builder.build();
    outputDict(col, builtDictionary);
}
@Override protected void doSetup(Context context) throws IOException { super.bindCurrentConfiguration(context.getConfiguration()); Configuration conf = context.getConfiguration(); mos = new MultipleOutputs(context); KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(); String cubeName = conf.get(BatchConstants.CFG_CUBE_NAME); CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName); CubeDesc cubeDesc = cube.getDescriptor(); List<TblColRef> uhcColumns = cubeDesc.getAllUHCColumns(); int taskId = context.getTaskAttemptID().getTaskID().getId(); col = uhcColumns.get(taskId); logger.info("column name: " + col.getIdentity()); if (cube.getDescriptor().getShardByColumns().contains(col)) { //for ShardByColumns builder = DictionaryGenerator.newDictionaryBuilder(col.getType()); builder.init(null, 0, null); } else { //for GlobalDictionaryColumns String hdfsDir = conf.get(BatchConstants.CFG_GLOBAL_DICT_BASE_DIR); DictionaryInfo dictionaryInfo = new DictionaryInfo(col.getColumnDesc(), col.getDatatype()); String builderClass = cubeDesc.getDictionaryBuilderClass(col); builder = (IDictionaryBuilder) ClassUtil.newInstance(builderClass); builder.init(dictionaryInfo, 0, hdfsDir); } }
builder.addValue(value); } else { byte[] keyBytes = Bytes.copy(key.getBytes(), 1, key.getLength() - 1);
@Override protected void doCleanup(Context context) throws IOException, InterruptedException { if (isStatistics) { //output the hll info; List<Long> allCuboids = Lists.newArrayList(); allCuboids.addAll(cuboidHLLMap.keySet()); Collections.sort(allCuboids); logMapperAndCuboidStatistics(allCuboids); // for human check outputStatistics(allCuboids); } else { //dimension col if (cubeDesc.listDimensionColumnsExcludingDerived(true).contains(col)) { outputDimRangeInfo(); } // dic col if (buildDictInReducer) { Dictionary<String> dict = builder.build(); outputDict(col, dict); } } mos.close(); }
// Initializes the builder, passing null as the first and third arguments and 0 as the second.
// NOTE(review): the enclosing method is not visible in this excerpt; confirm which column path this call belongs to.
builder.init(null, 0, null);
// Initializes the builder, passing null as the first and third arguments and 0 as the second.
// NOTE(review): the enclosing method is not visible in this excerpt; confirm which column path this call belongs to.
builder.init(null, 0, null);