public InputConverterUnitForRawData(CubeDesc cubeDesc, IJoinedFlatTableDesc flatDesc,
        Map<TblColRef, Dictionary<String>> dictionaryMap) {
    this.flatDesc = new CubeJoinedFlatTableEnrich(flatDesc, cubeDesc);
    this.measureCount = cubeDesc.getMeasures().size();
    this.measureDescs = cubeDesc.getMeasures().toArray(new MeasureDesc[measureCount]);
    this.measureIngesters = MeasureIngester.create(cubeDesc.getMeasures());
    this.dictionaryMap = dictionaryMap;
    this.kvBuilder = new KeyValueBuilder(this.flatDesc);
}
@Override
protected void doSetup(Context context) throws IOException {
    super.bindCurrentConfiguration(context.getConfiguration());
    cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME);
    segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);

    final KylinConfig kylinConfig = AbstractHadoopJob.loadKylinPropsAndMetadata();
    cube = CubeManager.getInstance(kylinConfig).getCube(cubeName);
    cubeDesc = cube.getDescriptor();
    cubeSegment = cube.getSegmentById(segmentID);

    CubeJoinedFlatTableEnrich intermediateTableDesc = new CubeJoinedFlatTableEnrich(
            EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
    // Dictionaries are resolved per input split and handed to the base cuboid builder.
    Map<TblColRef, Dictionary<String>> dictionaryMap = DictionaryGetterUtil.getDictionaryMap(cubeSegment,
            context.getInputSplit(), context.getConfiguration());
    baseCuboidBuilder = new BaseCuboidBuilder(kylinConfig, cubeDesc, cubeSegment, intermediateTableDesc,
            dictionaryMap);
}
@Override
protected void doSetup(Context context) throws IOException {
    Configuration conf = context.getConfiguration();
    bindCurrentConfiguration(conf);
    config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
    cube = CubeManager.getInstance(config).getCube(cubeName);
    cubeDesc = cube.getDescriptor();
    cubeSeg = cube.getSegmentById(conf.get(BatchConstants.CFG_CUBE_SEGMENT_ID));
    flatTableInputFormat = MRUtil.getBatchCubingInputSide(cubeSeg).getFlatTableInputFormat();
    intermediateTableDesc = new CubeJoinedFlatTableEnrich(EngineFactory.getJoinedFlatTableDesc(cubeSeg), cubeDesc);

    // Map each global-dictionary column to its position on the flat table and
    // prepare a distinct-value set per column.
    globalColumns = cubeDesc.getAllGlobalDictColumns();
    globalColumnIndex = new int[globalColumns.size()];
    globalColumnValues = Lists.newArrayListWithExpectedSize(globalColumns.size());
    for (int i = 0; i < globalColumns.size(); i++) {
        TblColRef colRef = globalColumns.get(i);
        globalColumnIndex[i] = intermediateTableDesc.getColumnIndex(colRef);
        globalColumnValues.add(Sets.<String> newHashSet());
    }

    splitKey = DictionaryGetterUtil.getInputSplitSignature(cubeSeg, context.getInputSplit());
}
public static Map<Long, HLLCounter> sampling(CubeDesc cubeDesc, IJoinedFlatTableDesc flatDescIn,
        Iterable<List<String>> streams) {
    final CubeJoinedFlatTableEnrich flatDesc = new CubeJoinedFlatTableEnrich(flatDescIn, cubeDesc);
    final int rowkeyLength = cubeDesc.getRowkey().getRowKeyColumns().length;
    final Set<Long> allCuboidIds = cubeDesc.getInitialCuboidScheduler().getAllCuboidIds();
    // ... (remainder of the method not shown in this excerpt)
@Override
protected void doSetup(Context context) throws IOException {
    Configuration conf = context.getConfiguration();
    bindCurrentConfiguration(conf);
    KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata();

    cubeName = conf.get(BatchConstants.CFG_CUBE_NAME);
    cube = CubeManager.getInstance(config).getCube(cubeName);
    cubeSeg = cube.getSegmentById(conf.get(BatchConstants.CFG_CUBE_SEGMENT_ID));
    cubeDesc = cube.getDescriptor();
    baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
    reducerMapping = new FactDistinctColumnsReducerMapping(cube);
    allCols = reducerMapping.getAllDimDictCols();

    flatTableInputFormat = MRUtil.getBatchCubingInputSide(cubeSeg).getFlatTableInputFormat();
    intermediateTableDesc = new CubeJoinedFlatTableEnrich(EngineFactory.getJoinedFlatTableDesc(cubeSeg), cubeDesc);

    // Resolve the flat-table position of every dimension/dictionary column.
    columnIndex = new int[allCols.size()];
    for (int i = 0; i < allCols.size(); i++) {
        TblColRef colRef = allCols.get(i);
        columnIndex[i] = intermediateTableDesc.getColumnIndex(colRef);
    }
}
String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID);
cubeSegment = cube.getSegmentById(segmentID);
flatDesc = new CubeJoinedFlatTableEnrich(EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
@Override
public Tuple2<ByteArray, Object[]> call(String[] rowArray) throws Exception {
    // Lazily initialize the base cuboid builder once per JVM (double-checked locking).
    if (!initialized) {
        synchronized (SparkCubingByLayer.class) {
            if (!initialized) {
                KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
                try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
                        .setAndUnsetThreadLocalConfig(kConfig)) {
                    CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName);
                    CubeDesc cubeDesc = cubeInstance.getDescriptor();
                    CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
                    CubeJoinedFlatTableEnrich interDesc = new CubeJoinedFlatTableEnrich(
                            EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
                    long baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc);
                    Cuboid baseCuboid = Cuboid.findForMandatory(cubeDesc, baseCuboidId);
                    baseCuboidBuilder = new BaseCuboidBuilder(kConfig, cubeDesc, cubeSegment, interDesc,
                            AbstractRowKeyEncoder.createInstance(cubeSegment, baseCuboid),
                            MeasureIngester.create(cubeDesc.getMeasures()), cubeSegment.buildDictionaryMap());
                    initialized = true;
                }
            }
        }
    }
    baseCuboidBuilder.resetAggrs();
    byte[] rowKey = baseCuboidBuilder.buildKey(rowArray);
    Object[] result = baseCuboidBuilder.buildValueObjects(rowArray);
    return new Tuple2<>(new ByteArray(rowKey), result);
}
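// Note on the snippet above: the Spark function object is serialized to and
// deserialized on each executor, so the heavyweight metadata (cube descriptor,
// row-key encoder, dictionaries) is built lazily, once per JVM rather than per
// record, behind the double-checked lock.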
private void init() {
    KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl);
    try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig
            .setAndUnsetThreadLocalConfig(kConfig)) {
        CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName);
        CubeDesc cubeDesc = cubeInstance.getDescriptor();
        CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId);
        CubeJoinedFlatTableEnrich intermediateTableDesc = new CubeJoinedFlatTableEnrich(
                EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
        keyValueBuilder = new KeyValueBuilder(intermediateTableDesc);
        reducerMapping = new FactDistinctColumnsReducerMapping(cubeInstance);
        tmpbuf = ByteBuffer.allocate(4096);

        // Set up per-cuboid HLL counters for cardinality sampling.
        int[] rowKeyColumnIndexes = intermediateTableDesc.getRowKeyColumnIndexes();
        Long[] cuboidIds = getCuboidIds(cubeSegment);
        Integer[][] cuboidsBitSet = CuboidUtil.getCuboidBitSet(cuboidIds, rowKeyColumnIndexes.length);
        boolean isNewAlgorithm = isUsePutRowKeyToHllNewAlgorithm(cubeDesc);
        HLLCounter[] cuboidsHLL = getInitCuboidsHLL(cuboidIds.length,
                cubeDesc.getConfig().getCubeStatsHLLPrecision());
        cuboidStatCalculator = new CuboidStatCalculator(rowKeyColumnIndexes, cuboidIds, cuboidsBitSet,
                isNewAlgorithm, cuboidsHLL);

        allCols = reducerMapping.getAllDimDictCols();
        initDictColDeduper(cubeDesc);
        initColumnIndex(intermediateTableDesc);
        initialized = true;
    }
}
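// Every snippet above follows the same recurring pattern: wrap the segment's
// flat-table description in CubeJoinedFlatTableEnrich, then resolve flat-table
// column positions for the columns the task cares about. The sketch below
// distills that pattern into a standalone helper. The class and method names
// are hypothetical, and the package paths are assumed from Apache Kylin's
// source layout; treat it as a minimal sketch, not library code.
import java.util.List;

import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.cube.model.CubeJoinedFlatTableEnrich;
import org.apache.kylin.engine.EngineFactory;
import org.apache.kylin.metadata.model.TblColRef;

public class FlatTableIndexLookup {

    /** Resolves each column's index on the segment's joined flat table. */
    public static int[] resolveColumnIndexes(KylinConfig config, String cubeName, String segmentId,
            List<TblColRef> columns) {
        CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
        CubeDesc cubeDesc = cube.getDescriptor();
        CubeSegment segment = cube.getSegmentById(segmentId);

        // Enrich the engine-provided flat-table description with cube metadata
        // so column references can be mapped to flat-table positions.
        CubeJoinedFlatTableEnrich flatDesc = new CubeJoinedFlatTableEnrich(
                EngineFactory.getJoinedFlatTableDesc(segment), cubeDesc);

        int[] indexes = new int[columns.size()];
        for (int i = 0; i < columns.size(); i++) {
            indexes[i] = flatDesc.getColumnIndex(columns.get(i));
        }
        return indexes;
    }
}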