public static Map<Long, HLLCounter> sampling(CubeDesc cubeDesc, IJoinedFlatTableDesc flatDescIn, Iterable<List<String>> streams) { final CubeJoinedFlatTableEnrich flatDesc = new CubeJoinedFlatTableEnrich(flatDescIn, cubeDesc); final int rowkeyLength = cubeDesc.getRowkey().getRowKeyColumns().length; final Set<Long> allCuboidIds = cubeDesc.getInitialCuboidScheduler().getAllCuboidIds(); final String cell = row.get(flatDesc.getRowKeyColumnIndexes()[i]); if (cell != null) { row_hashcodes[i] = hc.putString(cell).hash().asBytes();
@Override protected void doSetup(Context context) throws IOException { Configuration conf = context.getConfiguration(); bindCurrentConfiguration(conf); config = AbstractHadoopJob.loadKylinPropsAndMetadata(); cubeName = conf.get(BatchConstants.CFG_CUBE_NAME); cube = CubeManager.getInstance(config).getCube(cubeName); cubeDesc = cube.getDescriptor(); cubeSeg = cube.getSegmentById(conf.get(BatchConstants.CFG_CUBE_SEGMENT_ID)); flatTableInputFormat = MRUtil.getBatchCubingInputSide(cubeSeg).getFlatTableInputFormat(); intermediateTableDesc = new CubeJoinedFlatTableEnrich(EngineFactory.getJoinedFlatTableDesc(cubeSeg), cubeDesc); globalColumns = cubeDesc.getAllGlobalDictColumns(); globalColumnIndex = new int[globalColumns.size()]; globalColumnValues = Lists.newArrayListWithExpectedSize(globalColumns.size()); for (int i = 0; i < globalColumns.size(); i++) { TblColRef colRef = globalColumns.get(i); int columnIndexOnFlatTbl = intermediateTableDesc.getColumnIndex(colRef); globalColumnIndex[i] = columnIndexOnFlatTbl; globalColumnValues.add(Sets.<String> newHashSet()); } splitKey = DictionaryGetterUtil.getInputSplitSignature(cubeSeg, context.getInputSplit()); }
public KeyValueBuilder(CubeJoinedFlatTableEnrich intermediateTableDesc) { flatDesc = intermediateTableDesc; cubeDesc = flatDesc.getCubeDesc(); initNullStrings(); }
public InputConverterUnitForRawData(CubeDesc cubeDesc, IJoinedFlatTableDesc flatDesc, Map<TblColRef, Dictionary<String>> dictionaryMap) { this.flatDesc = new CubeJoinedFlatTableEnrich(flatDesc, cubeDesc); this.measureCount = cubeDesc.getMeasures().size(); this.measureDescs = cubeDesc.getMeasures().toArray(new MeasureDesc[measureCount]); this.measureIngesters = MeasureIngester.create(cubeDesc.getMeasures()); this.dictionaryMap = dictionaryMap; this.kvBuilder = new KeyValueBuilder(this.flatDesc); }
public String[] buildKey(String[] row) { int keySize = flatDesc.getRowKeyColumnIndexes().length; String[] key = new String[keySize]; for (int i = 0; i < keySize; i++) { key[i] = getCell(flatDesc.getRowKeyColumnIndexes()[i], row); } return key; }
private void initColumnIndex(CubeJoinedFlatTableEnrich intermediateTableDesc) { columnIndex = new int[allCols.size()]; for (int i = 0; i < allCols.size(); i++) { TblColRef colRef = allCols.get(i); int columnIndexOnFlatTbl = intermediateTableDesc.getColumnIndex(colRef); columnIndex[i] = columnIndexOnFlatTbl; } }
public CubeJoinedFlatTableEnrich(IJoinedFlatTableDesc flatDesc, CubeDesc cubeDesc) { // != works due to object cache if (cubeDesc.getModel() != flatDesc.getDataModel()) throw new IllegalArgumentException(); this.cubeDesc = cubeDesc; this.flatDesc = flatDesc; parseCubeDesc(); }
public String[] buildValueOf(int idxOfMeasure, String[] row) { MeasureDesc measure = cubeDesc.getMeasures().get(idxOfMeasure); FunctionDesc function = measure.getFunction(); int[] colIdxOnFlatTable = flatDesc.getMeasureColumnIndexes()[idxOfMeasure]; int paramCount = function.getParameterCount(); List<String> inputToMeasure = Lists.newArrayListWithExpectedSize(paramCount); // pick up parameter values ParameterDesc param = function.getParameter(); int colParamIdx = 0; // index among parameters of column type for (int i = 0; i < paramCount; i++, param = param.getNextParameter()) { String value; if (function.isCount()) { value = "1"; } else if (param.isColumnType()) { value = getCell(colIdxOnFlatTable[colParamIdx++], row); } else { value = param.getValue(); } inputToMeasure.add(value); } return inputToMeasure.toArray(new String[inputToMeasure.size()]); } }
@Override protected void doSetup(Context context) throws IOException { super.bindCurrentConfiguration(context.getConfiguration()); cubeName = context.getConfiguration().get(BatchConstants.CFG_CUBE_NAME); segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID); final KylinConfig kylinConfig = AbstractHadoopJob.loadKylinPropsAndMetadata(); cube = CubeManager.getInstance(kylinConfig).getCube(cubeName); cubeDesc = cube.getDescriptor(); cubeSegment = cube.getSegmentById(segmentID); CubeJoinedFlatTableEnrich intermediateTableDesc = new CubeJoinedFlatTableEnrich(EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc); Map<TblColRef, Dictionary<String>> dictionaryMap = DictionaryGetterUtil.getDictionaryMap(cubeSegment, context.getInputSplit(), context.getConfiguration()); baseCuboidBuilder = new BaseCuboidBuilder(kylinConfig, cubeDesc, cubeSegment, intermediateTableDesc, dictionaryMap); }
cuboidIdSplit = Arrays.copyOfRange(cuboidIds, start, end); CuboidStatCalculator calculator = new CuboidStatCalculator(i, intermediateTableDesc.getRowKeyColumnIndexes(), cuboidIdSplit, cuboidsBitSetSplit, isUsePutRowKeyToHllNewAlgorithm, cuboidsHLLSplit); cuboidStatCalculators[i] = calculator;
private void initColumnIndex(CubeJoinedFlatTableEnrich intermediateTableDesc) { columnIndex = new int[allCols.size()]; for (int i = 0; i < allCols.size(); i++) { TblColRef colRef = allCols.get(i); int columnIndexOnFlatTbl = intermediateTableDesc.getColumnIndex(colRef); columnIndex[i] = columnIndexOnFlatTbl; } }
public CubeJoinedFlatTableEnrich(IJoinedFlatTableDesc flatDesc, CubeDesc cubeDesc) { // != works due to object cache if (cubeDesc.getModel() != flatDesc.getDataModel()) throw new IllegalArgumentException(); this.cubeDesc = cubeDesc; this.flatDesc = flatDesc; parseCubeDesc(); }
public String[] buildValueOf(int idxOfMeasure, String[] row) { MeasureDesc measure = cubeDesc.getMeasures().get(idxOfMeasure); FunctionDesc function = measure.getFunction(); int[] colIdxOnFlatTable = flatDesc.getMeasureColumnIndexes()[idxOfMeasure]; int paramCount = function.getParameterCount(); List<String> inputToMeasure = Lists.newArrayListWithExpectedSize(paramCount); // pick up parameter values ParameterDesc param = function.getParameter(); int colParamIdx = 0; // index among parameters of column type for (int i = 0; i < paramCount; i++, param = param.getNextParameter()) { String value; if (function.isCount()) { value = "1"; } else if (param.isColumnType()) { value = getCell(colIdxOnFlatTable[colParamIdx++], row); } else { value = param.getValue(); } inputToMeasure.add(value); } return inputToMeasure.toArray(new String[inputToMeasure.size()]); } }
private void init() { KylinConfig kConfig = AbstractHadoopJob.loadKylinConfigFromHdfs(conf, metaUrl); try (KylinConfig.SetAndUnsetThreadLocalConfig autoUnset = KylinConfig .setAndUnsetThreadLocalConfig(kConfig)) { CubeInstance cubeInstance = CubeManager.getInstance(kConfig).getCube(cubeName); CubeDesc cubeDesc = cubeInstance.getDescriptor(); CubeSegment cubeSegment = cubeInstance.getSegmentById(segmentId); CubeJoinedFlatTableEnrich intermediateTableDesc = new CubeJoinedFlatTableEnrich( EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc); keyValueBuilder = new KeyValueBuilder(intermediateTableDesc); reducerMapping = new FactDistinctColumnsReducerMapping(cubeInstance); tmpbuf = ByteBuffer.allocate(4096); int[] rokeyColumnIndexes = intermediateTableDesc.getRowKeyColumnIndexes(); Long[] cuboidIds = getCuboidIds(cubeSegment); Integer[][] cuboidsBitSet = CuboidUtil.getCuboidBitSet(cuboidIds, rokeyColumnIndexes.length); boolean isNewAlgorithm = isUsePutRowKeyToHllNewAlgorithm(cubeDesc); HLLCounter[] cuboidsHLL = getInitCuboidsHLL(cuboidIds.length, cubeDesc.getConfig().getCubeStatsHLLPrecision()); cuboidStatCalculator = new CuboidStatCalculator(rokeyColumnIndexes, cuboidIds, cuboidsBitSet, isNewAlgorithm, cuboidsHLL); allCols = reducerMapping.getAllDimDictCols(); initDictColDeduper(cubeDesc); initColumnIndex(intermediateTableDesc); initialized = true; } }
@Override protected void doSetup(Context context) throws IOException { Configuration conf = context.getConfiguration(); bindCurrentConfiguration(conf); KylinConfig config = AbstractHadoopJob.loadKylinPropsAndMetadata(); cubeName = conf.get(BatchConstants.CFG_CUBE_NAME); cube = CubeManager.getInstance(config).getCube(cubeName); cubeSeg = cube.getSegmentById(conf.get(BatchConstants.CFG_CUBE_SEGMENT_ID)); cubeDesc = cube.getDescriptor(); baseCuboidId = Cuboid.getBaseCuboidId(cubeDesc); reducerMapping = new FactDistinctColumnsReducerMapping(cube); allCols = reducerMapping.getAllDimDictCols(); flatTableInputFormat = MRUtil.getBatchCubingInputSide(cubeSeg).getFlatTableInputFormat(); intermediateTableDesc = new CubeJoinedFlatTableEnrich(EngineFactory.getJoinedFlatTableDesc(cubeSeg), cubeDesc); columnIndex = new int[allCols.size()]; for (int i = 0; i < allCols.size(); i++) { TblColRef colRef = allCols.get(i); int columnIndexOnFlatTbl = intermediateTableDesc.getColumnIndex(colRef); columnIndex[i] = columnIndexOnFlatTbl; } }
String segmentID = context.getConfiguration().get(BatchConstants.CFG_CUBE_SEGMENT_ID); cubeSegment = cube.getSegmentById(segmentID); flatDesc = new CubeJoinedFlatTableEnrich(EngineFactory.getJoinedFlatTableDesc(cubeSegment), cubeDesc);
public String[] buildKey(String[] row) { int keySize = flatDesc.getRowKeyColumnIndexes().length; String[] key = new String[keySize]; for (int i = 0; i < keySize; i++) { key[i] = getCell(flatDesc.getRowKeyColumnIndexes()[i], row); } return key; }
public KeyValueBuilder(CubeJoinedFlatTableEnrich intermediateTableDesc) { flatDesc = intermediateTableDesc; cubeDesc = flatDesc.getCubeDesc(); initNullStrings(); }
public static Map<Long, HLLCounter> sampling(CubeDesc cubeDesc, IJoinedFlatTableDesc flatDescIn, Iterable<List<String>> streams) { final CubeJoinedFlatTableEnrich flatDesc = new CubeJoinedFlatTableEnrich(flatDescIn, cubeDesc); final int rowkeyLength = cubeDesc.getRowkey().getRowKeyColumns().length; final Set<Long> allCuboidIds = cubeDesc.getInitialCuboidScheduler().getAllCuboidIds(); final String cell = row.get(flatDesc.getRowKeyColumnIndexes()[i]); if (cell != null) { row_hashcodes[i] = hc.putString(cell).hash().asBytes();
@Override protected void doSetup(Context context) throws IOException { Configuration conf = context.getConfiguration(); bindCurrentConfiguration(conf); config = AbstractHadoopJob.loadKylinPropsAndMetadata(); cubeName = conf.get(BatchConstants.CFG_CUBE_NAME); cube = CubeManager.getInstance(config).getCube(cubeName); cubeDesc = cube.getDescriptor(); cubeSeg = cube.getSegmentById(conf.get(BatchConstants.CFG_CUBE_SEGMENT_ID)); flatTableInputFormat = MRUtil.getBatchCubingInputSide(cubeSeg).getFlatTableInputFormat(); intermediateTableDesc = new CubeJoinedFlatTableEnrich(EngineFactory.getJoinedFlatTableDesc(cubeSeg), cubeDesc); globalColumns = cubeDesc.getAllGlobalDictColumns(); globalColumnIndex = new int[globalColumns.size()]; globalColumnValues = Lists.newArrayListWithExpectedSize(globalColumns.size()); for (int i = 0; i < globalColumns.size(); i++) { TblColRef colRef = globalColumns.get(i); int columnIndexOnFlatTbl = intermediateTableDesc.getColumnIndex(colRef); globalColumnIndex[i] = columnIndexOnFlatTbl; globalColumnValues.add(Sets.<String> newHashSet()); } splitKey = DictionaryGetterUtil.getInputSplitSignature(cubeSeg, context.getInputSplit()); }