/**
 * Builds the base cuboid grid table by aggregating the raw input records.
 * <p>
 * The input is wrapped in an {@link InputConverter}-backed scanner, aggregated by a
 * {@link GTAggregateScanner} over all base-cuboid dimensions, and the aggregated rows
 * are written into a freshly rebuilt {@link GridTable}.
 *
 * @param input queue controller feeding raw records of type {@code T}
 * @return the {@link CuboidResult} registered via {@code updateCuboidResult}
 * @throws IOException if scanning or writing the grid table fails
 */
private <T> CuboidResult createBaseCuboid(RecordConsumeBlockingQueueController<T> input) throws IOException {
    long startTime = System.currentTimeMillis();
    logger.info("Calculating base cuboid {}", baseCuboidId);

    GridTable baseCuboid = newGridTableByCuboidID(baseCuboidId);
    IGTScanner baseInput = new InputConverter(baseCuboid.getInfo(), input);

    Pair<ImmutableBitSet, ImmutableBitSet> dimensionMetricsBitSet = InMemCubeBuilderUtils
            .getDimensionAndMetricColumnBitSet(baseCuboidId, measureCount);
    GTScanRequest req = new GTScanRequestBuilder().setInfo(baseCuboid.getInfo()).setRanges(null).setDimensions(null)
            .setAggrGroupBy(dimensionMetricsBitSet.getFirst()).setAggrMetrics(dimensionMetricsBitSet.getSecond())
            .setAggrMetricsFuncs(metricsAggrFuncs).setFilterPushDown(null).createGTScanRequest();

    int count = 0;
    long estimateAggrCacheBytes;
    // try-with-resources closes BOTH resources even if one close() throws;
    // the previous finally { scanner.close(); builder.close(); } leaked the
    // builder whenever scanner.close() threw.
    try (GTAggregateScanner aggregationScanner = new GTAggregateScanner(baseInput, req);
            GTBuilder baseBuilder = baseCuboid.rebuild()) {
        aggregationScanner.trackMemoryLevel(baseCuboidMemTracker);
        for (GTRecord r : aggregationScanner) {
            if (count == 0) {
                // Mark peak memory once the first aggregated row materializes.
                baseCuboidMemTracker.markHigh();
            }
            baseBuilder.write(r);
            count++;
        }
        // Read the estimate while the scanner is still open (was previously
        // queried after close()).
        estimateAggrCacheBytes = aggregationScanner.getEstimateSizeOfAggrCache();
    }

    long timeSpent = System.currentTimeMillis() - startTime;
    logger.info("Cuboid {} has {} rows, build takes {}ms", baseCuboidId, count, timeSpent);

    int mbEstimateBaseAggrCache = (int) (estimateAggrCacheBytes / MemoryBudgetController.ONE_MB);
    logger.info("Wild estimate of base aggr cache is {} MB", mbEstimateBaseAggrCache);

    return updateCuboidResult(baseCuboidId, baseCuboid, count, timeSpent, 0, input.inputConverterUnit.ifChange());
}
/**
 * Rough in-memory footprint of the aggregation buffer: samples one entry
 * and extrapolates across the whole map.
 *
 * @return estimated size in bytes; 0 when the buffer is empty
 */
public long estimatedMemSize() {
    if (aggBufMap.isEmpty()) {
        return 0;
    }
    // Use the first entry as a representative sample for key and value sizes.
    byte[] firstKey = aggBufMap.firstKey();
    MeasureAggregator<?>[] firstValue = aggBufMap.get(firstKey);
    return estimateSizeOfAggrCache(firstKey, firstValue, aggBufMap.size());
}
public static long estimateSizeOfAggrCache(byte[] keySample, MeasureAggregator<?>[] aggrSample, int size) { // Aggregation cache is basically a tree map. The tree map entry overhead is // - 40 according to http://java-performance.info/memory-consumption-of-java-data-types-2/ // - 41~52 according to AggregationCacheMemSizeTest return (estimateSizeOf(keySample) + estimateSizeOf(aggrSample) + 64) * size; }
@Test public void testAggregationCacheSpill() throws IOException { IGTScanner inputScanner = new IGTScanner() { @Override public GTInfo getInfo() { return INFO; } @Override public void close() throws IOException { } @Override public Iterator<GTRecord> iterator() { return TEST_DATA.iterator(); } }; GTScanRequest scanRequest = new GTScanRequestBuilder().setInfo(INFO).setRanges(null).setDimensions(new ImmutableBitSet(0, 3)).setAggrGroupBy(new ImmutableBitSet(0, 3)).setAggrMetrics(new ImmutableBitSet(3, 6)).setAggrMetricsFuncs(new String[] { "SUM", "SUM", "COUNT_DISTINCT" }).setFilterPushDown(null).setAggCacheMemThreshold(0.5).createGTScanRequest(); GTAggregateScanner scanner = new GTAggregateScanner(inputScanner, scanRequest); int count = 0; for (GTRecord record : scanner) { assertNotNull(record); Object[] returnRecord = record.getValues(); assertEquals(20, ((Long) returnRecord[3]).longValue()); assertEquals(21, ((BigDecimal) returnRecord[4]).longValue()); count++; //System.out.println(record); } assertEquals(DATA_CARDINALITY, count); scanner.close(); }
.setAggrGroupBy(dimensionMetricsBitSet.getFirst()).setAggrMetrics(dimensionMetricsBitSet.getSecond()) .setAggrMetricsFuncs(metricsAggrFuncs).setFilterPushDown(null).createGTScanRequest(); try (GTAggregateScanner aggregationScanner = new GTAggregateScanner(baseInput, req)) { aggregationScanner.trackMemoryLevel(baseCuboidMemTracker); logger.info("Cuboid {} has {} rows, build takes {}ms", baseCuboidId, count, sw.elapsedMillis()); int mbEstimateBaseAggrCache = (int) (aggregationScanner.getEstimateSizeOfAggrCache() / MemoryBudgetController.ONE_MB); logger.info("Wild estimate of base aggr cache is {} MB", mbEstimateBaseAggrCache);
/**
 * Scans a parent cuboid's grid table, aggregates it down to the requested child
 * cuboid, and writes the aggregated rows into a new {@link GridTable}.
 *
 * @param gridTable          parent cuboid's grid table to scan
 * @param parentId           parent cuboid id (used by the scanner to decide aggregation masks)
 * @param cuboidId           child cuboid id being built
 * @param aggregationColumns dimension columns to group by
 * @param measureColumns     measure columns to aggregate
 * @return the {@link CuboidResult} registered via {@code updateCuboidResult}
 * @throws IOException if scanning or writing fails
 */
private CuboidResult scanAndAggregateGridTable(GridTable gridTable, long parentId, long cuboidId,
        ImmutableBitSet aggregationColumns, ImmutableBitSet measureColumns) throws IOException {
    long startTime = System.currentTimeMillis();
    logger.info("Calculating cuboid {}", cuboidId);

    GridTable newGridTable = newGridTableByCuboidID(cuboidId);
    // Columns carried from the parent record into the child cuboid's compact layout.
    ImmutableBitSet allNeededColumns = aggregationColumns.or(measureColumns);
    GTRecord newRecord = new GTRecord(newGridTable.getInfo());

    int count = 0;
    // try-with-resources closes BOTH resources even if one close() throws; the
    // previous finally { scanner.close(); builder.close(); } leaked the builder
    // whenever scanner.close() threw.
    try (GTAggregateScanner scanner = prepareGTAggregationScanner(gridTable, parentId, cuboidId,
            aggregationColumns, measureColumns); GTBuilder builder = newGridTable.rebuild()) {
        for (GTRecord record : scanner) {
            count++;
            // Re-index: column c in the parent maps to position i in the child.
            for (int i = 0; i < allNeededColumns.trueBitCount(); i++) {
                int c = allNeededColumns.trueBitAt(i);
                newRecord.set(i, record.get(c));
            }
            builder.write(newRecord);
        }
    }

    long timeSpent = System.currentTimeMillis() - startTime;
    logger.info("Cuboid {} has {} rows, build takes {}ms", cuboidId, count, timeSpent);

    return updateCuboidResult(cuboidId, newGridTable, count, timeSpent, 0);
}
logger.info("pre aggregating results before returning"); this.doingStorageAggregation = true; result = new GTAggregateScanner(result, this, spillEnabled); } else { logger.info("has no aggregation, skip it");
private GTAggregateScanner prepareGTAggregationScanner(GridTable gridTable, long parentId, long cuboidId, ImmutableBitSet aggregationColumns, ImmutableBitSet measureColumns) throws IOException { GTInfo info = gridTable.getInfo(); GTScanRequest req = new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(null).setAggrGroupBy(aggregationColumns).setAggrMetrics(measureColumns).setAggrMetricsFuncs(metricsAggrFuncs).setFilterPushDown(null).createGTScanRequest(); GTAggregateScanner scanner = (GTAggregateScanner) gridTable.scan(req); // for child cuboid, some measures don't need aggregation. if (parentId != cuboidId) { boolean[] aggrMask = new boolean[measureDescs.length]; for (int i = 0; i < measureDescs.length; i++) { aggrMask[i] = !measureDescs[i].getFunction().getMeasureType().onlyAggrInBaseCuboid(); if (!aggrMask[i]) { logger.info("{} doesn't need aggregation.", measureDescs[i]); } } scanner.setAggrMask(aggrMask); } return scanner; }
if (getNumOfSpills() == 0 && storageLimitLevel == StorageLimitLevel.LIMIT_ON_SCAN && aggBufMap.size() >= storagePushDownLimit) { return false;
spillBuffMap(getEstimateSizeOfAggrCache()); // TODO allow merge in-mem map with spilled dumps
? ((GTAggregateScanner) finalScanner).getInputRowCount() : finalRowCount;
GTAggregateScanner scanner = new GTAggregateScanner(inputScanner, scanRequest); scanner.close();
.setAggrGroupBy(dimensionMetricsBitSet.getFirst()).setAggrMetrics(dimensionMetricsBitSet.getSecond()) .setAggrMetricsFuncs(metricsAggrFuncs).setFilterPushDown(null).createGTScanRequest(); try (GTAggregateScanner aggregationScanner = new GTAggregateScanner(baseInput, req)) { aggregationScanner.trackMemoryLevel(baseCuboidMemTracker); logger.info("Cuboid {} has {} rows, build takes {}ms", baseCuboidId, count, sw.elapsedMillis()); int mbEstimateBaseAggrCache = (int) (aggregationScanner.getEstimateSizeOfAggrCache() / MemoryBudgetController.ONE_MB); logger.info("Wild estimate of base aggr cache is {} MB", mbEstimateBaseAggrCache);
scanner.close(); builder.close();
logger.info("pre aggregating results before returning"); this.doingStorageAggregation = true; result = new GTAggregateScanner(result, this, spillEnabled); } else { logger.info("has no aggregation, skip it");
/**
 * Creates an aggregation scanner over {@code gridTable} that groups by
 * {@code aggregationColumns} and aggregates {@code measureColumns}.
 * <p>
 * When building a child cuboid (parentId != cuboidId), measures whose type
 * only aggregates in the base cuboid are masked out via
 * {@code setAggrMask} so the scanner does not re-aggregate them.
 *
 * @return the configured scanner; the caller is responsible for closing it
 * @throws IOException if the scan cannot be started
 */
private GTAggregateScanner prepareGTAggregationScanner(GridTable gridTable, long parentId, long cuboidId,
        ImmutableBitSet aggregationColumns, ImmutableBitSet measureColumns) throws IOException {
    GTInfo info = gridTable.getInfo();
    GTScanRequest req = new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(null)
            .setAggrGroupBy(aggregationColumns).setAggrMetrics(measureColumns).setAggrMetricsFuncs(metricsAggrFuncs)
            .setFilterPushDown(null).createGTScanRequest();
    GTAggregateScanner scanner = (GTAggregateScanner) gridTable.scan(req);

    // for child cuboid, some measures don't need aggregation.
    if (parentId != cuboidId) {
        boolean[] aggrMask = new boolean[measureDescs.length];
        for (int i = 0; i < measureDescs.length; i++) {
            // true = aggregate this measure; false = its type only aggregates in the base cuboid
            aggrMask[i] = !measureDescs[i].getFunction().getMeasureType().onlyAggrInBaseCuboid();
            if (!aggrMask[i]) {
                logger.info("{} doesn't need aggregation.", measureDescs[i]);
            }
        }
        scanner.setAggrMask(aggrMask);
    }
    return scanner;
}
if (getNumOfSpills() == 0 && storageLimitLevel == StorageLimitLevel.LIMIT_ON_SCAN && aggBufMap.size() >= storagePushDownLimit) { return false;
spillBuffMap(getEstimateSizeOfAggrCache()); // TODO allow merge in-mem map with spilled dumps
? ((GTAggregateScanner) finalScanner).getInputRowCount() : finalRowCount;
/**
 * Builds the base cuboid grid table by aggregating the raw input records.
 * <p>
 * The input is wrapped in an {@link InputConverter}-backed scanner, aggregated by a
 * {@link GTAggregateScanner} over all base-cuboid dimensions, and the aggregated rows
 * are written into a freshly rebuilt {@link GridTable}.
 *
 * @param input queue controller feeding raw records of type {@code T}
 * @return the {@link CuboidResult} registered via {@code updateCuboidResult}
 * @throws IOException if scanning or writing the grid table fails
 */
private <T> CuboidResult createBaseCuboid(RecordConsumeBlockingQueueController<T> input) throws IOException {
    long startTime = System.currentTimeMillis();
    logger.info("Calculating base cuboid {}", baseCuboidId);

    GridTable baseCuboid = newGridTableByCuboidID(baseCuboidId);
    IGTScanner baseInput = new InputConverter(baseCuboid.getInfo(), input);

    Pair<ImmutableBitSet, ImmutableBitSet> dimensionMetricsBitSet = InMemCubeBuilderUtils
            .getDimensionAndMetricColumnBitSet(baseCuboidId, measureCount);
    GTScanRequest req = new GTScanRequestBuilder().setInfo(baseCuboid.getInfo()).setRanges(null).setDimensions(null)
            .setAggrGroupBy(dimensionMetricsBitSet.getFirst()).setAggrMetrics(dimensionMetricsBitSet.getSecond())
            .setAggrMetricsFuncs(metricsAggrFuncs).setFilterPushDown(null).createGTScanRequest();

    int count = 0;
    long estimateAggrCacheBytes;
    // try-with-resources closes BOTH resources even if one close() throws;
    // the previous finally { scanner.close(); builder.close(); } leaked the
    // builder whenever scanner.close() threw.
    try (GTAggregateScanner aggregationScanner = new GTAggregateScanner(baseInput, req);
            GTBuilder baseBuilder = baseCuboid.rebuild()) {
        aggregationScanner.trackMemoryLevel(baseCuboidMemTracker);
        for (GTRecord r : aggregationScanner) {
            if (count == 0) {
                // Mark peak memory once the first aggregated row materializes.
                baseCuboidMemTracker.markHigh();
            }
            baseBuilder.write(r);
            count++;
        }
        // Read the estimate while the scanner is still open (was previously
        // queried after close()).
        estimateAggrCacheBytes = aggregationScanner.getEstimateSizeOfAggrCache();
    }

    long timeSpent = System.currentTimeMillis() - startTime;
    logger.info("Cuboid {} has {} rows, build takes {}ms", baseCuboidId, count, timeSpent);

    int mbEstimateBaseAggrCache = (int) (estimateAggrCacheBytes / MemoryBudgetController.ONE_MB);
    logger.info("Wild estimate of base aggr cache is {} MB", mbEstimateBaseAggrCache);

    return updateCuboidResult(baseCuboidId, baseCuboid, count, timeSpent, 0, input.inputConverterUnit.ifChange());
}