@SuppressWarnings("unused") private void testAggregate(ImmutableBitSet groupBy) throws IOException { long t = System.currentTimeMillis(); GTScanRequest req = new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(dimensions).setAggrGroupBy(groupBy).setAggrMetrics(metrics).setAggrMetricsFuncs(aggrFuncs).setFilterPushDown(null).createGTScanRequest(); IGTScanner scanner = req.decorateScanner(gen.generate(N)); long count = 0; for (GTRecord rec : scanner) { count++; } t = System.currentTimeMillis() - t; System.out.println(N + " records aggregated to " + count + ", " + calcSpeed(t) + "K rec/sec"); }
@SuppressWarnings("unused") private void testAggregate(ImmutableBitSet groupBy) throws IOException { long t = System.currentTimeMillis(); GTScanRequest req = new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(dimensions).setAggrGroupBy(groupBy).setAggrMetrics(metrics).setAggrMetricsFuncs(aggrFuncs).setFilterPushDown(null).createGTScanRequest(); IGTScanner scanner = req.decorateScanner(gen.generate(N)); long count = 0; for (GTRecord rec : scanner) { count++; } t = System.currentTimeMillis() - t; System.out.println(N + " records aggregated to " + count + ", " + calcSpeed(t) + "K rec/sec"); }
private GTAggregateScanner prepareGTAggregationScanner(GridTable gridTable, long parentId, long cuboidId, ImmutableBitSet aggregationColumns, ImmutableBitSet measureColumns) throws IOException { GTInfo info = gridTable.getInfo(); GTScanRequest req = new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(null).setAggrGroupBy(aggregationColumns).setAggrMetrics(measureColumns).setAggrMetricsFuncs(metricsAggrFuncs).setFilterPushDown(null).createGTScanRequest(); GTAggregateScanner scanner = (GTAggregateScanner) gridTable.scan(req); // for child cuboid, some measures don't need aggregation. if (parentId != cuboidId) { boolean[] aggrMask = new boolean[measureDescs.length]; for (int i = 0; i < measureDescs.length; i++) { aggrMask[i] = !measureDescs[i].getFunction().getMeasureType().onlyAggrInBaseCuboid(); if (!aggrMask[i]) { logger.info("{} doesn't need aggregation.", measureDescs[i]); } } scanner.setAggrMask(aggrMask); } return scanner; }
private <T> CuboidResult createBaseCuboid(RecordConsumeBlockingQueueController<T> input) throws IOException { long startTime = System.currentTimeMillis(); logger.info("Calculating base cuboid {}", baseCuboidId); GridTable baseCuboid = newGridTableByCuboidID(baseCuboidId); GTBuilder baseBuilder = baseCuboid.rebuild(); IGTScanner baseInput = new InputConverter(baseCuboid.getInfo(), input); Pair<ImmutableBitSet, ImmutableBitSet> dimensionMetricsBitSet = InMemCubeBuilderUtils.getDimensionAndMetricColumnBitSet(baseCuboidId, measureCount); GTScanRequest req = new GTScanRequestBuilder().setInfo(baseCuboid.getInfo()).setRanges(null).setDimensions(null).setAggrGroupBy(dimensionMetricsBitSet.getFirst()).setAggrMetrics(dimensionMetricsBitSet.getSecond()).setAggrMetricsFuncs(metricsAggrFuncs).setFilterPushDown(null).createGTScanRequest(); GTAggregateScanner aggregationScanner = new GTAggregateScanner(baseInput, req); aggregationScanner.trackMemoryLevel(baseCuboidMemTracker); int count = 0; try { for (GTRecord r : aggregationScanner) { if (count == 0) { baseCuboidMemTracker.markHigh(); } baseBuilder.write(r); count++; } } finally { aggregationScanner.close(); baseBuilder.close(); } long timeSpent = System.currentTimeMillis() - startTime; logger.info("Cuboid {} has {} rows, build takes {}ms", baseCuboidId, count, timeSpent); int mbEstimateBaseAggrCache = (int) (aggregationScanner.getEstimateSizeOfAggrCache() / MemoryBudgetController.ONE_MB); logger.info("Wild estimate of base aggr cache is {} MB", mbEstimateBaseAggrCache); return updateCuboidResult(baseCuboidId, baseCuboid, count, timeSpent, 0, input.inputConverterUnit.ifChange()); }
private GTAggregateScanner prepareGTAggregationScanner(GridTable gridTable, long parentId, long cuboidId, ImmutableBitSet aggregationColumns, ImmutableBitSet measureColumns) throws IOException { GTInfo info = gridTable.getInfo(); GTScanRequest req = new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(null) .setAggrGroupBy(aggregationColumns).setAggrMetrics(measureColumns).setAggrMetricsFuncs(metricsAggrFuncs) .setFilterPushDown(null).createGTScanRequest(); GTAggregateScanner scanner = (GTAggregateScanner) gridTable.scan(req); // for child cuboid, some measures don't need aggregation. if (parentId != cuboidId) { boolean[] aggrMask = new boolean[measureDescs.length]; for (int i = 0; i < measureDescs.length; i++) { aggrMask[i] = !measureDescs[i].getFunction().getMeasureType().onlyAggrInBaseCuboid(); if (!aggrMask[i]) { logger.info("{} doesn't need aggregation.", measureDescs[i]); } } scanner.setAggrMask(aggrMask); } return scanner; }
.getDimensionAndMetricColumnBitSet(baseCuboidId, measureCount); GTScanRequest req = new GTScanRequestBuilder().setInfo(baseCuboid.getInfo()).setRanges(null).setDimensions(null) .setAggrGroupBy(dimensionMetricsBitSet.getFirst()).setAggrMetrics(dimensionMetricsBitSet.getSecond()) .setAggrMetricsFuncs(metricsAggrFuncs).setFilterPushDown(null).createGTScanRequest(); try (GTAggregateScanner aggregationScanner = new GTAggregateScanner(baseInput, req)) {
private IGTScanner scanAndAggregate(GridTable table) throws IOException { GTScanRequest req = new GTScanRequestBuilder().setInfo(table.getInfo()).setRanges(null).setDimensions(null).setAggrGroupBy(setOf(0, 2)).setAggrMetrics(setOf(3, 4)).setAggrMetricsFuncs(new String[] { "count", "sum" }).setFilterPushDown(null).createGTScanRequest(); IGTScanner scanner = table.scan(req); int i = 0;
public GTScanRequest planScanRequest() { GTScanRequest scanRequest; List<GTScanRange> scanRanges = this.planScanRanges(); if (scanRanges != null && !scanRanges.isEmpty()) { scanRequest = new GTScanRequestBuilder().setInfo(gtInfo).setRanges(scanRanges).setDimensions(gtDimensions) .setAggrGroupBy(gtAggrGroups).setAggrMetrics(gtAggrMetrics).setAggrMetricsFuncs(gtAggrFuncs) .setFilterPushDown(gtFilter)// .setRtAggrMetrics(gtRtAggrMetrics).setDynamicColumns(gtDynColumns) .setExprsPushDown(tupleExpressionMap)// .setAllowStorageAggregation(context.isNeedStorageAggregation()) .setAggCacheMemThreshold(cubeSegment.getConfig().getQueryCoprocessorMemGB())// .setStoragePushDownLimit(context.getFinalPushDownLimit()) .setStorageLimitLevel(context.getStorageLimitLevel()).setHavingFilterPushDown(havingFilter) .createGTScanRequest(); } else { scanRequest = null; } return scanRequest; }
.setAggrGroupBy(sAggGroupBy).setAggrMetrics(sAggrMetrics).setAggrMetricsFuncs(sAggrMetricFuncs) .setRtAggrMetrics(aRuntimeAggrMetrics).setDynamicColumns(aDynCols) .setExprsPushDown(sTupleExpressionMap)
public static CubeVisitProtos.CubeVisitRequest mockScanRequestWithRuntimeDimensions(GTInfo gtInfo, List<RawScan> rawScans) throws IOException { ImmutableBitSet dimensions = setOf(); ImmutableBitSet aggrGroupBy = setOf(3); ImmutableBitSet aggrMetrics = setOf(2); String[] aggrMetricsFuncs = { "SUM" }; ImmutableBitSet dynColumns = setOf(3); TupleFilter whenFilter = getCompareTupleFilter(1, "Ken"); TupleExpression thenExpr = new NumberTupleExpression(1); List<Pair<TupleFilter, TupleExpression>> whenList = Lists.newArrayList(); whenList.add(new Pair<>(whenFilter, thenExpr)); TupleExpression elseExpr = new NumberTupleExpression(2); /** * case * when user = 'Ken' then 1 * else 2 * end */ TupleExpression caseExpression = new CaseTupleExpression(whenList, elseExpr); Map<Integer, TupleExpression> tupleExpressionMap = Maps.newHashMap(); tupleExpressionMap.put(3, caseExpression); GTScanRequest scanRequest = new GTScanRequestBuilder().setInfo(gtInfo).setRanges(null)// .setDimensions(dimensions).setAggrGroupBy(aggrGroupBy)// .setAggrMetrics(aggrMetrics).setAggrMetricsFuncs(aggrMetricsFuncs)// .setDynamicColumns(dynColumns).setExprsPushDown(tupleExpressionMap)// .setStartTime(System.currentTimeMillis()).createGTScanRequest(); final List<CubeVisitProtos.CubeVisitRequest.IntList> intListList = mockIntList(setOf(2)); return mockScanRequest(rawScans, scanRequest, intListList); }
.setDimensions(dimensions).setAggrGroupBy(aggrGroupBy)//
@Test public void testAggregationCacheSpill() throws IOException { IGTScanner inputScanner = new IGTScanner() { @Override public GTInfo getInfo() { return INFO; } @Override public void close() throws IOException { } @Override public Iterator<GTRecord> iterator() { return TEST_DATA.iterator(); } }; GTScanRequest scanRequest = new GTScanRequestBuilder().setInfo(INFO).setRanges(null).setDimensions(new ImmutableBitSet(0, 3)).setAggrGroupBy(new ImmutableBitSet(0, 3)).setAggrMetrics(new ImmutableBitSet(3, 6)).setAggrMetricsFuncs(new String[] { "SUM", "SUM", "COUNT_DISTINCT" }).setFilterPushDown(null).setAggCacheMemThreshold(0.5).createGTScanRequest(); GTAggregateScanner scanner = new GTAggregateScanner(inputScanner, scanRequest); int count = 0; for (GTRecord record : scanner) { assertNotNull(record); Object[] returnRecord = record.getValues(); assertEquals(20, ((Long) returnRecord[3]).longValue()); assertEquals(21, ((BigDecimal) returnRecord[4]).longValue()); count++; //System.out.println(record); } assertEquals(DATA_CARDINALITY, count); scanner.close(); }
GTScanRequest scanRequest = new GTScanRequestBuilder().setInfo(INFO).setRanges(null).setDimensions(new ImmutableBitSet(0, 3)).setAggrGroupBy(new ImmutableBitSet(1, 3)).setAggrMetrics(new ImmutableBitSet(3, 6)).setAggrMetricsFuncs(new String[] { "SUM", "SUM", "COUNT_DISTINCT" }).setFilterPushDown(null).setAggCacheMemThreshold(0.5).createGTScanRequest();
@Test public void verifyAggregateAndHavingFilter() throws IOException { GTInfo info = table.getInfo(); TblColRef havingCol = TblColRef.newInnerColumn("SUM_OF_BIGDECIMAL", InnerDataTypeEnum.LITERAL); havingCol.getColumnDesc().setId("1"); // point to the first aggregated measure CompareTupleFilter havingFilter = compare(havingCol, FilterOperatorEnum.GT, "20"); GTScanRequest req = new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(null) .setAggrGroupBy(setOf(1)).setAggrMetrics(setOf(4)).setAggrMetricsFuncs(new String[] { "sum" }) .setHavingFilterPushDown(havingFilter).createGTScanRequest(); doScanAndVerify(table, useDeserializedGTScanRequest(req), "[null, 20, null, null, 42.0]", "[null, 30, null, null, 52.5]"); }
@Test public void verifyScanWithEvaluatableFilter() throws IOException { GTInfo info = table.getInfo(); CompareTupleFilter fComp1 = compare(info.colRef(0), FilterOperatorEnum.GT, enc(info, 0, "2015-01-14")); CompareTupleFilter fComp2 = compare(info.colRef(1), FilterOperatorEnum.GT, enc(info, 1, "10")); LogicalTupleFilter filter = and(fComp1, fComp2); GTScanRequest req = new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(null) .setAggrGroupBy(setOf(0)).setAggrMetrics(setOf(3)).setAggrMetricsFuncs(new String[] { "sum" }) .setFilterPushDown(filter).createGTScanRequest(); // note the evaluatable column 1 in filter is added to returned columns but not in group by assertEquals( "GTScanRequest [range=[[null, null]-[null, null]], columns={0, 1, 3}, filterPushDown=AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 GT [\\x00]], aggrGroupBy={0}, aggrMetrics={3}, aggrMetricsFuncs=[sum]]", req.toString()); doScanAndVerify(table, useDeserializedGTScanRequest(req), "[1421280000000, 20, null, 30, null]", "[1421366400000, 20, null, 40, null]"); }
@Test public void verifyScanWithUnevaluatableFilter() throws IOException { GTInfo info = table.getInfo(); CompareTupleFilter fComp = compare(info.colRef(0), FilterOperatorEnum.GT, enc(info, 0, "2015-01-14")); ExtractTupleFilter fUnevaluatable = unevaluatable(info.colRef(1)); LogicalTupleFilter fNotPlusUnevaluatable = not(unevaluatable(info.colRef(1))); LogicalTupleFilter filter = and(fComp, fUnevaluatable, fNotPlusUnevaluatable); GTScanRequest req = new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(null) .setAggrGroupBy(setOf(0)).setAggrMetrics(setOf(3)).setAggrMetricsFuncs(new String[] { "sum" }) .setFilterPushDown(filter).createGTScanRequest(); // note the unEvaluatable column 1 in filter is added to group by assertEquals( "GTScanRequest [range=[[null, null]-[null, null]], columns={0, 1, 3}, filterPushDown=AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], [null], [null]], aggrGroupBy={0, 1}, aggrMetrics={3}, aggrMetricsFuncs=[sum]]", req.toString()); doScanAndVerify(table, useDeserializedGTScanRequest(req), "[1421280000000, 20, null, 20, null]", "[1421280000000, 30, null, 10, null]", "[1421366400000, 20, null, 20, null]", "[1421366400000, 30, null, 20, null]", "[1421452800000, 10, null, 10, null]"); }
@SuppressWarnings("unused") private void testAggregate(ImmutableBitSet groupBy) throws IOException { long t = System.currentTimeMillis(); GTScanRequest req = new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(dimensions).setAggrGroupBy(groupBy).setAggrMetrics(metrics).setAggrMetricsFuncs(aggrFuncs).setFilterPushDown(null).createGTScanRequest(); IGTScanner scanner = req.decorateScanner(gen.generate(N)); long count = 0; for (GTRecord rec : scanner) { count++; } t = System.currentTimeMillis() - t; System.out.println(N + " records aggregated to " + count + ", " + calcSpeed(t) + "K rec/sec"); }
@SuppressWarnings("unused") private void testAggregate(ImmutableBitSet groupBy) throws IOException { long t = System.currentTimeMillis(); GTScanRequest req = new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(dimensions).setAggrGroupBy(groupBy).setAggrMetrics(metrics).setAggrMetricsFuncs(aggrFuncs).setFilterPushDown(null).createGTScanRequest(); IGTScanner scanner = req.decorateScanner(gen.generate(N)); long count = 0; for (GTRecord rec : scanner) { count++; } t = System.currentTimeMillis() - t; System.out.println(N + " records aggregated to " + count + ", " + calcSpeed(t) + "K rec/sec"); }
private GTAggregateScanner prepareGTAggregationScanner(GridTable gridTable, long parentId, long cuboidId, ImmutableBitSet aggregationColumns, ImmutableBitSet measureColumns) throws IOException { GTInfo info = gridTable.getInfo(); GTScanRequest req = new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(null).setAggrGroupBy(aggregationColumns).setAggrMetrics(measureColumns).setAggrMetricsFuncs(metricsAggrFuncs).setFilterPushDown(null).createGTScanRequest(); GTAggregateScanner scanner = (GTAggregateScanner) gridTable.scan(req); // for child cuboid, some measures don't need aggregation. if (parentId != cuboidId) { boolean[] aggrMask = new boolean[measureDescs.length]; for (int i = 0; i < measureDescs.length; i++) { aggrMask[i] = !measureDescs[i].getFunction().getMeasureType().onlyAggrInBaseCuboid(); if (!aggrMask[i]) { logger.info("{} doesn't need aggregation.", measureDescs[i]); } } scanner.setAggrMask(aggrMask); } return scanner; }
private GTAggregateScanner prepareGTAggregationScanner(GridTable gridTable, long parentId, long cuboidId, ImmutableBitSet aggregationColumns, ImmutableBitSet measureColumns) throws IOException { GTInfo info = gridTable.getInfo(); GTScanRequest req = new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(null) .setAggrGroupBy(aggregationColumns).setAggrMetrics(measureColumns).setAggrMetricsFuncs(metricsAggrFuncs) .setFilterPushDown(null).createGTScanRequest(); GTAggregateScanner scanner = (GTAggregateScanner) gridTable.scan(req); // for child cuboid, some measures don't need aggregation. if (parentId != cuboidId) { boolean[] aggrMask = new boolean[measureDescs.length]; for (int i = 0; i < measureDescs.length; i++) { aggrMask[i] = !measureDescs[i].getFunction().getMeasureType().onlyAggrInBaseCuboid(); if (!aggrMask[i]) { logger.info("{} doesn't need aggregation.", measureDescs[i]); } } scanner.setAggrMask(aggrMask); } return scanner; }