protected void outputCuboid(long cuboidId, GridTable gridTable, ICuboidWriter output) throws IOException { long startTime = System.currentTimeMillis(); GTScanRequest req = new GTScanRequestBuilder().setInfo(gridTable.getInfo()).setRanges(null).setDimensions(null).setFilterPushDown(null).createGTScanRequest(); IGTScanner scanner = gridTable.scan(req); for (GTRecord record : scanner) { output.write(cuboidId, record); } scanner.close(); logger.debug("Cuboid " + cuboidId + " output takes " + (System.currentTimeMillis() - startTime) + "ms"); }
@Override public void write(long cuboidId, GridTable gridTable) throws IOException { long startTime = System.currentTimeMillis(); GTScanRequest req = new GTScanRequestBuilder().setInfo(gridTable.getInfo()).setRanges(null).setDimensions(null).setFilterPushDown(null).createGTScanRequest(); IGTScanner scanner = gridTable.scan(req); for (GTRecord record : scanner) { write(cuboidId, record); } scanner.close(); logger.info("Cuboid " + cuboidId + " output takes " + (System.currentTimeMillis() - startTime) + "ms"); } }
public boolean fetchNext() throws IOException { if (recordIterator == null) { if (cuboidIterator.hasNext()) { CuboidResult cuboid = cuboidIterator.next(); currentCuboidId = cuboid.cuboidId; scanner = cuboid.table.scan(new GTScanRequestBuilder().setInfo(cuboid.table.getInfo()).setRanges(null).setDimensions(null).setFilterPushDown(null).createGTScanRequest()); recordIterator = scanner.iterator(); } else { return false; } } if (recordIterator.hasNext()) { currentRecord = recordIterator.next(); return true; } else { scanner.close(); recordIterator = null; return fetchNext(); } }
public boolean fetchNext() throws IOException { if (recordIterator == null) { currentCuboidId = splitResult.cuboidId; scanner = splitResult.table.scan(new GTScanRequestBuilder().setInfo(splitResult.table.getInfo()) .setRanges(null).setDimensions(null).setFilterPushDown(null).createGTScanRequest()); recordIterator = scanner.iterator(); } if (recordIterator.hasNext()) { currentRecord = recordIterator.next(); return true; } else { scanner.close(); recordIterator = null; return false; } }
private CuboidResult scanAndAggregateGridTable(GridTable gridTable, long parentId, long cuboidId, ImmutableBitSet aggregationColumns, ImmutableBitSet measureColumns) throws IOException { long startTime = System.currentTimeMillis(); logger.info("Calculating cuboid {}", cuboidId); GTAggregateScanner scanner = prepareGTAggregationScanner(gridTable, parentId, cuboidId, aggregationColumns, measureColumns); GridTable newGridTable = newGridTableByCuboidID(cuboidId); GTBuilder builder = newGridTable.rebuild(); ImmutableBitSet allNeededColumns = aggregationColumns.or(measureColumns); GTRecord newRecord = new GTRecord(newGridTable.getInfo()); int count = 0; try { for (GTRecord record : scanner) { count++; for (int i = 0; i < allNeededColumns.trueBitCount(); i++) { int c = allNeededColumns.trueBitAt(i); newRecord.set(i, record.get(c)); } builder.write(newRecord); } } finally { scanner.close(); builder.close(); } long timeSpent = System.currentTimeMillis() - startTime; logger.info("Cuboid {} has {} rows, build takes {}ms", cuboidId, count, timeSpent); return updateCuboidResult(cuboidId, newGridTable, count, timeSpent, 0); }
static GTBuilder rebuild(GridTable table) throws IOException { GTBuilder builder = table.rebuild(); for (GTRecord rec : UnitTestSupport.mockupData(table.getInfo(), 10)) { builder.write(rec); } builder.close(); System.out.println("Written Row Count: " + builder.getWrittenRowCount()); return builder; }
public static GridTable newTestTable() throws IOException { GTInfo info = newInfo(); GTSimpleMemStore store = new GTSimpleMemStore(info); GridTable table = new GridTable(info, store); GTRecord r = new GTRecord(table.getInfo()); GTBuilder builder = table.rebuild(); builder.write(r.setValues("2015-01-14", "30", "Yang", new Long(10), new BigDecimal("10.5"))); builder.write(r.setValues("2015-01-14", "30", "Luke", new Long(10), new BigDecimal("10.5"))); builder.write(r.setValues("2015-01-15", "20", "Dong", new Long(10), new BigDecimal("10.5"))); builder.write(r.setValues("2015-01-15", "20", "Jason", new Long(10), new BigDecimal("10.5"))); builder.write(r.setValues("2015-01-15", "30", "Xu", new Long(10), new BigDecimal("10.5"))); builder.write(r.setValues("2015-01-16", "20", "Mahone", new Long(10), new BigDecimal("10.5"))); builder.write(r.setValues("2015-01-16", "20", "Qianhao", new Long(10), new BigDecimal("10.5"))); builder.write(r.setValues("2015-01-16", "30", "George", new Long(10), new BigDecimal("10.5"))); builder.write(r.setValues("2015-01-16", "30", "Shaofeng", new Long(10), new BigDecimal("10.5"))); builder.write(r.setValues("2015-01-17", "10", "Kejia", new Long(10), new BigDecimal("10.5"))); builder.close(); return table; }
@Test public void verifyConvertFilterConstants1() { GTInfo info = table.getInfo(); TableDesc extTable = TableDesc.mockup("ext"); TblColRef extColA = TblColRef.mockup(extTable, 1, "A", "timestamp"); TblColRef extColB = TblColRef.mockup(extTable, 2, "B", "integer"); CompareTupleFilter fComp1 = compare(extColA, FilterOperatorEnum.GT, "2015-01-14"); CompareTupleFilter fComp2 = compare(extColB, FilterOperatorEnum.EQ, "10"); LogicalTupleFilter filter = and(fComp1, fComp2); List<TblColRef> colMapping = Lists.newArrayList(); colMapping.add(extColA); colMapping.add(extColB); TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null); assertEquals( "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 EQ [\\x00]]", newFilter.toString()); }
@Test public void verifyConvertFilterConstants4() { GTInfo info = table.getInfo(); TableDesc extTable = TableDesc.mockup("ext"); TblColRef extColA = TblColRef.mockup(extTable, 1, "A", "timestamp"); TblColRef extColB = TblColRef.mockup(extTable, 2, "B", "integer"); CompareTupleFilter fComp1 = compare(extColA, FilterOperatorEnum.GT, "2015-01-14"); CompareTupleFilter fComp2 = compare(extColB, FilterOperatorEnum.IN, "9", "10", "15"); LogicalTupleFilter filter = and(fComp1, fComp2); List<TblColRef> colMapping = Lists.newArrayList(); colMapping.add(extColA); colMapping.add(extColB); // $1 in ("9", "10", "15") has only "10" left TupleFilter newFilter = GTUtil.convertFilterColumnsAndConstants(filter, info, colMapping, null); assertEquals( "AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 IN [\\x00]]", newFilter.toString()); }
private IGTScanner scan(GridTable table) throws IOException { GTScanRequest req = new GTScanRequestBuilder().setInfo(table.getInfo()).setRanges(null).setDimensions(null).setFilterPushDown(null).createGTScanRequest(); IGTScanner scanner = table.scan(req); for (GTRecord r : scanner) { Object[] v = r.getValues(); assertTrue(((String) v[0]).startsWith("2015-")); assertTrue(((String) v[2]).equals("Food")); assertTrue(((Long) v[3]).longValue() == 10); assertTrue(((BigDecimal) v[4]).doubleValue() == 10.5); System.out.println(r); } scanner.close(); return scanner; }
private GTAggregateScanner prepareGTAggregationScanner(GridTable gridTable, long parentId, long cuboidId, ImmutableBitSet aggregationColumns, ImmutableBitSet measureColumns) throws IOException { GTInfo info = gridTable.getInfo(); GTScanRequest req = new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(null).setAggrGroupBy(aggregationColumns).setAggrMetrics(measureColumns).setAggrMetricsFuncs(metricsAggrFuncs).setFilterPushDown(null).createGTScanRequest(); GTAggregateScanner scanner = (GTAggregateScanner) gridTable.scan(req); // for child cuboid, some measures don't need aggregation. if (parentId != cuboidId) { boolean[] aggrMask = new boolean[measureDescs.length]; for (int i = 0; i < measureDescs.length; i++) { aggrMask[i] = !measureDescs[i].getFunction().getMeasureType().onlyAggrInBaseCuboid(); if (!aggrMask[i]) { logger.info("{} doesn't need aggregation.", measureDescs[i]); } } scanner.setAggrMask(aggrMask); } return scanner; }
public void run() { try { IGTScanner scanner = table.scan(new GTScanRequestBuilder().setInfo(table.getInfo()).setRanges(null).setDimensions(null).setFilterPushDown(null).createGTScanRequest()); int i = 0; for (GTRecord r : scanner) { assertEquals(data.get(i++), r); } scanner.close(); } catch (Exception ex) { ex.printStackTrace(); } } };
private GTAggregateScanner prepareGTAggregationScanner(GridTable gridTable, long parentId, long cuboidId, ImmutableBitSet aggregationColumns, ImmutableBitSet measureColumns) throws IOException { GTInfo info = gridTable.getInfo(); GTScanRequest req = new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(null) .setAggrGroupBy(aggregationColumns).setAggrMetrics(measureColumns).setAggrMetricsFuncs(metricsAggrFuncs) .setFilterPushDown(null).createGTScanRequest(); GTAggregateScanner scanner = (GTAggregateScanner) gridTable.scan(req); // for child cuboid, some measures don't need aggregation. if (parentId != cuboidId) { boolean[] aggrMask = new boolean[measureDescs.length]; for (int i = 0; i < measureDescs.length; i++) { aggrMask[i] = !measureDescs[i].getFunction().getMeasureType().onlyAggrInBaseCuboid(); if (!aggrMask[i]) { logger.info("{} doesn't need aggregation.", measureDescs[i]); } } scanner.setAggrMask(aggrMask); } return scanner; }
private IGTScanner scanAndAggregate(GridTable table) throws IOException { GTScanRequest req = new GTScanRequestBuilder().setInfo(table.getInfo()).setRanges(null).setDimensions(null).setAggrGroupBy(setOf(0, 2)).setAggrMetrics(setOf(3, 4)).setAggrMetricsFuncs(new String[] { "count", "sum" }).setFilterPushDown(null).createGTScanRequest(); IGTScanner scanner = table.scan(req); int i = 0;
private void verifyWriteAndRead(GridTable table) throws IOException { GTInfo info = table.getInfo(); GTBuilder builder = table.rebuild(); for (GTRecord r : data) { builder.write(r); } builder.close(); IGTScanner scanner = table.scan(new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(null).setFilterPushDown(null).createGTScanRequest()); int i = 0; for (GTRecord r : scanner) { assertEquals(data.get(i++), r); } scanner.close(); } }
@Test public void verifyFirstRow() throws IOException { doScanAndVerify(table, new GTScanRequestBuilder().setInfo(table.getInfo()).setRanges(null).setDimensions(null) .setFilterPushDown(null).createGTScanRequest(), "[1421193600000, 30, Yang, 10, 10.5]", // "[1421193600000, 30, Luke, 10, 10.5]", // "[1421280000000, 20, Dong, 10, 10.5]", // "[1421280000000, 20, Jason, 10, 10.5]", // "[1421280000000, 30, Xu, 10, 10.5]", // "[1421366400000, 20, Mahone, 10, 10.5]", // "[1421366400000, 20, Qianhao, 10, 10.5]", // "[1421366400000, 30, George, 10, 10.5]", // "[1421366400000, 30, Shaofeng, 10, 10.5]", // "[1421452800000, 10, Kejia, 10, 10.5]"); }
@Test public void verifyAggregateAndHavingFilter() throws IOException { GTInfo info = table.getInfo(); TblColRef havingCol = TblColRef.newInnerColumn("SUM_OF_BIGDECIMAL", InnerDataTypeEnum.LITERAL); havingCol.getColumnDesc().setId("1"); // point to the first aggregated measure CompareTupleFilter havingFilter = compare(havingCol, FilterOperatorEnum.GT, "20"); GTScanRequest req = new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(null) .setAggrGroupBy(setOf(1)).setAggrMetrics(setOf(4)).setAggrMetricsFuncs(new String[] { "sum" }) .setHavingFilterPushDown(havingFilter).createGTScanRequest(); doScanAndVerify(table, useDeserializedGTScanRequest(req), "[null, 20, null, null, 42.0]", "[null, 30, null, null, 52.5]"); }
@Test public void verifyScanWithEvaluatableFilter() throws IOException { GTInfo info = table.getInfo(); CompareTupleFilter fComp1 = compare(info.colRef(0), FilterOperatorEnum.GT, enc(info, 0, "2015-01-14")); CompareTupleFilter fComp2 = compare(info.colRef(1), FilterOperatorEnum.GT, enc(info, 1, "10")); LogicalTupleFilter filter = and(fComp1, fComp2); GTScanRequest req = new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(null) .setAggrGroupBy(setOf(0)).setAggrMetrics(setOf(3)).setAggrMetricsFuncs(new String[] { "sum" }) .setFilterPushDown(filter).createGTScanRequest(); // note the evaluatable column 1 in filter is added to returned columns but not in group by assertEquals( "GTScanRequest [range=[[null, null]-[null, null]], columns={0, 1, 3}, filterPushDown=AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.1 GT [\\x00]], aggrGroupBy={0}, aggrMetrics={3}, aggrMetricsFuncs=[sum]]", req.toString()); doScanAndVerify(table, useDeserializedGTScanRequest(req), "[1421280000000, 20, null, 30, null]", "[1421366400000, 20, null, 40, null]"); }
@Test public void verifyScanWithUnevaluatableFilter() throws IOException { GTInfo info = table.getInfo(); CompareTupleFilter fComp = compare(info.colRef(0), FilterOperatorEnum.GT, enc(info, 0, "2015-01-14")); ExtractTupleFilter fUnevaluatable = unevaluatable(info.colRef(1)); LogicalTupleFilter fNotPlusUnevaluatable = not(unevaluatable(info.colRef(1))); LogicalTupleFilter filter = and(fComp, fUnevaluatable, fNotPlusUnevaluatable); GTScanRequest req = new GTScanRequestBuilder().setInfo(info).setRanges(null).setDimensions(null) .setAggrGroupBy(setOf(0)).setAggrMetrics(setOf(3)).setAggrMetricsFuncs(new String[] { "sum" }) .setFilterPushDown(filter).createGTScanRequest(); // note the unEvaluatable column 1 in filter is added to group by assertEquals( "GTScanRequest [range=[[null, null]-[null, null]], columns={0, 1, 3}, filterPushDown=AND [UNKNOWN_MODEL:NULL.GT_MOCKUP_TABLE.0 GT [\\x00\\x00\\x01J\\xE5\\xBD\\x5C\\x00], [null], [null]], aggrGroupBy={0, 1}, aggrMetrics={3}, aggrMetricsFuncs=[sum]]", req.toString()); doScanAndVerify(table, useDeserializedGTScanRequest(req), "[1421280000000, 20, null, 20, null]", "[1421280000000, 30, null, 10, null]", "[1421366400000, 20, null, 20, null]", "[1421366400000, 30, null, 20, null]", "[1421452800000, 10, null, 10, null]"); }
@Before public void setup() throws IOException { this.createTestMetadata(); table = newTestTable(); info = table.getInfo(); timeComp0 = compare(info.colRef(0), FilterOperatorEnum.LT, enc(info, 0, "2015-01-14")); timeComp1 = compare(info.colRef(0), FilterOperatorEnum.GT, enc(info, 0, "2015-01-14")); timeComp2 = compare(info.colRef(0), FilterOperatorEnum.LT, enc(info, 0, "2015-01-13")); timeComp3 = compare(info.colRef(0), FilterOperatorEnum.LT, enc(info, 0, "2015-01-15")); timeComp4 = compare(info.colRef(0), FilterOperatorEnum.EQ, enc(info, 0, "2015-01-15")); timeComp5 = compare(info.colRef(0), FilterOperatorEnum.GT, enc(info, 0, "2015-01-15")); timeComp6 = compare(info.colRef(0), FilterOperatorEnum.EQ, enc(info, 0, "2015-01-14")); timeComp7 = compare(info.colRef(0), FilterOperatorEnum.EQ, enc(info, 0, "1970-01-01")); ageComp1 = compare(info.colRef(1), FilterOperatorEnum.EQ, enc(info, 1, "10")); ageComp2 = compare(info.colRef(1), FilterOperatorEnum.EQ, enc(info, 1, "20")); ageComp3 = compare(info.colRef(1), FilterOperatorEnum.EQ, enc(info, 1, "30")); ageComp4 = compare(info.colRef(1), FilterOperatorEnum.NEQ, enc(info, 1, "30")); }