// Returns hash.getGroupCount() - 1, which is -1 when the hash reports zero groups.
// NOTE(review): presumably the largest valid group index; confirm against the overridden declaration.
// The final brace on this line closes an enclosing declaration that begins outside this excerpt.
@Override public int getMaxIndex() { return hash.getGroupCount() - 1; } }
/**
 * Returns the group count reported by {@code hash}.
 *
 * @return the value of {@code hash.getGroupCount()}
 */
public int size() {
    final int groupCount = hash.getGroupCount();
    return groupCount;
}
/**
 * Returns the group count of the wrapped {@code groupByHash}, widened to {@code long}.
 *
 * @return the value of {@code groupByHash.getGroupCount()}
 */
public long getGroupCount() {
    final long groupCount = groupByHash.getGroupCount();
    return groupCount;
}
/**
 * Delegates to {@code hash} for the number of groups.
 *
 * @return whatever {@code hash.getGroupCount()} reports
 */
public int size() {
    final int numberOfGroups = hash.getGroupCount();
    return numberOfGroups;
}
/**
 * Produces an iterator built by {@code IntIterators.fromTo(0, groupCount)}, where
 * {@code groupCount} is the current {@code groupByHash.getGroupCount()}.
 *
 * @return an iterator starting at 0 and bounded by the group count
 */
private IntIterator consecutiveGroupIds() {
    final int groupCount = groupByHash.getGroupCount();
    return IntIterators.fromTo(0, groupCount);
}
/**
 * Returns an iterator over every group id of {@code groupByHash}, ordered by comparing
 * the ids' raw hashes with {@link Long#compare(long, long)}.
 *
 * <p>The group count is read once up front, so the fill loop, the sort range and the
 * iterator bound all use the same value.
 *
 * @return an iterator yielding each group id in {@code [0, groupCount)} exactly once
 */
private IntIterator hashSortedGroupIds() {
    final int groupCount = groupByHash.getGroupCount();

    // Start from the identity permutation 0, 1, ..., groupCount - 1.
    IntBigArray groupIds = new IntBigArray();
    groupIds.ensureCapacity(groupCount);
    for (int i = 0; i < groupCount; i++) {
        groupIds.set(i, i);
    }

    // Reorder the ids by the raw hash stored for each group.
    groupIds.sort(0, groupCount, (leftGroupId, rightGroupId) ->
            Long.compare(groupByHash.getRawHash(leftGroupId), groupByHash.getRawHash(rightGroupId)));

    return new AbstractIntIterator() {
        private final int totalPositions = groupCount;
        private int position;

        @Override
        public boolean hasNext() {
            return position < totalPositions;
        }

        @Override
        public int nextInt() {
            // Honor the Iterator contract instead of reading past the last group id.
            if (!hasNext()) {
                throw new java.util.NoSuchElementException();
            }
            return groupIds.get(position++);
        }
    };
}
@Test public void testAddPage() { GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(BIGINT), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER); for (int tries = 0; tries < 2; tries++) { for (int value = 0; value < MAX_GROUP_ID; value++) { Block block = BlockAssertions.createLongsBlock(value); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(BIGINT), block); Page page = new Page(block, hashBlock); for (int addValuesTries = 0; addValuesTries < 10; addValuesTries++) { groupByHash.addPage(page).process(); assertEquals(groupByHash.getGroupCount(), tries == 0 ? value + 1 : MAX_GROUP_ID); // add the page again using get group ids and make sure the group count didn't change Work<GroupByIdBlock> work = groupByHash.getGroupIds(page); work.process(); GroupByIdBlock groupIds = work.getResult(); assertEquals(groupByHash.getGroupCount(), tries == 0 ? value + 1 : MAX_GROUP_ID); assertEquals(groupIds.getGroupCount(), tries == 0 ? value + 1 : MAX_GROUP_ID); // verify the first position assertEquals(groupIds.getPositionCount(), 1); long groupId = groupIds.getGroupId(0); assertEquals(groupId, value); } } } }
/**
 * Hashes 100 rows holding the values {@code 0 .. 49} twice over, then checks that exactly
 * 50 groups exist and that appending every group to a page yields the sequence
 * {@code 0 .. 49} in output channel 0.
 */
@Test
public void testAppendToMultipleTuplesPerGroup() {
    // Two repetitions of 0..49 give the same 100 rows as i % 50 for i in 0..99.
    List<Long> values = new ArrayList<>();
    for (int repetition = 0; repetition < 2; repetition++) {
        for (long value = 0; value < 50; value++) {
            values.add(value);
        }
    }

    Block valuesBlock = BlockAssertions.createLongsBlock(values);
    Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(BIGINT), valuesBlock);
    GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(BIGINT), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER);

    Page inputPage = new Page(valuesBlock, hashBlock);
    groupByHash.getGroupIds(inputPage).process();
    assertEquals(groupByHash.getGroupCount(), 50);

    PageBuilder outputBuilder = new PageBuilder(groupByHash.getTypes());
    for (int groupId = 0; groupId < groupByHash.getGroupCount(); groupId++) {
        outputBuilder.declarePosition();
        groupByHash.appendValuesTo(groupId, outputBuilder, 0);
    }

    Page outputPage = outputBuilder.build();
    assertEquals(outputPage.getPositionCount(), 50);
    Block expectedValues = BlockAssertions.createLongSequenceBlock(0, 50);
    BlockAssertions.assertBlockEquals(BIGINT, outputPage.getBlock(0), expectedValues);
}
@Test public void testAppendTo() { Block valuesBlock = BlockAssertions.createStringSequenceBlock(0, 100); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(VARCHAR), valuesBlock); GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(VARCHAR), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER); Work<GroupByIdBlock> work = groupByHash.getGroupIds(new Page(valuesBlock, hashBlock)); work.process(); GroupByIdBlock groupIds = work.getResult(); for (int i = 0; i < groupIds.getPositionCount(); i++) { assertEquals(groupIds.getGroupId(i), i); } assertEquals(groupByHash.getGroupCount(), 100); PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes()); for (int i = 0; i < groupByHash.getGroupCount(); i++) { pageBuilder.declarePosition(); groupByHash.appendValuesTo(i, pageBuilder, 0); } Page page = pageBuilder.build(); // Ensure that all blocks have the same positionCount for (int i = 0; i < groupByHash.getTypes().size(); i++) { assertEquals(page.getBlock(i).getPositionCount(), 100); } assertEquals(page.getPositionCount(), 100); BlockAssertions.assertBlockEquals(VARCHAR, page.getBlock(0), valuesBlock); BlockAssertions.assertBlockEquals(BIGINT, page.getBlock(1), hashBlock); }
/**
 * Benchmarks {@code BigintGroupByHash}: adds every input page to a fresh hash, then copies
 * the values of each group id into output pages.
 *
 * @param data benchmark state supplying the input pages and the hash-enabled flag
 * @return the list of all output pages produced, so the complete result reaches the harness
 */
@Benchmark
@OperationsPerInvocation(POSITIONS)
public Object bigintGroupByHash(SingleChannelBenchmarkData data) {
    GroupByHash groupByHash = new BigintGroupByHash(0, data.getHashEnabled(), EXPECTED_SIZE, NOOP);
    data.getPages().forEach(p -> groupByHash.addPage(p).process());

    ImmutableList.Builder<Page> pages = ImmutableList.builder();
    PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes());
    for (int groupId = 0; groupId < groupByHash.getGroupCount(); groupId++) {
        pageBuilder.declarePosition();
        groupByHash.appendValuesTo(groupId, pageBuilder, 0);
        if (pageBuilder.isFull()) {
            pages.add(pageBuilder.build());
            pageBuilder.reset();
        }
    }
    // Flush whatever is left in the builder after the last full page.
    pages.add(pageBuilder.build());
    // Return the accumulated list; building the last page a second time would
    // discard every page collected above.
    return pages.build();
}
/**
 * Benchmarks {@code MultiChannelGroupByHash} through {@code getGroupIds}: processes every
 * input page against a fresh hash, then copies the values of each group id into output pages.
 *
 * @param data benchmark state supplying types, channels, hash channel and input pages
 * @return the list of all output pages produced, so the complete result reaches the harness
 */
@Benchmark
@OperationsPerInvocation(POSITIONS)
public Object groupByHashPreCompute(BenchmarkData data) {
    GroupByHash groupByHash = new MultiChannelGroupByHash(data.getTypes(), data.getChannels(), data.getHashChannel(), EXPECTED_SIZE, false, getJoinCompiler(data.isGroupByUsesEqual()), NOOP);
    data.getPages().forEach(p -> groupByHash.getGroupIds(p).process());

    ImmutableList.Builder<Page> pages = ImmutableList.builder();
    PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes());
    for (int groupId = 0; groupId < groupByHash.getGroupCount(); groupId++) {
        pageBuilder.declarePosition();
        groupByHash.appendValuesTo(groupId, pageBuilder, 0);
        if (pageBuilder.isFull()) {
            pages.add(pageBuilder.build());
            pageBuilder.reset();
        }
    }
    // Flush whatever is left in the builder after the last full page.
    pages.add(pageBuilder.build());
    // Return the accumulated list; building the last page a second time would
    // discard every page collected above.
    return pages.build();
}
/**
 * Benchmarks {@code MultiChannelGroupByHash} through {@code addPage}: adds every input page
 * to a fresh hash, then copies the values of each group id into output pages.
 *
 * @param data benchmark state supplying types, channels, hash channel and input pages
 * @return the list of all output pages produced, so the complete result reaches the harness
 */
@Benchmark
@OperationsPerInvocation(POSITIONS)
public Object addPagePreCompute(BenchmarkData data) {
    GroupByHash groupByHash = new MultiChannelGroupByHash(data.getTypes(), data.getChannels(), data.getHashChannel(), EXPECTED_SIZE, false, getJoinCompiler(data.isGroupByUsesEqual()), NOOP);
    data.getPages().forEach(p -> groupByHash.addPage(p).process());

    ImmutableList.Builder<Page> pages = ImmutableList.builder();
    PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes());
    for (int groupId = 0; groupId < groupByHash.getGroupCount(); groupId++) {
        pageBuilder.declarePosition();
        groupByHash.appendValuesTo(groupId, pageBuilder, 0);
        if (pageBuilder.isFull()) {
            pages.add(pageBuilder.build());
            pageBuilder.reset();
        }
    }
    // Flush whatever is left in the builder after the last full page.
    pages.add(pageBuilder.build());
    // Return the accumulated list; building the last page a second time would
    // discard every page collected above.
    return pages.build();
}
// Returns hash.getGroupCount() - 1, which is -1 when the hash reports zero groups.
// NOTE(review): presumably the largest valid group index; confirm against the overridden declaration.
// The final brace on this line closes an enclosing declaration that begins outside this excerpt.
@Override public int getMaxIndex() { return hash.getGroupCount() - 1; } }
/**
 * Returns the number of groups as reported by {@code hash.getGroupCount()}.
 *
 * @return the current group count of {@code hash}
 */
public int size() {
    final int count = hash.getGroupCount();
    return count;
}
/**
 * Size of this object, defined as the group count of the underlying {@code hash}.
 *
 * @return the result of {@code hash.getGroupCount()}
 */
public int size() {
    final int groups = hash.getGroupCount();
    return groups;
}
@Test public void testAddPage() throws Exception { GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(BIGINT), new int[] { 0}, Optional.<Integer>empty(), Optional.of(1), 100); for (int tries = 0; tries < 2; tries++) { for (int value = 0; value < MAX_GROUP_ID; value++) { Block block = BlockAssertions.createLongsBlock(value); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(BIGINT), block); Page page = new Page(block, hashBlock); for (int addValuesTries = 0; addValuesTries < 10; addValuesTries++) { groupByHash.addPage(page); assertEquals(groupByHash.getGroupCount(), tries == 0 ? value + 1 : MAX_GROUP_ID); // add the page again using get group ids and make sure the group count didn't change GroupByIdBlock groupIds = groupByHash.getGroupIds(page); assertEquals(groupByHash.getGroupCount(), tries == 0 ? value + 1 : MAX_GROUP_ID); assertEquals(groupIds.getGroupCount(), tries == 0 ? value + 1 : MAX_GROUP_ID); // verify the first position assertEquals(groupIds.getPositionCount(), 1); long groupId = groupIds.getGroupId(0); assertEquals(groupId, value); } } } }
/**
 * Hashes 100 rows holding the values {@code 0 .. 49} twice over, then checks that exactly
 * 50 groups exist and that appending every group to a page yields the sequence
 * {@code 0 .. 49} in the value channel.
 *
 * @throws Exception declared for compatibility with the existing test signature
 */
@Test
public void testAppendToMultipleTuplesPerGroup() throws Exception {
    List<Long> values = new ArrayList<>();
    for (long i = 0; i < 100; i++) {
        values.add(i % 50);
    }
    Block valuesBlock = BlockAssertions.createLongsBlock(values);
    Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(BIGINT), valuesBlock);

    GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(BIGINT), new int[] {0}, Optional.<Integer>empty(), Optional.of(1), 100);
    groupByHash.getGroupIds(new Page(valuesBlock, hashBlock));
    assertEquals(groupByHash.getGroupCount(), 50);

    PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes());
    for (int i = 0; i < groupByHash.getGroupCount(); i++) {
        pageBuilder.declarePosition();
        groupByHash.appendValuesTo(i, pageBuilder, 0);
    }
    Page outputPage = pageBuilder.build();
    assertEquals(outputPage.getPositionCount(), 50);
    // Values were appended starting at output channel offset 0, so the grouped values
    // are in block 0; block 1 is the hash channel and does not hold the sequence 0..49.
    BlockAssertions.assertBlockEquals(BIGINT, outputPage.getBlock(0), BlockAssertions.createLongSequenceBlock(0, 50));
}
/**
 * Benchmarks {@code BigintGroupByHash}: adds every input page to a fresh hash, then copies
 * the values of each group id into output pages.
 *
 * @param data benchmark state supplying the input pages and the hash-enabled flag
 * @return the list of all output pages produced, so the complete result reaches the harness
 */
@Benchmark
@OperationsPerInvocation(POSITIONS)
public Object bigintGroupByHash(SingleChannelBenchmarkData data) {
    GroupByHash groupByHash = new BigintGroupByHash(0, Optional.empty(), data.getHashEnabled(), EXPECTED_SIZE);
    data.getPages().forEach(groupByHash::addPage);

    ImmutableList.Builder<Page> pages = ImmutableList.builder();
    PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes());
    for (int groupId = 0; groupId < groupByHash.getGroupCount(); groupId++) {
        pageBuilder.declarePosition();
        groupByHash.appendValuesTo(groupId, pageBuilder, 0);
        if (pageBuilder.isFull()) {
            pages.add(pageBuilder.build());
            pageBuilder.reset();
        }
    }
    // Flush whatever is left in the builder after the last full page.
    pages.add(pageBuilder.build());
    // Return the accumulated list; building the last page a second time would
    // discard every page collected above.
    return pages.build();
}
/**
 * Benchmarks {@code MultiChannelGroupByHash} through {@code addPage}: adds every input page
 * to a fresh hash, then copies the values of each group id into output pages.
 *
 * @param data benchmark state supplying types, channels, hash channel and input pages
 * @return the list of all output pages produced, so the complete result reaches the harness
 */
@Benchmark
@OperationsPerInvocation(POSITIONS)
public Object addPagePreCompute(BenchmarkData data) {
    GroupByHash groupByHash = new MultiChannelGroupByHash(data.getTypes(), data.getChannels(), Optional.empty(), data.getHashChannel(), EXPECTED_SIZE, false);
    data.getPages().forEach(groupByHash::addPage);

    ImmutableList.Builder<Page> pages = ImmutableList.builder();
    PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes());
    for (int groupId = 0; groupId < groupByHash.getGroupCount(); groupId++) {
        pageBuilder.declarePosition();
        groupByHash.appendValuesTo(groupId, pageBuilder, 0);
        if (pageBuilder.isFull()) {
            pages.add(pageBuilder.build());
            pageBuilder.reset();
        }
    }
    // Flush whatever is left in the builder after the last full page.
    pages.add(pageBuilder.build());
    // Return the accumulated list; building the last page a second time would
    // discard every page collected above.
    return pages.build();
}
/**
 * Benchmarks {@code MultiChannelGroupByHash} through {@code getGroupIds}: processes every
 * input page against a fresh hash, then copies the values of each group id into output pages.
 *
 * @param data benchmark state supplying types, channels, hash channel and input pages
 * @return the list of all output pages produced, so the complete result reaches the harness
 */
@Benchmark
@OperationsPerInvocation(POSITIONS)
public Object groupByHashPreCompute(BenchmarkData data) {
    GroupByHash groupByHash = new MultiChannelGroupByHash(data.getTypes(), data.getChannels(), Optional.empty(), data.getHashChannel(), EXPECTED_SIZE, false);
    data.getPages().forEach(groupByHash::getGroupIds);

    ImmutableList.Builder<Page> pages = ImmutableList.builder();
    PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes());
    for (int groupId = 0; groupId < groupByHash.getGroupCount(); groupId++) {
        pageBuilder.declarePosition();
        groupByHash.appendValuesTo(groupId, pageBuilder, 0);
        if (pageBuilder.isFull()) {
            pages.add(pageBuilder.build());
            pageBuilder.reset();
        }
    }
    // Flush whatever is left in the builder after the last full page.
    pages.add(pageBuilder.build());
    // Return the accumulated list; building the last page a second time would
    // discard every page collected above.
    return pages.build();
}