@Override public Work<?> processPage(Page page) { if (aggregators.isEmpty()) { return groupByHash.addPage(page); } else { return new TransformWork<>( groupByHash.getGroupIds(page), groupByIdBlock -> { for (Aggregator aggregator : aggregators) { aggregator.processPage(groupByIdBlock, page); } // we do not need any output from TransformWork for this case return null; }); } }
/**
 * Returns whether the row at {@code position} of {@code page} is already
 * present in the underlying hash (compared on the configured hash channels).
 */
public boolean contains(int position, Page page)
{
    boolean present = hash.contains(position, page, hashChannels);
    return present;
}
/**
 * Creates an indexer over all channels of the given types.
 *
 * <p>Every input channel is used as a hash channel; no precomputed hash
 * channel is supplied, and the table starts at an expected size of 20.
 */
public GroupByHashPageIndexer(List<? extends Type> hashTypes, JoinCompiler joinCompiler)
{
    this(GroupByHash.createGroupByHash(
            hashTypes,
            // index every channel: [0, hashTypes.size())
            IntStream.range(0, hashTypes.size()).toArray(),
            Optional.empty(),
            20,
            false,
            joinCompiler,
            NOOP));
}
/**
 * Returns an iterator over all group ids ordered by their raw hash value.
 *
 * <p>The group count is captured once up front: it is invariant for the
 * lifetime of this call, and the original code re-queried it for the loop
 * bound, the capacity, the sort bound, and the iterator field.
 */
private IntIterator hashSortedGroupIds()
{
    int groupCount = groupByHash.getGroupCount();
    IntBigArray groupIds = new IntBigArray();
    groupIds.ensureCapacity(groupCount);
    for (int i = 0; i < groupCount; i++) {
        groupIds.set(i, i);
    }
    // order group ids by raw hash so output is emitted in hash order
    groupIds.sort(0, groupCount, (leftGroupId, rightGroupId) ->
            Long.compare(groupByHash.getRawHash(leftGroupId), groupByHash.getRawHash(rightGroupId)));
    return new AbstractIntIterator()
    {
        private final int totalPositions = groupCount;
        private int position;

        @Override
        public boolean hasNext()
        {
            return position < totalPositions;
        }

        @Override
        public int nextInt()
        {
            return groupIds.get(position++);
        }
    };
}
@Test
public void testTypes()
{
    GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(VARCHAR), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER);
    // Additional bigint channel for hash: supplying a precomputed hash channel
    // appends a trailing BIGINT column to the reported output types.
    assertEquals(groupByHash.getTypes(), ImmutableList.of(VARCHAR, BIGINT));
}
@Test
public void testAppendToMultipleTuplesPerGroup()
{
    // 100 input rows collapsing onto 50 distinct groups (each value 0..49 appears twice)
    List<Long> values = new ArrayList<>();
    for (long value = 0; value < 100; value++) {
        values.add(value % 50);
    }
    Block valuesBlock = BlockAssertions.createLongsBlock(values);
    Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(BIGINT), valuesBlock);

    GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(BIGINT), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER);
    groupByHash.getGroupIds(new Page(valuesBlock, hashBlock)).process();
    assertEquals(groupByHash.getGroupCount(), 50);

    // appendValuesTo must reproduce exactly one row per group, in group-id order
    PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes());
    for (int groupId = 0; groupId < groupByHash.getGroupCount(); groupId++) {
        pageBuilder.declarePosition();
        groupByHash.appendValuesTo(groupId, pageBuilder, 0);
    }
    Page outputPage = pageBuilder.build();
    assertEquals(outputPage.getPositionCount(), 50);
    BlockAssertions.assertBlockEquals(BIGINT, outputPage.getBlock(0), BlockAssertions.createLongSequenceBlock(0, 50));
}
@Test public void testForceRehash() { // Create a page with positionCount >> expected size of groupByHash Block valuesBlock = BlockAssertions.createStringSequenceBlock(0, 100); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(VARCHAR), valuesBlock); // Create group by hash with extremely small size GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(VARCHAR), new int[] {0}, Optional.of(1), 4, JOIN_COMPILER); groupByHash.getGroupIds(new Page(valuesBlock, hashBlock)).process(); // Ensure that all groups are present in group by hash for (int i = 0; i < valuesBlock.getPositionCount(); i++) { assertTrue(groupByHash.contains(i, new Page(valuesBlock, hashBlock), CONTAINS_CHANNELS)); } }
@Benchmark
@OperationsPerInvocation(POSITIONS)
public Object bigintGroupByHash(SingleChannelBenchmarkData data)
{
    GroupByHash groupByHash = new BigintGroupByHash(0, data.getHashEnabled(), EXPECTED_SIZE, NOOP);
    data.getPages().forEach(p -> groupByHash.addPage(p).process());

    ImmutableList.Builder<Page> pages = ImmutableList.builder();
    PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes());
    for (int groupId = 0; groupId < groupByHash.getGroupCount(); groupId++) {
        pageBuilder.declarePosition();
        groupByHash.appendValuesTo(groupId, pageBuilder, 0);
        if (pageBuilder.isFull()) {
            pages.add(pageBuilder.build());
            pageBuilder.reset();
        }
    }
    pages.add(pageBuilder.build());
    // Return the accumulated pages, not a second pageBuilder.build(): the
    // original discarded the `pages` builder entirely, so JMH only observed
    // the last partial page and the loop's output was eligible for DCE.
    return pages.build();
}
@Test public void testAddPage() { GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(BIGINT), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER); for (int tries = 0; tries < 2; tries++) { for (int value = 0; value < MAX_GROUP_ID; value++) { Block block = BlockAssertions.createLongsBlock(value); Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(BIGINT), block); Page page = new Page(block, hashBlock); for (int addValuesTries = 0; addValuesTries < 10; addValuesTries++) { groupByHash.addPage(page).process(); assertEquals(groupByHash.getGroupCount(), tries == 0 ? value + 1 : MAX_GROUP_ID); // add the page again using get group ids and make sure the group count didn't change Work<GroupByIdBlock> work = groupByHash.getGroupIds(page); work.process(); GroupByIdBlock groupIds = work.getResult(); assertEquals(groupByHash.getGroupCount(), tries == 0 ? value + 1 : MAX_GROUP_ID); assertEquals(groupIds.getGroupCount(), tries == 0 ? value + 1 : MAX_GROUP_ID); // verify the first position assertEquals(groupIds.getPositionCount(), 1); long groupId = groupIds.getGroupId(0); assertEquals(groupId, value); } } } }
@Benchmark
@OperationsPerInvocation(POSITIONS)
public Object bigintGroupByHash(SingleChannelBenchmarkData data)
{
    GroupByHash groupByHash = new BigintGroupByHash(0, Optional.empty(), data.getHashEnabled(), EXPECTED_SIZE);
    data.getPages().forEach(groupByHash::addPage);

    ImmutableList.Builder<Page> pages = ImmutableList.builder();
    PageBuilder pageBuilder = new PageBuilder(groupByHash.getTypes());
    for (int groupId = 0; groupId < groupByHash.getGroupCount(); groupId++) {
        pageBuilder.declarePosition();
        groupByHash.appendValuesTo(groupId, pageBuilder, 0);
        if (pageBuilder.isFull()) {
            pages.add(pageBuilder.build());
            pageBuilder.reset();
        }
    }
    pages.add(pageBuilder.build());
    // Return the accumulated pages, not a second pageBuilder.build(): the
    // original discarded the `pages` builder entirely, so JMH only observed
    // the last partial page and the loop's output was eligible for DCE.
    return pages.build();
}
@Test public void testNullGroup() { GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(BIGINT), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER); Block block = createLongsBlock((Long) null); Block hashBlock = getHashBlock(ImmutableList.of(BIGINT), block); Page page = new Page(block, hashBlock); groupByHash.addPage(page).process(); // Add enough values to force a rehash block = createLongSequenceBlock(1, 132748); hashBlock = getHashBlock(ImmutableList.of(BIGINT), block); page = new Page(block, hashBlock); groupByHash.addPage(page).process(); block = createLongsBlock(0); hashBlock = getHashBlock(ImmutableList.of(BIGINT), block); page = new Page(block, hashBlock); assertFalse(groupByHash.contains(0, page, CONTAINS_CHANNELS)); }
/**
 * Accepts a new input page. Requires that the operator is not finishing and
 * has no unfinished work pending; when a group-by hash is configured, group-id
 * computation starts immediately.
 */
@Override
public void addInput(Page page)
{
    checkState(!finishing, "Operator is already finishing");
    requireNonNull(page, "page is null");
    checkState(!hasUnfinishedInput());

    inputPage = page;
    groupByHash.ifPresent(hash -> {
        unfinishedWork = hash.getGroupIds(inputPage);
        processUnfinishedWork();
    });
    updateMemoryReservation();
}
/**
 * Returns the largest valid group index; group ids are dense in
 * [0, groupCount), so this is count - 1.
 */
@Override
public int getMaxIndex()
{
    int groupCount = hash.getGroupCount();
    return groupCount - 1;
}
}
/**
 * Streams the aggregation output as pages, one page per processor step,
 * consuming group ids from {@code groupIds} until exhausted.
 */
private WorkProcessor<Page> buildResult(IntIterator groupIds)
{
    PageBuilder pageBuilder = new PageBuilder(buildTypes());
    // The group-by key types are fixed for the life of this processor; the
    // original re-fetched them on every processor invocation.
    List<Type> types = groupByHash.getTypes();
    return WorkProcessor.create(() -> {
        if (!groupIds.hasNext()) {
            return ProcessState.finished();
        }
        pageBuilder.reset();
        while (!pageBuilder.isFull() && groupIds.hasNext()) {
            int groupId = groupIds.nextInt();
            groupByHash.appendValuesTo(groupId, pageBuilder, 0);
            pageBuilder.declarePosition();
            for (int i = 0; i < aggregators.size(); i++) {
                Aggregator aggregator = aggregators.get(i);
                // aggregation output columns follow the group-by key columns
                BlockBuilder output = pageBuilder.getBlockBuilder(types.size() + i);
                aggregator.evaluate(groupId, output);
            }
        }
        return ProcessState.ofResult(pageBuilder.build());
    });
}
@Test
public void testGetGroupIds()
{
    GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(BIGINT), new int[] {0}, Optional.of(1), 100, JOIN_COMPILER);
    for (int tries = 0; tries < 2; tries++) {
        for (int value = 0; value < MAX_GROUP_ID; value++) {
            // groups accumulate on the first pass; the second pass sees them all
            long expectedGroupCount = tries == 0 ? value + 1 : MAX_GROUP_ID;
            Block block = BlockAssertions.createLongsBlock(value);
            Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(BIGINT), block);
            Page page = new Page(block, hashBlock);
            for (int addValuesTries = 0; addValuesTries < 10; addValuesTries++) {
                Work<GroupByIdBlock> work = groupByHash.getGroupIds(page);
                work.process();
                GroupByIdBlock groupIds = work.getResult();
                assertEquals(groupIds.getGroupCount(), expectedGroupCount);
                assertEquals(groupIds.getPositionCount(), 1);
                long groupId = groupIds.getGroupId(0);
                assertEquals(groupId, value);
            }
        }
    }
}
/**
 * Returns the estimated memory footprint of the underlying hash, in bytes.
 */
public long getEstimatedSizeInBytes()
{
    long sizeInBytes = hash.getEstimatedSize();
    return sizeInBytes;
}
// NOTE(review): fragment of a larger method — the enclosing braces are outside this view.
if (groupByHash.isPresent()) {
    GroupByHash hash = groupByHash.get();
    // snapshot the hash size before computing group ids so only the growth is reserved
    long groupByHashSize = hash.getEstimatedSize();
    partitionIds = Optional.of(hash.getGroupIds(page));
    // reserve the additional memory the hash grew by while processing this page
    operatorContext.reserveMemory(hash.getEstimatedSize() - groupByHashSize);
/**
 * Returns the type of the single indexed channel.
 */
public Type getType()
{
    Type channelType = hash.getTypes().get(0);
    return channelType;
}
// NOTE(review): this block appears garbled — the method body and the anonymous
// AbstractIterator body are missing their opening/closing braces (likely lost in
// extraction). Restore "{" after build(), close the for loop, and brace the
// anonymous class body before this will compile. Tokens left untouched below.
public Iterator<Page> build()
    List<Type> types = new ArrayList<>(groupByHash.getTypes());
    for (Aggregator aggregator : aggregators) {
        types.add(aggregator.getType());
    return new AbstractIterator<Page>()
        // total number of groups to emit, captured when iteration starts
        private final int groupCount = groupByHash.getGroupCount();
        private int groupId;
/**
 * Adds the page to the underlying hash and, when an operator context is
 * attached, syncs its memory reservation to the hash's new size.
 */
public void addPage(Page page)
{
    hash.addPage(page);
    if (operatorContext == null) {
        return;
    }
    operatorContext.setMemoryReservation(hash.getEstimatedSize());
}
}