private Page createPageWithExtractedDictionary(Page page)
{
    Block[] blocks = new Block[page.getChannelCount()];
    Block dictionary = ((DictionaryBlock) page.getBlock(channels[0])).getDictionary();

    // extract data dictionary
    blocks[channels[0]] = dictionary;

    // extract hash dictionary
    if (inputHashChannel.isPresent()) {
        blocks[inputHashChannel.get()] = ((DictionaryBlock) page.getBlock(inputHashChannel.get())).getDictionary();
    }

    return new Page(dictionary.getPositionCount(), blocks);
}
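For context, a hedged sketch of what the extraction buys (not from the snippet above, where channels and inputHashChannel are operator fields): a DictionaryBlock wraps a small dictionary plus an id per position, and getDictionary() returns just the distinct values. This uses the pre-split com.facebook.presto.spi.block API and the BlockAssertions test helper.

// Minimal illustration: a 100-position block backed by a 10-entry dictionary.
Block dictionary = createLongSequenceBlock(0, 10);      // 10 distinct values
int[] ids = new int[100];
for (int i = 0; i < ids.length; i++) {
    ids[i] = i % 10;                                    // each position references a dictionary entry
}
Block dictionaryBlock = new DictionaryBlock(dictionary, ids);

Block extracted = ((DictionaryBlock) dictionaryBlock).getDictionary();
// a page built over the extracted block has 10 positions instead of 100
assert extracted.getPositionCount() == 10;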
private static List<Page> splitPage(Page page, long maxPageSizeInBytes, long previousPageSize)
{
    checkArgument(page.getPositionCount() > 0, "page is empty");
    checkArgument(maxPageSizeInBytes > 0, "maxPageSizeInBytes must be > 0");

    // For Pages with certain types of Blocks (e.g., RLE blocks) the size in bytes may remain constant
    // through the recursive calls, which would cause the recursion to terminate only when
    // page.getPositionCount() == 1, potentially creating a large number of single-position Pages.
    // So we also terminate the recursion if the page size did not improve since the previous call.
    if (page.getSizeInBytes() == previousPageSize || page.getSizeInBytes() <= maxPageSizeInBytes || page.getPositionCount() == 1) {
        return ImmutableList.of(page);
    }

    ImmutableList.Builder<Page> outputPages = ImmutableList.builder();
    long previousSize = page.getSizeInBytes();
    int positionCount = page.getPositionCount();
    int half = positionCount / 2;
    Page leftHalf = page.getRegion(0, half);
    outputPages.addAll(splitPage(leftHalf, maxPageSizeInBytes, previousSize));
    Page rightHalf = page.getRegion(half, positionCount - half);
    outputPages.addAll(splitPage(rightHalf, maxPageSizeInBytes, previousSize));
    return outputPages.build();
}
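A hedged usage sketch for the splitter above: the initial call passes Long.MAX_VALUE as previousPageSize so the size-progress check cannot fire on the first level of recursion. The block helper is Presto's BlockAssertions; the 4096-byte budget is an arbitrary choice for illustration.

Page bigPage = new Page(createLongSequenceBlock(0, 1000));
List<Page> chunks = splitPage(bigPage, 4096, Long.MAX_VALUE);
for (Page chunk : chunks) {
    // each chunk is a region view of the original page, so the split copies no data;
    // for flat blocks each chunk lands under the budget (RLE pages may stop early
    // via the size-progress check instead)
    assert chunk.getPositionCount() <= bigPage.getPositionCount();
}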
public void append(Page dataPage)
{
    // getRegionSizeInBytes for each row can be expensive; use getRetainedSizeInBytes for estimation
    hiveWriterStats.addInputPageSizesInBytes(dataPage.getRetainedSizeInBytes());
    fileWriter.appendRows(dataPage);
    rowCount += dataPage.getPositionCount();
    inputSizeInBytes += dataPage.getSizeInBytes();
}
private static Page extractColumns(Page page, int[] columns)
{
    Block[] blocks = new Block[columns.length];
    for (int i = 0; i < columns.length; i++) {
        int dataColumn = columns[i];
        blocks[i] = page.getBlock(dataColumn);
    }
    return new Page(page.getPositionCount(), blocks);
}
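A hedged usage sketch: projecting channels 2 and 0 of a three-channel page. The output channel order follows the columns array, and blocks are shared rather than copied. createLongSequenceBlock is Presto's BlockAssertions helper.

Block a = createLongSequenceBlock(0, 10);
Block b = createLongSequenceBlock(10, 20);
Block c = createLongSequenceBlock(20, 30);
Page source = new Page(a, b, c);                 // channels 0, 1, 2

Page projected = extractColumns(source, new int[] {2, 0});
assert projected.getChannelCount() == 2;
assert projected.getBlock(0) == c;               // same Block instance, no copy
assert projected.getBlock(1) == a;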
private static boolean containsNullValue(int position, Page page)
{
    for (int channel = 0; channel < page.getChannelCount(); channel++) {
        Block block = page.getBlock(channel);
        if (block.isNull(position)) {
            return true;
        }
    }
    return false;
}
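A hedged usage sketch using Presto's BlockAssertions.createLongsBlock, which accepts nulls: only the row whose second channel holds a null reports true.

Page page = new Page(
        createLongsBlock(1L, 2L, 3L),
        createLongsBlock(10L, null, 30L));

assert !containsNullValue(0, page);   // no channel is null at position 0
assert containsNullValue(1, page);    // channel 1 is null at position 1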
public int[] partitionPage(Page partitionColumns, Block bucketBlock)
{
    if (bucketBlock != null) {
        Block[] blocks = new Block[partitionColumns.getChannelCount() + 1];
        for (int i = 0; i < partitionColumns.getChannelCount(); i++) {
            blocks[i] = partitionColumns.getBlock(i);
        }
        blocks[blocks.length - 1] = bucketBlock;
        partitionColumns = new Page(partitionColumns.getPositionCount(), blocks);
    }
    return pageIndexer.indexPage(partitionColumns);
}
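Isolated from the PageIndexer, a hedged sketch of just the widening step: the bucket block becomes the trailing channel, so the indexer can treat (partition values, bucket) as one compound key. createLongsBlock and createIntsBlock are Presto test helpers; the values are arbitrary.

Page partitionColumns = new Page(createLongsBlock(1L, 2L), createLongsBlock(7L, 8L));   // 2 channels
Block bucketBlock = createIntsBlock(0, 1);

Block[] blocks = new Block[partitionColumns.getChannelCount() + 1];
for (int i = 0; i < partitionColumns.getChannelCount(); i++) {
    blocks[i] = partitionColumns.getBlock(i);
}
blocks[blocks.length - 1] = bucketBlock;   // bucket rides along as the last channel
Page widened = new Page(partitionColumns.getPositionCount(), blocks);

assert widened.getChannelCount() == 3;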
@Test
public void testSelectAllFilter()
{
    PageProcessor pageProcessor = new PageProcessor(
            Optional.of(new SelectAllFilter()),
            ImmutableList.of(new InputPageProjection(0, BIGINT)),
            OptionalInt.of(MAX_BATCH_SIZE));

    Page inputPage = new Page(createLongSequenceBlock(0, 100));

    Iterator<Optional<Page>> output = processAndAssertRetainedPageSize(pageProcessor, inputPage);

    List<Optional<Page>> outputPages = ImmutableList.copyOf(output);
    assertEquals(outputPages.size(), 1);
    assertPageEquals(ImmutableList.of(BIGINT), outputPages.get(0).orElse(null), new Page(createLongSequenceBlock(0, 100)));
}
@Test
public void testProjectNoColumns()
{
    PageProcessor pageProcessor = new PageProcessor(Optional.empty(), ImmutableList.of(), OptionalInt.of(MAX_BATCH_SIZE));

    Page inputPage = new Page(createLongSequenceBlock(0, 100));

    Iterator<Optional<Page>> output = processAndAssertRetainedPageSize(pageProcessor, inputPage);

    List<Optional<Page>> outputPages = ImmutableList.copyOf(output);
    assertEquals(outputPages.size(), 1);
    Page outputPage = outputPages.get(0).orElse(null);
    assertEquals(outputPage.getChannelCount(), 0);
    assertEquals(outputPage.getPositionCount(), inputPage.getPositionCount());
}
@Test
public void testSanityColumnarDictionary()
{
    PageProcessor processor = compiler.compilePageProcessor(Optional.empty(), ImmutableList.of(field(0, VARCHAR)), MAX_BATCH_SIZE).get();
    Page page = new Page(createDictionaryBlock(createExpectedValues(10), 100));
    Page outputPage = getOnlyElement(
            processor.process(
                    null,
                    new DriverYieldSignal(),
                    newSimpleAggregatedMemoryContext().newLocalMemoryContext(PageProcessor.class.getSimpleName()),
                    page))
            .orElseThrow(() -> new AssertionError("page is not present"));

    assertEquals(outputPage.getPositionCount(), 100);
    assertTrue(outputPage.getBlock(0) instanceof DictionaryBlock);

    DictionaryBlock dictionaryBlock = (DictionaryBlock) outputPage.getBlock(0);
    assertEquals(dictionaryBlock.getDictionary().getPositionCount(), 10);
}
@Test
public void testForceRehash()
{
    // Create a page with positionCount >> expected size of groupByHash
    Block valuesBlock = BlockAssertions.createStringSequenceBlock(0, 100);
    Block hashBlock = TypeUtils.getHashBlock(ImmutableList.of(VARCHAR), valuesBlock);

    // Create group by hash with extremely small size
    GroupByHash groupByHash = createGroupByHash(TEST_SESSION, ImmutableList.of(VARCHAR), new int[] {0}, Optional.of(1), 4, JOIN_COMPILER);

    groupByHash.getGroupIds(new Page(valuesBlock, hashBlock)).process();

    // Ensure that all groups are present in group by hash
    for (int i = 0; i < valuesBlock.getPositionCount(); i++) {
        assertTrue(groupByHash.contains(i, new Page(valuesBlock, hashBlock), CONTAINS_CHANNELS));
    }
}
@Override
public void addInput(Page page)
{
    requireNonNull(page, "page is null");
    checkState(!isFinished(), "Operator is already finished");

    Block sourceBlock = page.getBlock(setChannel);
    Page sourcePage = hashChannel.isPresent() ? new Page(sourceBlock, page.getBlock(hashChannel.get())) : new Page(sourceBlock);

    unfinishedWork = channelSetBuilder.addPage(sourcePage);
    processUnfinishedWork();
}
@Test
public void testReverseSortedPositionLinks()
{
    JoinFilterFunction filterFunction = (leftAddress, rightPosition, rightPage) -> BIGINT.getLong(TEST_PAGE.getBlock(0), leftAddress) < 4;

    PositionLinks.FactoryBuilder factoryBuilder = buildSortedPositionLinks();
    PositionLinks positionLinks = factoryBuilder.build().create(ImmutableList.of(filterFunction));

    assertEquals(positionLinks.start(0, 0, TEST_PAGE), 0);
    assertEquals(positionLinks.next(0, 0, TEST_PAGE), 1);
    assertEquals(positionLinks.next(1, 0, TEST_PAGE), 2);
    assertEquals(positionLinks.next(2, 0, TEST_PAGE), 3);
    assertEquals(positionLinks.next(3, 0, TEST_PAGE), -1);

    assertEquals(positionLinks.start(10, 0, TEST_PAGE), -1);
}
@Test
public void testExpressionProfiler()
{
    // (reconstructed) build an add(col0, 10) projection; the constant argument is implied by the add10Expression name
    RowExpression add10Expression = call(
            internalOperator(ADD, BIGINT.getTypeSignature(), ImmutableList.of(BIGINT.getTypeSignature(), BIGINT.getTypeSignature())),
            BIGINT,
            field(0, BIGINT),
            constant(10L, BIGINT));

    Supplier<PageProjection> projectionSupplier = functionCompiler.compileProjection(add10Expression, Optional.empty());
    PageProjection projection = projectionSupplier.get();
    Page page = new Page(createLongSequenceBlock(1, 11));
    ExpressionProfiler profiler = new ExpressionProfiler(testingTicker, SPLIT_RUN_QUANTA);
    for (int i = 0; i < 100; i++) {
        profiler.start();
        Work<Block> work = projection.project(SESSION, new DriverYieldSignal(), page, SelectedPositions.positionsRange(0, page.getPositionCount()));
        if (i < 10) {
            // (reconstructed) advance the fake ticker by a large delta so the projection is measured as expensive
            testingTicker.increment(10, SECONDS);
            profiler.stop(page.getPositionCount());
            assertTrue(profiler.isExpressionExpensive());
        }
        else {
            // (reconstructed) no time elapses, so the profiler stops considering the projection expensive
            testingTicker.increment(0, SECONDS);
            profiler.stop(page.getPositionCount());
            assertFalse(profiler.isExpressionExpensive());
        }
        work.process();
    }
}
@Test
public void testBinaryMergeIteratorOverEmptyPageAndNonEmptyPage()
{
    Page emptyPage = new Page(0, BIGINT.createFixedSizeBlockBuilder(0).build());
    Page page = rowPagesBuilder(BIGINT).row(42).build().get(0);

    WorkProcessor<Page> mergedPage = new MergeHashSort(newSimpleAggregatedMemoryContext()).merge(
            ImmutableList.of(BIGINT),
            ImmutableList.of(BIGINT),
            ImmutableList.of(ImmutableList.of(emptyPage, page).iterator()).stream()
                    .map(WorkProcessor::fromIterator)
                    .collect(toImmutableList()),
            new DriverYieldSignal());

    assertTrue(mergedPage.process());
    Page actualPage = mergedPage.getResult();
    assertEquals(actualPage.getPositionCount(), 1);
    assertEquals(actualPage.getChannelCount(), 1);
    assertEquals(actualPage.getBlock(0).getLong(0, 0), 42);

    assertFinishes(mergedPage);
}
private static PagesHashStrategy pagesHashStrategy()
{
    return new SimplePagesHashStrategy(
            ImmutableList.of(BIGINT),
            ImmutableList.of(),
            ImmutableList.of(ImmutableList.of(TEST_PAGE.getBlock(0))),
            ImmutableList.of(),
            OptionalInt.empty(),
            Optional.of(0),
            MetadataManager.createTestMetadataManager().getFunctionRegistry(),
            new FeaturesConfig().isGroupByUsesEqualTo());
}
public IndexedData streamIndexDataForSingleKey(UpdateRequest updateRequest)
{
    Page indexKeyTuple = updateRequest.getPage().getRegion(0, 1);

    PageBuffer pageBuffer = new PageBuffer(100);
    DriverFactory driverFactory = indexBuildDriverFactoryProvider.createStreaming(pageBuffer, indexKeyTuple);
    Driver driver = driverFactory.createDriver(pipelineContext.addDriverContext());

    PageRecordSet pageRecordSet = new PageRecordSet(keyTypes, indexKeyTuple);
    PlanNodeId planNodeId = driverFactory.getSourceId().get();
    ScheduledSplit split = new ScheduledSplit(0, planNodeId, new Split(INDEX_CONNECTOR_ID, new ConnectorTransactionHandle() {}, new IndexSplit(pageRecordSet)));
    driver.updateSource(new TaskSource(planNodeId, ImmutableSet.of(split), true));

    return new StreamingIndexedData(outputTypes, keyTypes, indexKeyTuple, pageBuffer, driver);
}
public static Object distinctAggregation(InternalAggregationFunction function, Page... pages)
{
    Optional<Integer> maskChannel = Optional.of(pages[0].getChannelCount());

    // Execute normally
    Object aggregation = aggregation(function, createArgs(function), maskChannel, maskPages(true, pages));

    // Create two copies of each page with one of them masked off
    Page[] dupedPages = new Page[pages.length * 2];
    System.arraycopy(maskPages(true, pages), 0, dupedPages, 0, pages.length);
    System.arraycopy(maskPages(false, pages), 0, dupedPages, pages.length, pages.length);

    // Execute with the duplicated, masked pages and assert the result equals normal execution
    Object aggregationWithDupes = aggregation(function, createArgs(function), maskChannel, dupedPages);
    assertEquals(aggregationWithDupes, aggregation, "Inconsistent results with mask");

    return aggregation;
}
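maskPages is a helper elsewhere in the test class and is not shown in this snippet. A hypothetical reconstruction (an assumption, not the actual helper) that matches how it is used here: append a constant BOOLEAN mask channel at index pages[0].getChannelCount().

// Hypothetical reconstruction for illustration only.
private static Page[] maskPages(boolean maskValue, Page... pages)
{
    Page[] maskedPages = new Page[pages.length];
    for (int i = 0; i < pages.length; i++) {
        Page page = pages[i];
        // build a mask column with every position set to maskValue
        BlockBuilder maskBuilder = BOOLEAN.createBlockBuilder(null, page.getPositionCount());
        for (int position = 0; position < page.getPositionCount(); position++) {
            BOOLEAN.writeBoolean(maskBuilder, maskValue);
        }
        // copy the original channels and append the mask as the trailing channel
        Block[] blocks = new Block[page.getChannelCount() + 1];
        for (int channel = 0; channel < page.getChannelCount(); channel++) {
            blocks[channel] = page.getBlock(channel);
        }
        blocks[blocks.length - 1] = maskBuilder.build();
        maskedPages[i] = new Page(blocks);
    }
    return maskedPages;
}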
@Test
public void testBigintSerializedSize()
{
    BlockBuilder builder = BIGINT.createBlockBuilder(null, 5);

    // empty page
    Page page = new Page(builder.build());
    int pageSize = serializedSize(ImmutableList.of(BIGINT), page);
    assertEquals(pageSize, 48); // page overhead is ideally 35, but a zero-length block is serialized as an RLE block, which adds 13 bytes of overhead

    // page with one value
    BIGINT.writeLong(builder, 123);
    pageSize = 35; // with a value present, the normal block implementation is used, so the page overhead is 35
    page = new Page(builder.build());
    int firstValueSize = serializedSize(ImmutableList.of(BIGINT), page) - pageSize;
    assertEquals(firstValueSize, 9); // value size + value overhead

    // page with two values
    BIGINT.writeLong(builder, 456);
    page = new Page(builder.build());
    int secondValueSize = serializedSize(ImmutableList.of(BIGINT), page) - (pageSize + firstValueSize);
    assertEquals(secondValueSize, 8); // value size (value overhead is shared with the previous value)
}
@Test
public void testBinaryMergeIteratorOverEmptyPage()
{
    Page emptyPage = new Page(0, BIGINT.createFixedSizeBlockBuilder(0).build());

    WorkProcessor<Page> mergedPage = new MergeHashSort(newSimpleAggregatedMemoryContext()).merge(
            ImmutableList.of(BIGINT),
            ImmutableList.of(BIGINT),
            ImmutableList.of(ImmutableList.of(emptyPage).iterator()).stream()
                    .map(WorkProcessor::fromIterator)
                    .collect(toImmutableList()),
            new DriverYieldSignal());

    assertFinishes(mergedPage);
}
@Test
public void testFileVarbinarySpiller()
        throws Exception
{
    List<Type> types = ImmutableList.of(BIGINT, DOUBLE, VARBINARY);

    BlockBuilder col1 = BIGINT.createBlockBuilder(null, 1);
    BlockBuilder col2 = DOUBLE.createBlockBuilder(null, 1);
    BlockBuilder col3 = VARBINARY.createBlockBuilder(null, 1);
    col1.writeLong(42).closeEntry();
    col2.writeLong(doubleToLongBits(43.0)).closeEntry();
    col3.writeLong(doubleToLongBits(43.0)).writeLong(1).closeEntry();
    Page page = new Page(col1.build(), col2.build(), col3.build());

    try (Spiller spiller = factory.create(TYPES, bytes -> {}, memoryContext)) {
        testSpiller(types, spiller, ImmutableList.of(page));
    }
}