this.stripeFilePositions = stripeFilePositions.build(); orcDataSource = wrapWithCacheIfTinyStripes(orcDataSource, this.stripes, maxMergeDistance, tinyStripeThreshold); this.orcDataSource = orcDataSource; this.splitLength = splitLength;
DataSize tinyStripeThreshold = new DataSize(8, Unit.MEGABYTE); OrcDataSource actual = wrapWithCacheIfTinyStripes( FakeOrcDataSource.INSTANCE, ImmutableList.of(), assertInstanceOf(actual, CachingOrcDataSource.class); actual = wrapWithCacheIfTinyStripes( FakeOrcDataSource.INSTANCE, ImmutableList.of(new StripeInformation(123, 3, 10, 10, 10)), assertInstanceOf(actual, CachingOrcDataSource.class); actual = wrapWithCacheIfTinyStripes( FakeOrcDataSource.INSTANCE, ImmutableList.of(new StripeInformation(123, 3, 10, 10, 10), new StripeInformation(123, 33, 10, 10, 10), new StripeInformation(123, 63, 10, 10, 10)), assertInstanceOf(actual, CachingOrcDataSource.class); actual = wrapWithCacheIfTinyStripes( FakeOrcDataSource.INSTANCE, ImmutableList.of(new StripeInformation(123, 3, 10, 10, 10), new StripeInformation(123, 33, 10, 10, 10), new StripeInformation(123, 63, 1048576 * 8 - 20, 10, 10)), actual = wrapWithCacheIfTinyStripes( FakeOrcDataSource.INSTANCE, ImmutableList.of(new StripeInformation(123, 3, 10, 10, 10), new StripeInformation(123, 33, 10, 10, 10), new StripeInformation(123, 63, 1048576 * 8 - 20 + 1, 10, 10)),
public void doIntegration(TestingOrcDataSource orcDataSource, DataSize maxMergeDistance, DataSize maxReadSize, DataSize tinyStripeThreshold) throws IOException { OrcReader orcReader = new OrcReader(orcDataSource, ORC, maxMergeDistance, maxReadSize, tinyStripeThreshold, new DataSize(1, Unit.MEGABYTE)); // 1 for reading file footer assertEquals(orcDataSource.getReadCount(), 1); List<StripeInformation> stripes = orcReader.getFooter().getStripes(); // Sanity check number of stripes. This can be three or higher because of orc writer low memory mode. assertGreaterThanOrEqual(stripes.size(), 3); //verify wrapped by CachingOrcReader assertInstanceOf(wrapWithCacheIfTinyStripes(orcDataSource, stripes, maxMergeDistance, tinyStripeThreshold), CachingOrcDataSource.class); OrcRecordReader orcRecordReader = orcReader.createRecordReader( ImmutableMap.of(0, VARCHAR), (numberOfRows, statisticsByColumnIndex) -> true, HIVE_STORAGE_TIME_ZONE, newSimpleAggregatedMemoryContext(), INITIAL_BATCH_SIZE); int positionCount = 0; while (true) { int batchSize = orcRecordReader.nextBatch(); if (batchSize <= 0) { break; } Block block = orcRecordReader.readBlock(VARCHAR, 0); positionCount += block.getPositionCount(); } assertEquals(positionCount, POSITION_COUNT); }
this.stripeFilePositions = stripeFilePositions.build(); orcDataSource = wrapWithCacheIfTinyStripes(orcDataSource, this.stripes, maxMergeDistance, maxReadSize); this.orcDataSource = orcDataSource; this.splitLength = splitLength;
this.stripeFilePositions = stripeFilePositions.build(); orcDataSource = wrapWithCacheIfTinyStripes(orcDataSource, this.stripes, maxMergeDistance, tinyStripeThreshold); this.orcDataSource = orcDataSource; this.splitLength = splitLength;
DataSize tinyStripeThreshold = new DataSize(8, Unit.MEGABYTE); OrcDataSource actual = wrapWithCacheIfTinyStripes( FakeOrcDataSource.INSTANCE, ImmutableList.of(), assertInstanceOf(actual, CachingOrcDataSource.class); actual = wrapWithCacheIfTinyStripes( FakeOrcDataSource.INSTANCE, ImmutableList.of(new StripeInformation(123, 3, 10, 10, 10)), assertInstanceOf(actual, CachingOrcDataSource.class); actual = wrapWithCacheIfTinyStripes( FakeOrcDataSource.INSTANCE, ImmutableList.of(new StripeInformation(123, 3, 10, 10, 10), new StripeInformation(123, 33, 10, 10, 10), new StripeInformation(123, 63, 10, 10, 10)), assertInstanceOf(actual, CachingOrcDataSource.class); actual = wrapWithCacheIfTinyStripes( FakeOrcDataSource.INSTANCE, ImmutableList.of(new StripeInformation(123, 3, 10, 10, 10), new StripeInformation(123, 33, 10, 10, 10), new StripeInformation(123, 63, 1048576 * 8 - 20, 10, 10)), actual = wrapWithCacheIfTinyStripes( FakeOrcDataSource.INSTANCE, ImmutableList.of(new StripeInformation(123, 3, 10, 10, 10), new StripeInformation(123, 33, 10, 10, 10), new StripeInformation(123, 63, 1048576 * 8 - 20 + 1, 10, 10)),
DataSize maxReadSize = new DataSize(8, Unit.MEGABYTE); OrcDataSource actual = wrapWithCacheIfTinyStripes( FakeOrcDataSource.INSTANCE, ImmutableList.of(), assertInstanceOf(actual, CachingOrcDataSource.class); actual = wrapWithCacheIfTinyStripes( FakeOrcDataSource.INSTANCE, ImmutableList.of(new StripeInformation(123, 3, 10, 10, 10)), assertInstanceOf(actual, CachingOrcDataSource.class); actual = wrapWithCacheIfTinyStripes( FakeOrcDataSource.INSTANCE, ImmutableList.of(new StripeInformation(123, 3, 10, 10, 10), new StripeInformation(123, 33, 10, 10, 10), new StripeInformation(123, 63, 10, 10, 10)), assertInstanceOf(actual, CachingOrcDataSource.class); actual = wrapWithCacheIfTinyStripes( FakeOrcDataSource.INSTANCE, ImmutableList.of(new StripeInformation(123, 3, 10, 10, 10), new StripeInformation(123, 33, 10, 10, 10), new StripeInformation(123, 63, 1048576 * 8 - 20, 10, 10)), actual = wrapWithCacheIfTinyStripes( FakeOrcDataSource.INSTANCE, ImmutableList.of(new StripeInformation(123, 3, 10, 10, 10), new StripeInformation(123, 33, 10, 10, 10), new StripeInformation(123, 63, 1048576 * 8 - 20 + 1, 10, 10)),
public void doIntegration(TestingOrcDataSource orcDataSource, DataSize maxMergeDistance, DataSize maxReadSize, DataSize tinyStripeThreshold) throws IOException { OrcReader orcReader = new OrcReader(orcDataSource, ORC, maxMergeDistance, maxReadSize, tinyStripeThreshold, new DataSize(1, Unit.MEGABYTE)); // 1 for reading file footer assertEquals(orcDataSource.getReadCount(), 1); List<StripeInformation> stripes = orcReader.getFooter().getStripes(); // Sanity check number of stripes. This can be three or higher because of orc writer low memory mode. assertGreaterThanOrEqual(stripes.size(), 3); //verify wrapped by CachingOrcReader assertInstanceOf(wrapWithCacheIfTinyStripes(orcDataSource, stripes, maxMergeDistance, tinyStripeThreshold), CachingOrcDataSource.class); OrcRecordReader orcRecordReader = orcReader.createRecordReader( ImmutableMap.of(0, VARCHAR), (numberOfRows, statisticsByColumnIndex) -> true, HIVE_STORAGE_TIME_ZONE, newSimpleAggregatedMemoryContext(), INITIAL_BATCH_SIZE); int positionCount = 0; while (true) { int batchSize = orcRecordReader.nextBatch(); if (batchSize <= 0) { break; } Block block = orcRecordReader.readBlock(VARCHAR, 0); positionCount += block.getPositionCount(); } assertEquals(positionCount, POSITION_COUNT); }
public void doIntegration(TestingOrcDataSource orcDataSource, DataSize maxMergeDistance, DataSize maxReadSize) throws IOException { OrcReader orcReader = new OrcReader(orcDataSource, new OrcMetadataReader(), maxMergeDistance, maxReadSize); // 1 for reading file footer assertEquals(orcDataSource.getReadCount(), 1); List<StripeInformation> stripes = orcReader.getFooter().getStripes(); // Sanity check number of stripes. This can be three or higher because of orc writer low memory mode. assertGreaterThanOrEqual(stripes.size(), 3); //verify wrapped by CachingOrcReader assertInstanceOf(wrapWithCacheIfTinyStripes(orcDataSource, stripes, maxMergeDistance, maxReadSize), CachingOrcDataSource.class); OrcRecordReader orcRecordReader = orcReader.createRecordReader( ImmutableMap.of(0, VARCHAR), (numberOfRows, statisticsByColumnIndex) -> true, HIVE_STORAGE_TIME_ZONE, new AggregatedMemoryContext()); int positionCount = 0; while (true) { int batchSize = orcRecordReader.nextBatch(); if (batchSize <= 0) { break; } Block block = orcRecordReader.readBlock(VARCHAR, 0); positionCount += block.getPositionCount(); } assertEquals(positionCount, POSITION_COUNT); }