/**
 * Converts {@code footer} into an {@code OrcProto.Footer} message and hands it, together with
 * {@code output}, to {@code writeProtobufObject}.
 *
 * @param output destination passed straight through to {@code writeProtobufObject}
 * @param footer supplies the row count, rows-per-row-group, stripes, types, file statistics and user metadata
 * @return whatever {@code writeProtobufObject} returns (presumably the number of bytes written; confirm against that helper)
 * @throws IOException declared by this method; presumably propagated from {@code writeProtobufObject}
 */
@Override
public int writeFooter(SliceOutput output, Footer footer)
        throws IOException
{
    return writeProtobufObject(
            output,
            OrcProto.Footer.newBuilder()
                    .setWriter(PRESTO_WRITER_ID)
                    .setNumberOfRows(footer.getNumberOfRows())
                    // the protobuf "row index stride" field is filled from the footer's rows-per-row-group value
                    .setRowIndexStride(footer.getRowsInRowGroup())
                    .addAllStripes(footer.getStripes().stream()
                            .map(stripe -> OrcMetadataWriter.toStripeInformation(stripe))
                            .collect(toList()))
                    .addAllTypes(footer.getTypes().stream()
                            .map(type -> OrcMetadataWriter.toType(type))
                            .collect(toList()))
                    .addAllStatistics(footer.getFileStats().stream()
                            .map(statistics -> OrcMetadataWriter.toColumnStatistics(statistics))
                            .collect(toList()))
                    .addAllMetadata(footer.getUserMetadata().entrySet().stream()
                            .map(entry -> OrcMetadataWriter.toUserMetadata(entry))
                            .collect(toList()))
                    .build());
}
/**
 * Converts {@code footer} into a {@code DwrfProto.Footer} message and hands it, together with
 * {@code output}, to {@code writeProtobufObject}.
 *
 * @param output destination passed straight through to {@code writeProtobufObject}
 * @param footer supplies the row count, rows-per-row-group, stripes, types, file statistics and user metadata
 * @return whatever {@code writeProtobufObject} returns (presumably the number of bytes written; confirm against that helper)
 * @throws IOException declared by this method; presumably propagated from {@code writeProtobufObject}
 */
@Override
public int writeFooter(SliceOutput output, Footer footer)
        throws IOException
{
    return writeProtobufObject(
            output,
            DwrfProto.Footer.newBuilder()
                    .setNumberOfRows(footer.getNumberOfRows())
                    // the protobuf "row index stride" field is filled from the footer's rows-per-row-group value
                    .setRowIndexStride(footer.getRowsInRowGroup())
                    .addAllStripes(footer.getStripes().stream()
                            .map(stripe -> DwrfMetadataWriter.toStripeInformation(stripe))
                            .collect(toImmutableList()))
                    .addAllTypes(footer.getTypes().stream()
                            .map(type -> DwrfMetadataWriter.toType(type))
                            .collect(toImmutableList()))
                    .addAllStatistics(footer.getFileStats().stream()
                            .map(statistics -> DwrfMetadataWriter.toColumnStatistics(statistics))
                            .collect(toImmutableList()))
                    // metadata entries: the footer's user metadata first, then the STATIC_METADATA entries
                    .addAllMetadata(footer.getUserMetadata().entrySet().stream()
                            .map(entry -> DwrfMetadataWriter.toUserMetadata(entry))
                            .collect(toImmutableList()))
                    .addAllMetadata(STATIC_METADATA.entrySet().stream()
                            .map(entry -> DwrfMetadataWriter.toUserMetadata(entry))
                            .collect(toImmutableList()))
                    .build());
}
requireNonNull(predicate, "predicate is null"), footer.getNumberOfRows(), footer.getStripes(), footer.getFileStats(), metadata.getStripeStatsList(),
// Runs three write-validation calls, each receiving the data source id: one with the footer's
// user metadata, one with the footer's file statistics, and one with the footer's stripe list
// together with the stripe statistics list taken from `metadata`.
// NOTE(review): writeValidation.get() is called without a visible presence check in this
// fragment; presumably an enclosing guard exists outside the lines shown here. Confirm.
writeValidation.get().validateMetadata(orcDataSource.getId(), footer.getUserMetadata()); writeValidation.get().validateFileStatistics(orcDataSource.getId(), footer.getFileStats()); writeValidation.get().validateStripeStatistics(orcDataSource.getId(), footer.getStripes(), metadata.getStripeStatsList());
public void doIntegration(TestingOrcDataSource orcDataSource, DataSize maxMergeDistance, DataSize maxReadSize, DataSize tinyStripeThreshold) throws IOException { OrcReader orcReader = new OrcReader(orcDataSource, ORC, maxMergeDistance, maxReadSize, tinyStripeThreshold, new DataSize(1, Unit.MEGABYTE)); // 1 for reading file footer assertEquals(orcDataSource.getReadCount(), 1); List<StripeInformation> stripes = orcReader.getFooter().getStripes(); // Sanity check number of stripes. This can be three or higher because of orc writer low memory mode. assertGreaterThanOrEqual(stripes.size(), 3); //verify wrapped by CachingOrcReader assertInstanceOf(wrapWithCacheIfTinyStripes(orcDataSource, stripes, maxMergeDistance, tinyStripeThreshold), CachingOrcDataSource.class); OrcRecordReader orcRecordReader = orcReader.createRecordReader( ImmutableMap.of(0, VARCHAR), (numberOfRows, statisticsByColumnIndex) -> true, HIVE_STORAGE_TIME_ZONE, newSimpleAggregatedMemoryContext(), INITIAL_BATCH_SIZE); int positionCount = 0; while (true) { int batchSize = orcRecordReader.nextBatch(); if (batchSize <= 0) { break; } Block block = orcRecordReader.readBlock(VARCHAR, 0); positionCount += block.getPositionCount(); } assertEquals(positionCount, POSITION_COUNT); }
Footer footer = new OrcReader(orcDataSource, ORC, dataSize, dataSize, dataSize, dataSize).getFooter(); for (StripeInformation stripe : footer.getStripes()) {
/**
 * Converts {@code footer} into an {@code OrcProto.Footer} message and hands it, together with
 * {@code output}, to {@code writeProtobufObject}.
 *
 * @param output destination passed straight through to {@code writeProtobufObject}
 * @param footer supplies the row count, rows-per-row-group, stripes, types, file statistics and user metadata
 * @return whatever {@code writeProtobufObject} returns (presumably the number of bytes written; confirm against that helper)
 * @throws IOException declared by this method; presumably propagated from {@code writeProtobufObject}
 */
@Override
public int writeFooter(SliceOutput output, Footer footer)
        throws IOException
{
    return writeProtobufObject(
            output,
            OrcProto.Footer.newBuilder()
                    .setWriter(PRESTO_WRITER_ID)
                    .setNumberOfRows(footer.getNumberOfRows())
                    // the protobuf "row index stride" field is filled from the footer's rows-per-row-group value
                    .setRowIndexStride(footer.getRowsInRowGroup())
                    .addAllStripes(footer.getStripes().stream()
                            .map(stripe -> OrcMetadataWriter.toStripeInformation(stripe))
                            .collect(toList()))
                    .addAllTypes(footer.getTypes().stream()
                            .map(type -> OrcMetadataWriter.toType(type))
                            .collect(toList()))
                    .addAllStatistics(footer.getFileStats().stream()
                            .map(statistics -> OrcMetadataWriter.toColumnStatistics(statistics))
                            .collect(toList()))
                    .addAllMetadata(footer.getUserMetadata().entrySet().stream()
                            .map(entry -> OrcMetadataWriter.toUserMetadata(entry))
                            .collect(toList()))
                    .build());
}
/**
 * Converts {@code footer} into a {@code DwrfProto.Footer} message and hands it, together with
 * {@code output}, to {@code writeProtobufObject}.
 *
 * @param output destination passed straight through to {@code writeProtobufObject}
 * @param footer supplies the row count, rows-per-row-group, stripes, types, file statistics and user metadata
 * @return whatever {@code writeProtobufObject} returns (presumably the number of bytes written; confirm against that helper)
 * @throws IOException declared by this method; presumably propagated from {@code writeProtobufObject}
 */
@Override
public int writeFooter(SliceOutput output, Footer footer)
        throws IOException
{
    return writeProtobufObject(
            output,
            DwrfProto.Footer.newBuilder()
                    .setNumberOfRows(footer.getNumberOfRows())
                    // the protobuf "row index stride" field is filled from the footer's rows-per-row-group value
                    .setRowIndexStride(footer.getRowsInRowGroup())
                    .addAllStripes(footer.getStripes().stream()
                            .map(stripe -> DwrfMetadataWriter.toStripeInformation(stripe))
                            .collect(toImmutableList()))
                    .addAllTypes(footer.getTypes().stream()
                            .map(type -> DwrfMetadataWriter.toType(type))
                            .collect(toImmutableList()))
                    .addAllStatistics(footer.getFileStats().stream()
                            .map(statistics -> DwrfMetadataWriter.toColumnStatistics(statistics))
                            .collect(toImmutableList()))
                    // metadata entries: the footer's user metadata first, then the STATIC_METADATA entries
                    .addAllMetadata(footer.getUserMetadata().entrySet().stream()
                            .map(entry -> DwrfMetadataWriter.toUserMetadata(entry))
                            .collect(toImmutableList()))
                    .addAllMetadata(STATIC_METADATA.entrySet().stream()
                            .map(entry -> DwrfMetadataWriter.toUserMetadata(entry))
                            .collect(toImmutableList()))
                    .build());
}
/**
 * Creates an {@code OrcRecordReader} from the caller's arguments combined with this reader's
 * footer, metadata, data source and read settings.
 *
 * @param includedColumns must not be null
 * @param predicate must not be null
 * @param offset passed through to the {@code OrcRecordReader} constructor
 * @param length passed through to the {@code OrcRecordReader} constructor
 * @param hiveStorageTimeZone must not be null
 * @param systemMemoryUsage passed through to the {@code OrcRecordReader} constructor
 * @return the newly constructed record reader
 * @throws NullPointerException if {@code includedColumns}, {@code predicate} or {@code hiveStorageTimeZone} is null
 * @throws IOException declared by this method; presumably raised by the {@code OrcRecordReader} constructor
 */
public OrcRecordReader createRecordReader(
        Map<Integer, Type> includedColumns,
        OrcPredicate predicate,
        long offset,
        long length,
        DateTimeZone hiveStorageTimeZone,
        AbstractAggregatedMemoryContext systemMemoryUsage)
        throws IOException
{
    // validate the caller-supplied arguments up front, in the same order they are consumed below
    requireNonNull(includedColumns, "includedColumns is null");
    requireNonNull(predicate, "predicate is null");
    requireNonNull(hiveStorageTimeZone, "hiveStorageTimeZone is null");

    return new OrcRecordReader(
            includedColumns,
            predicate,
            footer.getNumberOfRows(),
            footer.getStripes(),
            footer.getFileStats(),
            metadata.getStripeStatsList(),
            orcDataSource,
            offset,
            length,
            footer.getTypes(),
            compressionKind,
            bufferSize,
            footer.getRowsInRowGroup(),
            hiveStorageTimeZone,
            metadataReader,
            maxMergeDistance,
            maxReadSize,
            systemMemoryUsage);
}
requireNonNull(predicate, "predicate is null"), footer.getNumberOfRows(), footer.getStripes(), footer.getFileStats(), metadata.getStripeStatsList(),
// Runs three write-validation calls, each receiving the data source id: one with the footer's
// user metadata, one with the footer's file statistics, and one with the footer's stripe list
// together with the stripe statistics list taken from `metadata`.
// NOTE(review): writeValidation.get() is called without a visible presence check in this
// fragment; presumably an enclosing guard exists outside the lines shown here. Confirm.
writeValidation.get().validateMetadata(orcDataSource.getId(), footer.getUserMetadata()); writeValidation.get().validateFileStatistics(orcDataSource.getId(), footer.getFileStats()); writeValidation.get().validateStripeStatistics(orcDataSource.getId(), footer.getStripes(), metadata.getStripeStatsList());
public void doIntegration(TestingOrcDataSource orcDataSource, DataSize maxMergeDistance, DataSize maxReadSize, DataSize tinyStripeThreshold) throws IOException { OrcReader orcReader = new OrcReader(orcDataSource, ORC, maxMergeDistance, maxReadSize, tinyStripeThreshold, new DataSize(1, Unit.MEGABYTE)); // 1 for reading file footer assertEquals(orcDataSource.getReadCount(), 1); List<StripeInformation> stripes = orcReader.getFooter().getStripes(); // Sanity check number of stripes. This can be three or higher because of orc writer low memory mode. assertGreaterThanOrEqual(stripes.size(), 3); //verify wrapped by CachingOrcReader assertInstanceOf(wrapWithCacheIfTinyStripes(orcDataSource, stripes, maxMergeDistance, tinyStripeThreshold), CachingOrcDataSource.class); OrcRecordReader orcRecordReader = orcReader.createRecordReader( ImmutableMap.of(0, VARCHAR), (numberOfRows, statisticsByColumnIndex) -> true, HIVE_STORAGE_TIME_ZONE, newSimpleAggregatedMemoryContext(), INITIAL_BATCH_SIZE); int positionCount = 0; while (true) { int batchSize = orcRecordReader.nextBatch(); if (batchSize <= 0) { break; } Block block = orcRecordReader.readBlock(VARCHAR, 0); positionCount += block.getPositionCount(); } assertEquals(positionCount, POSITION_COUNT); }
public void doIntegration(TestingOrcDataSource orcDataSource, DataSize maxMergeDistance, DataSize maxReadSize) throws IOException { OrcReader orcReader = new OrcReader(orcDataSource, new OrcMetadataReader(), maxMergeDistance, maxReadSize); // 1 for reading file footer assertEquals(orcDataSource.getReadCount(), 1); List<StripeInformation> stripes = orcReader.getFooter().getStripes(); // Sanity check number of stripes. This can be three or higher because of orc writer low memory mode. assertGreaterThanOrEqual(stripes.size(), 3); //verify wrapped by CachingOrcReader assertInstanceOf(wrapWithCacheIfTinyStripes(orcDataSource, stripes, maxMergeDistance, maxReadSize), CachingOrcDataSource.class); OrcRecordReader orcRecordReader = orcReader.createRecordReader( ImmutableMap.of(0, VARCHAR), (numberOfRows, statisticsByColumnIndex) -> true, HIVE_STORAGE_TIME_ZONE, new AggregatedMemoryContext()); int positionCount = 0; while (true) { int batchSize = orcRecordReader.nextBatch(); if (batchSize <= 0) { break; } Block block = orcRecordReader.readBlock(VARCHAR, 0); positionCount += block.getPositionCount(); } assertEquals(positionCount, POSITION_COUNT); }
Footer footer = new OrcReader(orcDataSource, ORC, dataSize, dataSize, dataSize, dataSize).getFooter(); for (StripeInformation stripe : footer.getStripes()) {