/**
 * Builds a {@link LinearProbeRangeFinder} over the disk ranges occupied by the given stripes.
 * <p>
 * Each stripe contributes one {@code DiskRange} that starts at the stripe offset and spans the
 * stripe's total length; those ranges are then combined by {@code mergeAdjacentDiskRanges}.
 *
 * @param stripes stripes to cover; an empty list yields a finder with no ranges
 * @param maxMergeDistance forwarded to {@code mergeAdjacentDiskRanges}
 * @param tinyStripeThreshold forwarded to {@code mergeAdjacentDiskRanges}
 * @return a range finder backed by the merged disk ranges
 */
public static LinearProbeRangeFinder createTinyStripesRangeFinder(List<StripeInformation> stripes, DataSize maxMergeDistance, DataSize tinyStripeThreshold)
{
    if (stripes.isEmpty()) {
        return new LinearProbeRangeFinder(ImmutableList.of());
    }

    List<DiskRange> perStripeRanges = stripes.stream()
            .map(stripeInfo -> {
                long start = stripeInfo.getOffset();
                // toIntExact rejects a total length that does not fit in an int
                int length = toIntExact(stripeInfo.getTotalLength());
                return new DiskRange(start, length);
            })
            .collect(Collectors.toList());

    return new LinearProbeRangeFinder(mergeAdjacentDiskRanges(perStripeRanges, maxMergeDistance, tinyStripeThreshold));
}
}
public void validateStripeStatistics(OrcDataSourceId orcDataSourceId, List<StripeInformation> actualStripes, List<StripeStatistics> actualStripeStatistics) throws OrcCorruptionException { requireNonNull(actualStripes, "actualStripes is null"); requireNonNull(actualStripeStatistics, "actualStripeStatistics is null"); if (actualStripeStatistics.isEmpty()) { // DWRF does not have stripe statistics return; } if (actualStripeStatistics.size() != stripeStatistics.size()) { throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected number of columns in stripe statistics"); } for (int stripeIndex = 0; stripeIndex < actualStripes.size(); stripeIndex++) { long stripeOffset = actualStripes.get(stripeIndex).getOffset(); StripeStatistics actual = actualStripeStatistics.get(stripeIndex); validateStripeStatistics(orcDataSourceId, stripeOffset, actual.getColumnStatistics()); } }
// When row-group statistics validation is present: build the accumulated statistics and pass them,
// together with the current stripe's offset and currentRowGroup, to the write validation; then
// reset the accumulator.
// NOTE(review): this excerpt ends before the `if` block is closed; writeValidation.get() is called
// without a visible presence check here — confirm it is guaranteed present on this path.
if (rowGroupStatisticsValidation.isPresent()) { StatisticsValidation statisticsValidation = rowGroupStatisticsValidation.get(); long offset = stripes.get(currentStripe).getOffset(); writeValidation.get().validateRowGroupStatistics(orcDataSource.getId(), offset, currentRowGroup, statisticsValidation.build()); statisticsValidation.reset();
// Order stripeInfos ascending by the offset of each entry's stripe.
stripeInfos.sort(comparingLong(stripeInfo -> stripeInfo.getStripe().getOffset()));
// When stripe statistics validation is present: build the accumulated statistics and pass them,
// together with the current stripe's offset, to the write validation; then reset the accumulator.
// NOTE(review): this excerpt ends before the `if` block is closed; writeValidation.get() is called
// without a visible presence check here — confirm it is guaranteed present on this path.
if (stripeStatisticsValidation.isPresent()) { StatisticsValidation statisticsValidation = stripeStatisticsValidation.get(); long offset = stripes.get(currentStripe).getOffset(); writeValidation.get().validateStripeStatistics(orcDataSource.getId(), offset, statisticsValidation.build()); statisticsValidation.reset();
public StripeFooter readStripeFooter(StripeInformation stripe, AggregatedMemoryContext systemMemoryUsage) throws IOException { long offset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength(); int tailLength = toIntExact(stripe.getFooterLength()); // read the footer byte[] tailBuffer = new byte[tailLength]; orcDataSource.readFully(offset, tailBuffer); try (InputStream inputStream = new OrcInputStream(orcDataSource.getId(), Slices.wrappedBuffer(tailBuffer).getInput(), decompressor, systemMemoryUsage, tailLength)) { return metadataReader.readStripeFooter(types, inputStream); } }
// Reads the given diskRanges through readDiskRanges (passing the stripe offset and the memory
// context) into a StreamId -> OrcInputStream map, and passes columnIndexes to the write validation
// keyed by the stripe offset.
// NOTE(review): streamsData is declared twice on this line, which would not compile in a single
// scope — this looks like separate excerpts joined together; confirm against the original file.
Map<StreamId, OrcInputStream> streamsData = readDiskRanges(stripe.getOffset(), diskRanges, systemMemoryUsage); writeValidation.get().validateRowGroupStatistics(orcDataSource.getId(), stripe.getOffset(), columnIndexes); Map<StreamId, OrcInputStream> streamsData = readDiskRanges(stripe.getOffset(), diskRanges, systemMemoryUsage);
/**
 * Converts a {@code StripeInformation} into its {@code OrcProto.StripeInformation} message,
 * copying the row count, offset, index length, data length and footer length.
 *
 * @param stripe stripe description to convert
 * @return the built protobuf message
 */
private static OrcProto.StripeInformation toStripeInformation(StripeInformation stripe)
{
    OrcProto.StripeInformation.Builder message = OrcProto.StripeInformation.newBuilder();
    message.setNumberOfRows(stripe.getNumberOfRows());
    message.setOffset(stripe.getOffset());
    message.setIndexLength(stripe.getIndexLength());
    message.setDataLength(stripe.getDataLength());
    message.setFooterLength(stripe.getFooterLength());
    return message.build();
}
/**
 * Converts a {@code StripeInformation} into its {@code DwrfProto.StripeInformation} message,
 * copying the row count, offset, index length, data length and footer length.
 *
 * @param stripe stripe description to convert
 * @return the built protobuf message
 */
private static DwrfProto.StripeInformation toStripeInformation(StripeInformation stripe)
{
    DwrfProto.StripeInformation.Builder message = DwrfProto.StripeInformation.newBuilder();
    message.setNumberOfRows(stripe.getNumberOfRows());
    message.setOffset(stripe.getOffset());
    message.setIndexLength(stripe.getIndexLength());
    message.setDataLength(stripe.getDataLength());
    message.setFooterLength(stripe.getFooterLength());
    return message.build();
}
// Fills tailBuffer from the data source starting at offset + indexLength + dataLength of the stripe,
// wraps the bytes in an OrcInputStream and decodes a StripeFooter with the ORC metadata reader using
// the footer's types.
// NOTE(review): the third OrcInputStream argument is Optional.empty() — presumably "no decompressor";
// confirm against the OrcInputStream constructor. This excerpt ends before the `try` block is closed.
orcDataSource.readFully(stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength(), tailBuffer); try (InputStream inputStream = new OrcInputStream(orcDataSource.getId(), Slices.wrappedBuffer(tailBuffer).getInput(), Optional.empty(), newSimpleAggregatedMemoryContext(), tailBuffer.length)) { StripeFooter stripeFooter = ORC.createMetadataReader().readStripeFooter(footer.getTypes(), inputStream);
/**
 * Builds a {@link LinearProbeRangeFinder} over the disk ranges occupied by the given stripes.
 * <p>
 * Each stripe contributes one {@code DiskRange} that starts at the stripe offset and spans the
 * stripe's total length; those ranges are then combined by {@code mergeAdjacentDiskRanges}.
 *
 * @param stripes stripes to cover; an empty list yields a finder with no ranges
 * @param maxMergeDistance forwarded to {@code mergeAdjacentDiskRanges}
 * @param maxReadSize forwarded to {@code mergeAdjacentDiskRanges}
 * @return a range finder backed by the merged disk ranges
 */
public static LinearProbeRangeFinder createTinyStripesRangeFinder(List<StripeInformation> stripes, DataSize maxMergeDistance, DataSize maxReadSize)
{
    if (stripes.isEmpty()) {
        return new LinearProbeRangeFinder(ImmutableList.of());
    }

    List<DiskRange> perStripeRanges = stripes.stream()
            .map(stripeInfo -> {
                long start = stripeInfo.getOffset();
                // Ints.checkedCast rejects a total length that does not fit in an int
                int length = Ints.checkedCast(stripeInfo.getTotalLength());
                return new DiskRange(start, length);
            })
            .collect(Collectors.toList());

    return new LinearProbeRangeFinder(mergeAdjacentDiskRanges(perStripeRanges, maxMergeDistance, maxReadSize));
}
}
/**
 * Creates a {@link LinearProbeRangeFinder} for the disk ranges spanned by the supplied stripes.
 * <p>
 * One {@code DiskRange} is produced per stripe (stripe offset, stripe total length) and the
 * resulting list is passed through {@code mergeAdjacentDiskRanges} before the finder is built.
 *
 * @param stripes stripes to cover; an empty list yields a finder with no ranges
 * @param maxMergeDistance forwarded to {@code mergeAdjacentDiskRanges}
 * @param tinyStripeThreshold forwarded to {@code mergeAdjacentDiskRanges}
 * @return a range finder backed by the merged disk ranges
 */
public static LinearProbeRangeFinder createTinyStripesRangeFinder(List<StripeInformation> stripes, DataSize maxMergeDistance, DataSize tinyStripeThreshold)
{
    if (stripes.isEmpty()) {
        return new LinearProbeRangeFinder(ImmutableList.of());
    }

    List<DiskRange> unmergedRanges = stripes.stream()
            .map(stripeInfo -> {
                long rangeStart = stripeInfo.getOffset();
                // toIntExact rejects a total length that does not fit in an int
                int rangeLength = toIntExact(stripeInfo.getTotalLength());
                return new DiskRange(rangeStart, rangeLength);
            })
            .collect(Collectors.toList());

    return new LinearProbeRangeFinder(mergeAdjacentDiskRanges(unmergedRanges, maxMergeDistance, tinyStripeThreshold));
}
}
public void validateStripeStatistics(OrcDataSourceId orcDataSourceId, List<StripeInformation> actualStripes, List<StripeStatistics> actualStripeStatistics) throws OrcCorruptionException { requireNonNull(actualStripes, "actualStripes is null"); requireNonNull(actualStripeStatistics, "actualStripeStatistics is null"); if (actualStripeStatistics.isEmpty()) { // DWRF does not have stripe statistics return; } if (actualStripeStatistics.size() != stripeStatistics.size()) { throw new OrcCorruptionException(orcDataSourceId, "Write validation failed: unexpected number of columns in stripe statistics"); } for (int stripeIndex = 0; stripeIndex < actualStripes.size(); stripeIndex++) { long stripeOffset = actualStripes.get(stripeIndex).getOffset(); StripeStatistics actual = actualStripeStatistics.get(stripeIndex); validateStripeStatistics(orcDataSourceId, stripeOffset, actual.getColumnStatistics()); } }
public StripeFooter readStripeFooter(StripeInformation stripe, AbstractAggregatedMemoryContext systemMemoryUsage) throws IOException { long offset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength(); int tailLength = Ints.checkedCast(stripe.getFooterLength()); // read the footer byte[] tailBuffer = new byte[tailLength]; orcDataSource.readFully(offset, tailBuffer); try (InputStream inputStream = new OrcInputStream(orcDataSource.toString(), Slices.wrappedBuffer(tailBuffer).getInput(), compressionKind, bufferSize, systemMemoryUsage)) { return metadataReader.readStripeFooter(types, inputStream); } }
public StripeFooter readStripeFooter(StripeInformation stripe, AggregatedMemoryContext systemMemoryUsage) throws IOException { long offset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength(); int tailLength = toIntExact(stripe.getFooterLength()); // read the footer byte[] tailBuffer = new byte[tailLength]; orcDataSource.readFully(offset, tailBuffer); try (InputStream inputStream = new OrcInputStream(orcDataSource.getId(), Slices.wrappedBuffer(tailBuffer).getInput(), decompressor, systemMemoryUsage, tailLength)) { return metadataReader.readStripeFooter(types, inputStream); } }
/**
 * Builds the {@code DwrfProto.StripeInformation} message for a stripe by copying its row count,
 * offset, index length, data length and footer length.
 *
 * @param stripe stripe description to convert
 * @return the built protobuf message
 */
private static DwrfProto.StripeInformation toStripeInformation(StripeInformation stripe)
{
    DwrfProto.StripeInformation.Builder stripeBuilder = DwrfProto.StripeInformation.newBuilder();
    stripeBuilder.setNumberOfRows(stripe.getNumberOfRows());
    stripeBuilder.setOffset(stripe.getOffset());
    stripeBuilder.setIndexLength(stripe.getIndexLength());
    stripeBuilder.setDataLength(stripe.getDataLength());
    stripeBuilder.setFooterLength(stripe.getFooterLength());
    return stripeBuilder.build();
}
/**
 * Builds the {@code OrcProto.StripeInformation} message for a stripe by copying its row count,
 * offset, index length, data length and footer length.
 *
 * @param stripe stripe description to convert
 * @return the built protobuf message
 */
private static OrcProto.StripeInformation toStripeInformation(StripeInformation stripe)
{
    OrcProto.StripeInformation.Builder stripeBuilder = OrcProto.StripeInformation.newBuilder();
    stripeBuilder.setNumberOfRows(stripe.getNumberOfRows());
    stripeBuilder.setOffset(stripe.getOffset());
    stripeBuilder.setIndexLength(stripe.getIndexLength());
    stripeBuilder.setDataLength(stripe.getDataLength());
    stripeBuilder.setFooterLength(stripe.getFooterLength());
    return stripeBuilder.build();
}