@Override
public OrcDataSourceId getId()
{
    return dataSource.getId();
}
public OrcDataSourceId getOrcDataSourceId()
{
    return orcDataSource.getId();
}
@Override
public String toString()
{
    return toStringHelper(this)
            .add("streamName", streamName)
            .add("streamId", streamId)
            .add("sequence", sequence)
            .add("streamType", streamType)
            .add("dataSource", orcDataSource.getId())
            .toString();
}
}
private void validateWrite(Predicate<OrcWriteValidation> test, String messageFormat, Object... args)
        throws OrcCorruptionException
{
    if (writeValidation.isPresent() && !test.test(writeValidation.get())) {
        throw new OrcCorruptionException(orcDataSource.getId(), "Write validation failed: " + messageFormat, args);
    }
}
@Override
public OrcDataSourceId getId()
{
    return delegate.getId();
}
private void validateWrite(Predicate<OrcWriteValidation> test, String messageFormat, Object... args)
        throws OrcCorruptionException
{
    if (writeValidation.isPresent() && !test.test(writeValidation.get())) {
        throw new OrcCorruptionException(orcDataSource.getId(), "Write validation failed: " + messageFormat, args);
    }
}
public Map<StreamId, OrcInputStream> readDiskRanges(long stripeOffset, Map<StreamId, DiskRange> diskRanges, AggregatedMemoryContext systemMemoryUsage)
        throws IOException
{
    //
    // Note: this code does not use the Java 8 stream APIs to avoid any extra object allocation
    //

    // transform ranges to have an absolute offset in file
    ImmutableMap.Builder<StreamId, DiskRange> diskRangesBuilder = ImmutableMap.builder();
    for (Entry<StreamId, DiskRange> entry : diskRanges.entrySet()) {
        DiskRange diskRange = entry.getValue();
        diskRangesBuilder.put(entry.getKey(), new DiskRange(stripeOffset + diskRange.getOffset(), diskRange.getLength()));
    }
    diskRanges = diskRangesBuilder.build();

    // read ranges
    Map<StreamId, OrcDataSourceInput> streamsData = orcDataSource.readFully(diskRanges);

    // transform streams to OrcInputStream
    ImmutableMap.Builder<StreamId, OrcInputStream> streamsBuilder = ImmutableMap.builder();
    for (Entry<StreamId, OrcDataSourceInput> entry : streamsData.entrySet()) {
        OrcDataSourceInput sourceInput = entry.getValue();
        streamsBuilder.put(entry.getKey(), new OrcInputStream(orcDataSource.getId(), sourceInput.getInput(), decompressor, systemMemoryUsage, sourceInput.getRetainedSizeInBytes()));
    }
    return streamsBuilder.build();
}
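// Hedged usage sketch, not from this source: readDiskRanges takes stripe-relative ranges,
// rebases them against the stripe's absolute file offset, and wraps each fetched buffer in
// an OrcInputStream. The stripeReader, streamId, stripe, and memoryContext names below are
// hypothetical placeholders assumed to be in scope; the 100-byte range at stripe-relative
// offset 0 is illustrative only.
Map<StreamId, DiskRange> relativeRanges = ImmutableMap.of(streamId, new DiskRange(0, 100));
Map<StreamId, OrcInputStream> streams = stripeReader.readDiskRanges(stripe.getOffset(), relativeRanges, memoryContext);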
@Override
public final void load(LazyBlock lazyBlock)
{
    if (loaded) {
        return;
    }

    checkState(batchId == expectedBatchId);

    try {
        Block block = recordReader.readBlock(type, columnIndex);
        lazyBlock.setBlock(block);
    }
    catch (OrcCorruptionException e) {
        throw new PrestoException(HIVE_BAD_DATA, e);
    }
    catch (IOException | RuntimeException e) {
        throw new PrestoException(HIVE_CURSOR_ERROR, format("Failed to read ORC file: %s", orcDataSource.getId()), e);
    }

    loaded = true;
}
}
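// Hedged usage sketch, not from this source: a loader like the one above is typically handed
// to a LazyBlock so the column read is deferred until the block is first accessed. The
// OrcBlockLoader name and its constructor arguments are hypothetical; batchSize is assumed
// to be in scope.
Block lazyColumn = new LazyBlock(batchSize, new OrcBlockLoader(columnIndex, type));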
StatisticsValidation statisticsValidation = rowGroupStatisticsValidation.get();
long offset = stripes.get(currentStripe).getOffset();
writeValidation.get().validateRowGroupStatistics(orcDataSource.getId(), offset, currentRowGroup, statisticsValidation.build());
statisticsValidation.reset();
@Override
public void close()
        throws IOException
{
    try (Closer closer = Closer.create()) {
        closer.register(orcDataSource);
        for (StreamReader column : streamReaders) {
            if (column != null) {
                closer.register(column::close);
            }
        }
    }

    if (writeChecksumBuilder.isPresent()) {
        WriteChecksum actualChecksum = writeChecksumBuilder.get().build();
        validateWrite(validation -> validation.getChecksum().getTotalRowCount() == actualChecksum.getTotalRowCount(), "Invalid row count");
        List<Long> columnHashes = actualChecksum.getColumnHashes();
        for (int i = 0; i < columnHashes.size(); i++) {
            int columnIndex = i;
            validateWrite(validation -> validation.getChecksum().getColumnHashes().get(columnIndex).equals(columnHashes.get(columnIndex)), "Invalid checksum for column %s", columnIndex);
        }
        validateWrite(validation -> validation.getChecksum().getStripeHash() == actualChecksum.getStripeHash(), "Invalid stripes checksum");
    }
    if (fileStatisticsValidation.isPresent()) {
        List<ColumnStatistics> columnStatistics = fileStatisticsValidation.get().build();
        writeValidation.get().validateFileStatistics(orcDataSource.getId(), columnStatistics);
    }
}
StatisticsValidation statisticsValidation = stripeStatisticsValidation.get();
long offset = stripes.get(currentStripe).getOffset();
writeValidation.get().validateStripeStatistics(orcDataSource.getId(), offset, statisticsValidation.build());
statisticsValidation.reset();
static void validateFile(
        OrcWriteValidation writeValidation,
        OrcDataSource input,
        List<Type> types,
        DateTimeZone hiveStorageTimeZone,
        OrcEncoding orcEncoding)
        throws OrcCorruptionException
{
    ImmutableMap.Builder<Integer, Type> readTypes = ImmutableMap.builder();
    for (int columnIndex = 0; columnIndex < types.size(); columnIndex++) {
        readTypes.put(columnIndex, types.get(columnIndex));
    }
    try {
        OrcReader orcReader = new OrcReader(input, orcEncoding, new DataSize(1, MEGABYTE), new DataSize(8, MEGABYTE), new DataSize(8, MEGABYTE), new DataSize(16, MEGABYTE), Optional.of(writeValidation));
        try (OrcRecordReader orcRecordReader = orcReader.createRecordReader(readTypes.build(), OrcPredicate.TRUE, hiveStorageTimeZone, newSimpleAggregatedMemoryContext(), INITIAL_BATCH_SIZE)) {
            while (orcRecordReader.nextBatch() >= 0) {
                // ignored
            }
        }
    }
    catch (IOException e) {
        throw new OrcCorruptionException(e, input.getId(), "Validation failed");
    }
}
}
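// Hedged usage sketch, not from this source: a writer that recorded an OrcWriteValidation can
// replay it against the finished file to confirm what reached storage. The dataSource and
// writtenTypes names are assumed to be in scope; UTC is an arbitrary choice for illustration.
validateFile(writeValidation, dataSource, writtenTypes, DateTimeZone.UTC, OrcEncoding.ORC);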
public StripeFooter readStripeFooter(StripeInformation stripe, AggregatedMemoryContext systemMemoryUsage)
        throws IOException
{
    long offset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength();
    int tailLength = toIntExact(stripe.getFooterLength());

    // read the footer
    byte[] tailBuffer = new byte[tailLength];
    orcDataSource.readFully(offset, tailBuffer);
    try (InputStream inputStream = new OrcInputStream(orcDataSource.getId(), Slices.wrappedBuffer(tailBuffer).getInput(), decompressor, systemMemoryUsage, tailLength)) {
        return metadataReader.readStripeFooter(types, inputStream);
    }
}
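// Note on the offset arithmetic above: an ORC stripe is laid out as index streams, then data
// streams, then the stripe footer, so the footer starts at offset + indexLength + dataLength
// and occupies the final footerLength bytes of the stripe.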
this.orcDataSource = orcDataSource;
requireNonNull(orcEncoding, "orcEncoding is null");
this.metadataReader = new ExceptionWrappingMetadataReader(orcDataSource.getId(), orcEncoding.createMetadataReader());
this.maxMergeDistance = requireNonNull(maxMergeDistance, "maxMergeDistance is null");
this.maxReadSize = requireNonNull(maxReadSize, "maxReadSize is null");

throw new OrcCorruptionException(orcDataSource.getId(), "Invalid file size %s", size);

throw new OrcCorruptionException(orcDataSource.getId(), "Invalid postscript length %s", postScriptSize);

throw new OrcCorruptionException(orcDataSource.getId(), "Not an ORC file");

this.decompressor = createOrcDecompressor(orcDataSource.getId(), compressionKind, bufferSize);
validateWrite(validation -> validation.getCompression() == compressionKind, "Unexpected compression");

try (InputStream metadataInputStream = new OrcInputStream(orcDataSource.getId(), metadataSlice.getInput(), decompressor, newSimpleAggregatedMemoryContext(), metadataSize)) {
    this.metadata = metadataReader.readMetadata(hiveWriterVersion, metadataInputStream);
}

try (InputStream footerInputStream = new OrcInputStream(orcDataSource.getId(), footerSlice.getInput(), decompressor, newSimpleAggregatedMemoryContext(), footerSize)) {
    this.footer = metadataReader.readFooter(hiveWriterVersion, footerInputStream);
}

throw new OrcCorruptionException(orcDataSource.getId(), "File has no columns");

validateWrite(validation -> validation.getRowGroupMaxRowCount() == footer.getRowsInRowGroup(), "Unexpected rows in group");

if (writeValidation.isPresent()) {
    writeValidation.get().validateMetadata(orcDataSource.getId(), footer.getUserMetadata());
    writeValidation.get().validateFileStatistics(orcDataSource.getId(), footer.getFileStats());
    writeValidation.get().validateStripeStatistics(orcDataSource.getId(), footer.getStripes(), metadata.getStripeStatsList());
}
throw new PrestoException(HIVE_CURSOR_ERROR, format("Failed to read ORC file: %s", orcDataSource.getId()), e);
try (InputStream inputStream = new OrcInputStream(orcDataSource.getId(), Slices.wrappedBuffer(tailBuffer).getInput(), Optional.empty(), newSimpleAggregatedMemoryContext(), tailBuffer.length)) {
    StripeFooter stripeFooter = ORC.createMetadataReader().readStripeFooter(footer.getTypes(), inputStream);
}