/**
 * Reads a stripe footer through the wrapped reader.
 *
 * <p>Both {@link IOException} and {@link RuntimeException} raised by the delegate are routed
 * through {@code propagate} with the message "Invalid stripe footer". The other read methods of
 * this wrapper already treat runtime failures this way; catching only {@code IOException} here
 * let a runtime failure during stripe footer decoding escape without that context.
 *
 * @param types the ORC types forwarded to the delegate
 * @param inputStream the stream the delegate reads the stripe footer from
 * @return the stripe footer produced by the delegate
 * @throws IOException whatever {@code propagate} yields for a delegate failure
 */
@Override
public StripeFooter readStripeFooter(List<OrcType> types, InputStream inputStream)
        throws IOException
{
    try {
        return delegate.readStripeFooter(types, inputStream);
    }
    catch (IOException | RuntimeException e) {
        throw propagate(e, "Invalid stripe footer");
    }
}
/**
 * Reads the file footer through the wrapped reader.
 *
 * <p>Any {@link IOException} or {@link RuntimeException} raised by the delegate is handed to
 * {@code propagate} together with the message "Invalid file footer".
 *
 * @param hiveWriterVersion writer version forwarded to the delegate
 * @param inputStream the stream the delegate reads the footer from
 * @return the footer produced by the delegate
 * @throws OrcCorruptionException when the delegate fails
 */
@Override
public Footer readFooter(HiveWriterVersion hiveWriterVersion, InputStream inputStream)
        throws OrcCorruptionException
{
    final Footer footer;
    try {
        footer = delegate.readFooter(hiveWriterVersion, inputStream);
    }
    catch (IOException | RuntimeException failure) {
        throw propagate(failure, "Invalid file footer");
    }
    return footer;
}
/**
 * Reads the file metadata through the wrapped reader.
 *
 * <p>Any {@link IOException} or {@link RuntimeException} raised by the delegate is handed to
 * {@code propagate} together with the message "Invalid file metadata".
 *
 * @param hiveWriterVersion writer version forwarded to the delegate
 * @param inputStream the stream the delegate reads the metadata from
 * @return the metadata produced by the delegate
 * @throws OrcCorruptionException when the delegate fails
 */
@Override
public Metadata readMetadata(HiveWriterVersion hiveWriterVersion, InputStream inputStream)
        throws OrcCorruptionException
{
    final Metadata metadata;
    try {
        metadata = delegate.readMetadata(hiveWriterVersion, inputStream);
    }
    catch (IOException | RuntimeException failure) {
        throw propagate(failure, "Invalid file metadata");
    }
    return metadata;
}
// Decode the three file-tail structures via metadataReader: the postscript from a byte range of
// `buffer` (postScriptOffset / postScriptSize), then the metadata and the footer, each from its
// own input stream. The metadata and footer results are stored in fields of this object.
PostScript postScript = metadataReader.readPostScript(buffer, postScriptOffset, postScriptSize); this.metadata = metadataReader.readMetadata(metadataInputStream); this.footer = metadataReader.readFooter(footerInputStream);
/**
 * Reads the row group indexes of a stripe through the wrapped reader.
 *
 * <p>Any {@link IOException} or {@link RuntimeException} raised by the delegate is handed to
 * {@code propagate} together with the message "Invalid stripe row index".
 *
 * @param hiveWriterVersion writer version forwarded to the delegate
 * @param inputStream the stream the delegate reads the row index from
 * @return the row group indexes produced by the delegate
 * @throws OrcCorruptionException when the delegate fails
 */
@Override
public List<RowGroupIndex> readRowIndexes(HiveWriterVersion hiveWriterVersion, InputStream inputStream)
        throws OrcCorruptionException
{
    final List<RowGroupIndex> rowIndexes;
    try {
        rowIndexes = delegate.readRowIndexes(hiveWriterVersion, inputStream);
    }
    catch (IOException | RuntimeException failure) {
        throw propagate(failure, "Invalid stripe row index");
    }
    return rowIndexes;
}
/**
 * Reads the postscript through the wrapped reader.
 *
 * <p>Any {@link IOException} or {@link RuntimeException} raised by the delegate is handed to
 * {@code propagate} together with the message "Invalid postscript".
 *
 * @param data byte array forwarded to the delegate
 * @param offset offset forwarded to the delegate
 * @param length length forwarded to the delegate
 * @return the postscript produced by the delegate
 * @throws OrcCorruptionException when the delegate fails
 */
@Override
public PostScript readPostScript(byte[] data, int offset, int length)
        throws OrcCorruptionException
{
    final PostScript postScript;
    try {
        postScript = delegate.readPostScript(data, offset, length);
    }
    catch (IOException | RuntimeException failure) {
        throw propagate(failure, "Invalid postscript");
    }
    return postScript;
}
/**
 * Reads bloom filter indexes through the wrapped reader.
 *
 * <p>Any {@link IOException} or {@link RuntimeException} raised by the delegate is handed to
 * {@code propagate} together with the message "Invalid bloom filter".
 *
 * @param inputStream the stream the delegate reads the bloom filters from
 * @return the bloom filters produced by the delegate
 * @throws OrcCorruptionException when the delegate fails
 */
@Override
public List<HiveBloomFilter> readBloomFilterIndexes(InputStream inputStream)
        throws OrcCorruptionException
{
    final List<HiveBloomFilter> bloomFilters;
    try {
        bloomFilters = delegate.readBloomFilterIndexes(inputStream);
    }
    catch (IOException | RuntimeException failure) {
        throw propagate(failure, "Invalid bloom filter");
    }
    return bloomFilters;
}
/**
 * Reads the row group indexes of every ROW_INDEX stream and, where bloom filters are registered
 * under the same stream id, attaches them to the per-row-group column statistics.
 *
 * @param streams stream descriptors keyed by stream id; only ROW_INDEX streams are processed
 * @param streamsData input streams keyed by stream id
 * @param bloomFilterIndexes bloom filters keyed by stream id; a missing or empty list leaves the
 *        row group indexes as read
 * @return an immutable map from stream id to its row group indexes
 * @throws IOException if reading a row index fails
 */
private Map<StreamId, List<RowGroupIndex>> readColumnIndexes(
        Map<StreamId, Stream> streams,
        Map<StreamId, OrcInputStream> streamsData,
        Map<StreamId, List<HiveBloomFilter>> bloomFilterIndexes)
        throws IOException
{
    ImmutableMap.Builder<StreamId, List<RowGroupIndex>> result = ImmutableMap.builder();
    for (Entry<StreamId, Stream> streamEntry : streams.entrySet()) {
        if (streamEntry.getValue().getStreamKind() != ROW_INDEX) {
            continue;
        }

        StreamId streamId = streamEntry.getKey();
        OrcInputStream indexData = streamsData.get(streamId);
        List<HiveBloomFilter> filters = bloomFilterIndexes.get(streamId);
        List<RowGroupIndex> indexes = metadataReader.readRowIndexes(hiveWriterVersion, indexData);

        boolean hasFilters = filters != null && !filters.isEmpty();
        if (hasFilters) {
            // Rebuild each row group index so that its statistics carry the bloom filter found
            // at the same list position.
            ImmutableList.Builder<RowGroupIndex> enriched = ImmutableList.builder();
            int position = 0;
            for (RowGroupIndex index : indexes) {
                ColumnStatistics statistics = index.getColumnStatistics().withBloomFilter(filters.get(position));
                enriched.add(new RowGroupIndex(index.getPositions(), statistics));
                position++;
            }
            indexes = enriched.build();
        }

        result.put(streamId, indexes);
    }
    return result.build();
}
/**
 * Reads the postscript through the wrapped reader.
 *
 * <p>Any {@link IOException} or {@link RuntimeException} raised by the delegate is handed to
 * {@code propagate} together with the message "Invalid postscript".
 *
 * @param data byte array forwarded to the delegate
 * @param offset offset forwarded to the delegate
 * @param length length forwarded to the delegate
 * @return the postscript produced by the delegate
 * @throws OrcCorruptionException when the delegate fails
 */
@Override
public PostScript readPostScript(byte[] data, int offset, int length)
        throws OrcCorruptionException
{
    final PostScript postScript;
    try {
        postScript = delegate.readPostScript(data, offset, length);
    }
    catch (IOException | RuntimeException failure) {
        throw propagate(failure, "Invalid postscript");
    }
    return postScript;
}
private Map<StreamId, List<HiveBloomFilter>> readBloomFilterIndexes(Map<StreamId, Stream> streams, Map<StreamId, OrcInputStream> streamsData) throws IOException { ImmutableMap.Builder<StreamId, List<HiveBloomFilter>> bloomFilters = ImmutableMap.builder(); for (Entry<StreamId, Stream> entry : streams.entrySet()) { Stream stream = entry.getValue(); if (stream.getStreamKind() == BLOOM_FILTER) { OrcInputStream inputStream = streamsData.get(entry.getKey()); bloomFilters.put(entry.getKey(), metadataReader.readBloomFilterIndexes(inputStream)); } // TODO: add support for BLOOM_FILTER_UTF8 } return bloomFilters.build(); }
public StripeFooter readStripeFooter(StripeInformation stripe, AggregatedMemoryContext systemMemoryUsage) throws IOException { long offset = stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength(); int tailLength = toIntExact(stripe.getFooterLength()); // read the footer byte[] tailBuffer = new byte[tailLength]; orcDataSource.readFully(offset, tailBuffer); try (InputStream inputStream = new OrcInputStream(orcDataSource.getId(), Slices.wrappedBuffer(tailBuffer).getInput(), decompressor, systemMemoryUsage, tailLength)) { return metadataReader.readStripeFooter(types, inputStream); } }
// For every stream whose id is of kind ROW_INDEX: read its row group indexes and require that
// there is exactly one row group unless invalidCheckPoint is set. (Fragment: the loop body
// continues beyond this line.)
for (Entry<StreamId, Stream> entry : streams.entrySet()) { if (entry.getKey().getStreamKind() == ROW_INDEX) { List<RowGroupIndex> rowGroupIndexes = metadataReader.readRowIndexes(hiveWriterVersion, streamsData.get(entry.getKey())); checkState(rowGroupIndexes.size() == 1 || invalidCheckPoint, "expect a single row group or an invalid check point"); long totalBytes = 0;
/**
 * Reads the file footer through the wrapped reader.
 *
 * <p>Any {@link IOException} or {@link RuntimeException} raised by the delegate is handed to
 * {@code propagate} together with the message "Invalid file footer".
 *
 * @param hiveWriterVersion writer version forwarded to the delegate
 * @param inputStream the stream the delegate reads the footer from
 * @return the footer produced by the delegate
 * @throws OrcCorruptionException when the delegate fails
 */
@Override
public Footer readFooter(HiveWriterVersion hiveWriterVersion, InputStream inputStream)
        throws OrcCorruptionException
{
    final Footer footer;
    try {
        footer = delegate.readFooter(hiveWriterVersion, inputStream);
    }
    catch (IOException | RuntimeException failure) {
        throw propagate(failure, "Invalid file footer");
    }
    return footer;
}
/**
 * Reads the file metadata through the wrapped reader.
 *
 * <p>Any {@link IOException} or {@link RuntimeException} raised by the delegate is handed to
 * {@code propagate} together with the message "Invalid file metadata".
 *
 * @param hiveWriterVersion writer version forwarded to the delegate
 * @param inputStream the stream the delegate reads the metadata from
 * @return the metadata produced by the delegate
 * @throws OrcCorruptionException when the delegate fails
 */
@Override
public Metadata readMetadata(HiveWriterVersion hiveWriterVersion, InputStream inputStream)
        throws OrcCorruptionException
{
    final Metadata metadata;
    try {
        metadata = delegate.readMetadata(hiveWriterVersion, inputStream);
    }
    catch (IOException | RuntimeException failure) {
        throw propagate(failure, "Invalid file metadata");
    }
    return metadata;
}
/**
 * Reads bloom filter indexes through the wrapped reader.
 *
 * <p>Any {@link IOException} or {@link RuntimeException} raised by the delegate is handed to
 * {@code propagate} together with the message "Invalid bloom filter".
 *
 * @param inputStream the stream the delegate reads the bloom filters from
 * @return the bloom filters produced by the delegate
 * @throws OrcCorruptionException when the delegate fails
 */
@Override
public List<HiveBloomFilter> readBloomFilterIndexes(InputStream inputStream)
        throws OrcCorruptionException
{
    final List<HiveBloomFilter> bloomFilters;
    try {
        bloomFilters = delegate.readBloomFilterIndexes(inputStream);
    }
    catch (IOException | RuntimeException failure) {
        throw propagate(failure, "Invalid bloom filter");
    }
    return bloomFilters;
}
// Fill tailBuffer from the data source starting at offset + indexLength + dataLength of the
// stripe, then decode it as a stripe footer using a freshly created ORC metadata reader and the
// footer's types. The stream is built with Optional.empty() and a new simple memory context
// (NOTE(review): Optional.empty() presumably means "no decompressor" — confirm against the
// OrcInputStream constructor). Fragment: the try block continues beyond this line.
orcDataSource.readFully(stripe.getOffset() + stripe.getIndexLength() + stripe.getDataLength(), tailBuffer); try (InputStream inputStream = new OrcInputStream(orcDataSource.getId(), Slices.wrappedBuffer(tailBuffer).getInput(), Optional.empty(), newSimpleAggregatedMemoryContext(), tailBuffer.length)) { StripeFooter stripeFooter = ORC.createMetadataReader().readStripeFooter(footer.getTypes(), inputStream);
/**
 * Reads the row group indexes of a stripe through the wrapped reader.
 *
 * <p>Any {@link IOException} or {@link RuntimeException} raised by the delegate is handed to
 * {@code propagate} together with the message "Invalid stripe row index".
 *
 * @param hiveWriterVersion writer version forwarded to the delegate
 * @param inputStream the stream the delegate reads the row index from
 * @return the row group indexes produced by the delegate
 * @throws OrcCorruptionException when the delegate fails
 */
@Override
public List<RowGroupIndex> readRowIndexes(HiveWriterVersion hiveWriterVersion, InputStream inputStream)
        throws OrcCorruptionException
{
    final List<RowGroupIndex> rowIndexes;
    try {
        rowIndexes = delegate.readRowIndexes(hiveWriterVersion, inputStream);
    }
    catch (IOException | RuntimeException failure) {
        throw propagate(failure, "Invalid stripe row index");
    }
    return rowIndexes;
}
private Map<StreamId, List<HiveBloomFilter>> readBloomFilterIndexes(Map<StreamId, Stream> streams, Map<StreamId, OrcInputStream> streamsData) throws IOException { ImmutableMap.Builder<StreamId, List<HiveBloomFilter>> bloomFilters = ImmutableMap.builder(); for (Entry<StreamId, Stream> entry : streams.entrySet()) { Stream stream = entry.getValue(); if (stream.getStreamKind() == BLOOM_FILTER) { OrcInputStream inputStream = streamsData.get(entry.getKey()); bloomFilters.put(entry.getKey(), metadataReader.readBloomFilterIndexes(inputStream)); } // TODO: add support for BLOOM_FILTER_UTF8 } return bloomFilters.build(); }
/**
 * Reads a stripe footer through the wrapped reader.
 *
 * <p>Both {@link IOException} and {@link RuntimeException} raised by the delegate are routed
 * through {@code propagate} with the message "Invalid stripe footer". The other read methods of
 * this wrapper already treat runtime failures this way; catching only {@code IOException} here
 * let a runtime failure during stripe footer decoding escape without that context.
 *
 * @param types the ORC types forwarded to the delegate
 * @param inputStream the stream the delegate reads the stripe footer from
 * @return the stripe footer produced by the delegate
 * @throws IOException whatever {@code propagate} yields for a delegate failure
 */
@Override
public StripeFooter readStripeFooter(List<OrcType> types, InputStream inputStream)
        throws IOException
{
    try {
        return delegate.readStripeFooter(types, inputStream);
    }
    catch (IOException | RuntimeException e) {
        throw propagate(e, "Invalid stripe footer");
    }
}
/**
 * Reads the row group indexes of every ROW_INDEX stream, keyed by the stream's column.
 *
 * @param streams stream descriptors keyed by stream id; only ROW_INDEX streams are processed
 * @param streamsData input streams keyed by stream id
 * @return an immutable map from column to the row group indexes read for it
 * @throws IOException if reading a row index fails
 */
private Map<Integer, List<RowGroupIndex>> readColumnIndexes(
        Map<StreamId, Stream> streams,
        Map<StreamId, OrcInputStream> streamsData)
        throws IOException
{
    ImmutableMap.Builder<Integer, List<RowGroupIndex>> result = ImmutableMap.builder();
    for (Entry<StreamId, Stream> streamEntry : streams.entrySet()) {
        Stream descriptor = streamEntry.getValue();
        if (descriptor.getStreamKind() != ROW_INDEX) {
            continue;
        }
        OrcInputStream indexData = streamsData.get(streamEntry.getKey());
        result.put(descriptor.getColumn(), metadataReader.readRowIndexes(indexData));
    }
    return result.build();
}