/**
 * Produces the reader list for the row-wise read path.
 *
 * <p>Constructs one {@code ParquetRowiseReader} from the state held by
 * {@code unifiedReader} (context, footer, row group index, path, fields, file
 * system, schema helper and input stream provider), hands it to
 * {@code unifiedReader.addFilterIfNecessary(...)}, and returns whatever that call
 * yields as the only element of a new {@link ArrayList}.
 *
 * @param unifiedReader the reader whose state is used to build the row-wise reader
 * @return a new mutable list holding exactly one reader
 */
@Override
public List<RecordReader> getReaders(UnifiedParquetReader unifiedReader) {
  final List<RecordReader> readers = new ArrayList<>();

  final ParquetRowiseReader rowiseReader = new ParquetRowiseReader(
      unifiedReader.context,
      unifiedReader.getFooter(),
      unifiedReader.readEntry.getRowGroupIndex(),
      unifiedReader.readEntry.getPath(),
      unifiedReader.realFields,
      unifiedReader.fs,
      unifiedReader.schemaHelper,
      unifiedReader.inputStreamProvider);

  readers.add(unifiedReader.addFilterIfNecessary(rowiseReader));
  return readers;
}
}, // closes the enclosing body that opens before this method
final ParquetMetadata footer = unifiedReader.getFooter(); final List<BlockMetaData> blocks = footer.getBlocks(); final int rowGroupIdx = unifiedReader.readEntry.getRowGroupIndex(); if (blocks.size() <= rowGroupIdx) { throw new IllegalArgumentException(
unifiedReader.enableDetailedTracing, unifiedReader.getFooter(), unifiedReader.readEntry.getRowGroupIndex(), deltas, unifiedReader.schemaHelper, unifiedReader.context, unifiedReader.getFooter(), unifiedReader.readEntry.getRowGroupIndex(), unifiedReader.readEntry.getPath(), unifiedReader.nonVectorizableReaderColumns,
/**
 * Produces the reader list for the deprecated vectorized read path.
 *
 * <p>Constructs one {@code DeprecatedParquetVectorizedReader} from the state held
 * by {@code unifiedReader}, together with a direct codec factory created from the
 * file system configuration and an allocator wrapping the context's allocator. The
 * reader is handed to {@code unifiedReader.addFilterIfNecessary(...)} and the
 * result is returned as the only element of a new {@link ArrayList}.
 *
 * @param unifiedReader the reader whose state is used to build the vectorized reader
 * @return a new mutable list holding exactly one reader
 * @throws ExecutionSetupException declared by this override; NOTE(review): which
 *     call actually raises it is not visible from here
 */
@Override
public List<RecordReader> getReaders(UnifiedParquetReader unifiedReader) throws ExecutionSetupException {
  final List<RecordReader> readers = new ArrayList<>();

  final DeprecatedParquetVectorizedReader vectorizedReader = new DeprecatedParquetVectorizedReader(
      unifiedReader.context,
      unifiedReader.readEntry.getPath(),
      unifiedReader.readEntry.getRowGroupIndex(),
      unifiedReader.fs,
      // NOTE(review): the trailing 0 is presumably a page-size hint; confirm against CodecFactory.
      CodecFactory.createDirectCodecFactory(
          unifiedReader.fs.getConf(),
          new ParquetDirectByteBufferAllocator(unifiedReader.context.getAllocator()),
          0),
      unifiedReader.getFooter(),
      unifiedReader.realFields,
      unifiedReader.schemaHelper,
      unifiedReader.globalDictionaryFieldInfoMap,
      unifiedReader.dictionaries);

  readers.add(unifiedReader.addFilterIfNecessary(vectorizedReader));
  return readers;
}
}, // closes the enclosing body that opens before this method
private void splitColumns(final ParquetMetadata footer, List<SchemaPath> vectorizableReaderColumns, List<SchemaPath> nonVectorizableReaderColumns) { final BlockMetaData block = footer.getBlocks().get(readEntry.getRowGroupIndex()); final Map<String, ColumnChunkMetaData> fields = new HashMap<>(); final List<Type> nonVectorizableTypes = new ArrayList<>();
/**
 * Records in the operator stats whether this reader's row group is fully hosted on
 * the local machine.
 *
 * <p>Asks the file system for the block locations covering the row group's byte
 * span ({@code block.getStartingPos()} for {@code block.getCompressedSize()} bytes),
 * sums the overlap between that span and every block location that lists the local
 * canonical host name, and reports {@code 1} for {@code Metric.NUM_REMOTE_READERS}
 * when the local overlap is smaller than the row group's compressed size,
 * {@code 0} otherwise.
 *
 * <p>Both byte spans are modelled as half-open {@code [start, end)} ranges. A block
 * location that does not touch the row group is skipped rather than intersected,
 * because {@code Range.intersection} throws {@link IllegalArgumentException} for
 * ranges that are not connected, and that exception would not be caught by the
 * {@code IOException} handler below.
 *
 * @param footer parquet footer; its block list is indexed with the read entry's row group index
 * @throws ExecutionSetupException wrapping any {@link IOException} raised while
 *     resolving block locations, their hosts, or the local host name
 */
private void computeLocality(ParquetMetadata footer) throws ExecutionSetupException {
  try {
    final BlockMetaData block = footer.getBlocks().get(readEntry.getRowGroupIndex());
    final long rowGroupStart = block.getStartingPos();
    final long rowGroupLength = block.getCompressedSize();

    final BlockLocation[] blockLocations =
        fs.getFileBlockLocations(new Path(readEntry.getPath()), rowGroupStart, rowGroupLength);
    final String localHost = InetAddress.getLocalHost().getCanonicalHostName();

    // Half-open byte span of the row group, same convention as the block ranges below.
    final Range<Long> rowGroupRange = Range.closedOpen(rowGroupStart, rowGroupStart + rowGroupLength);

    long localBytes = 0;
    for (BlockLocation loc : blockLocations) {
      for (String host : loc.getHosts()) {
        if (!host.equals(localHost)) {
          continue;
        }
        final Range<Long> blockRange = Range.closedOpen(loc.getOffset(), loc.getOffset() + loc.getLength());
        // Disjoint ranges contribute nothing; intersecting them would throw.
        if (blockRange.isConnected(rowGroupRange)) {
          final Range<Long> overlap = blockRange.intersection(rowGroupRange);
          localBytes += overlap.upperEndpoint() - overlap.lowerEndpoint();
        }
      }
    }

    // The stat is written in both cases, with 0 when the whole row group is local.
    final boolean needsRemoteRead = localBytes < rowGroupLength;
    context.getStats().addLongStat(Metric.NUM_REMOTE_READERS, needsRemoteRead ? 1 : 0);
  } catch (IOException e) {
    throw new ExecutionSetupException(e);
  }
}