/**
 * Converts a fully-serialized {@code ParquetDatasetSplitXAttr} into the slimmer
 * scan-time variant, copying only the fields needed to read the split:
 * path, file length, start offset, length, and row group index.
 *
 * <p>Fix: removed a stray duplicate semicolon after the {@code revert(...)} call.
 *
 * @param xattrFullSerialized serialized full split xattr bytes
 * @return serialized {@code ParquetDatasetSplitScanXAttr}
 */
private ByteString convertToScanXAttr(ByteString xattrFullSerialized) {
  ParquetDatasetSplitXAttr fullXAttr =
      ParquetDatasetXAttrSerDe.PARQUET_DATASET_SPLIT_XATTR_SERIALIZER.revert(xattrFullSerialized.toByteArray());
  ParquetDatasetSplitScanXAttr scanXAttr = new ParquetDatasetSplitScanXAttr();
  scanXAttr.setPath(fullXAttr.getPath());
  // the file length lives on the update key of the full xattr, not on the xattr itself
  scanXAttr.setFileLength(fullXAttr.getUpdateKey().getLength());
  scanXAttr.setStart(fullXAttr.getStart());
  scanXAttr.setLength(fullXAttr.getLength());
  scanXAttr.setRowGroupIndex(fullXAttr.getRowGroupIndex());
  return ByteString.copyFrom(
      ParquetDatasetXAttrSerDe.PARQUET_DATASET_SPLIT_SCAN_XATTR_SERIALIZER.serialize(scanXAttr));
}
/**
 * Builds the reader list for this execution path: a single
 * {@code ParquetRowiseReader} over the split's row group, wrapped with a
 * filter when {@code addFilterIfNecessary} decides one applies.
 */
@Override
public List<RecordReader> getReaders(UnifiedParquetReader unifiedReader) {
  final RecordReader rowiseReader = new ParquetRowiseReader(
      unifiedReader.context,
      unifiedReader.getFooter(),
      unifiedReader.readEntry.getRowGroupIndex(),
      unifiedReader.readEntry.getPath(),
      unifiedReader.realFields,
      unifiedReader.fs,
      unifiedReader.schemaHelper,
      unifiedReader.inputStreamProvider);
  final List<RecordReader> readers = new ArrayList<>();
  readers.add(unifiedReader.addFilterIfNecessary(rowiseReader));
  return readers;
}
},
ParquetDatasetSplitScanXAttr split = new ParquetDatasetSplitScanXAttr(); split.setRowGroupIndex(rowGroupNum); split.setPath(Path.getPathWithoutSchemeAndAuthority(finalPath).toString()); split.setStart(0l); split.setLength((long) Integer.MAX_VALUE); fileSplit.getLength() < oContext.getOptions().getOption(ExecConstants.PARQUET_MULTI_STREAM_SIZE_LIMIT)); InputStreamProvider inputStreamProvider = new InputStreamProvider(fs, new Path(split.getPath()), useSingleStream);
/** Returns a fresh, empty {@code ParquetDatasetSplitScanXAttr} instance. */
public ParquetDatasetSplitScanXAttr newMessage() {
  final ParquetDatasetSplitScanXAttr message = new ParquetDatasetSplitScanXAttr();
  return message;
}
Path p = new Path(split.getSplitXAttr().getPath()); Long length = split.getSplitXAttr().getFileLength(); if (length == null || !context.getOptions().getOption(ExecConstants.PARQUET_CACHED_ENTITY_SET_FILE_SIZE)) { length = fs.getFileStatus(p).getLen(); final ParquetMetadata footer = footerCache.getFooter(inputStreamProvider.stream(), split.getSplitXAttr().getPath(), length, fs); return readerConfig.wrapIfNecessary(context.getAllocator(), inner, split.getDatasetSplit()); } catch (IOException e) { throw UserException.dataReadError(e).addContext("Failure opening parquet file").addContext("File", split.getSplitXAttr().getPath()).build(logger);
final ParquetMetadata footer = unifiedReader.getFooter(); final List<BlockMetaData> blocks = footer.getBlocks(); final int rowGroupIdx = unifiedReader.readEntry.getRowGroupIndex(); if (blocks.size() <= rowGroupIdx) { throw new IllegalArgumentException(
private void splitColumns(final ParquetMetadata footer, List<SchemaPath> vectorizableReaderColumns, List<SchemaPath> nonVectorizableReaderColumns) { final BlockMetaData block = footer.getBlocks().get(readEntry.getRowGroupIndex()); final Map<String, ColumnChunkMetaData> fields = new HashMap<>(); final List<Type> nonVectorizableTypes = new ArrayList<>();
unifiedReader.context, unifiedReader.vectorizableReaderColumns, unifiedReader.readEntry.getPath(), unifiedReader.codecFactory, unifiedReader.filterConditions, unifiedReader.enableDetailedTracing, unifiedReader.getFooter(), unifiedReader.readEntry.getRowGroupIndex(), deltas, unifiedReader.schemaHelper, unifiedReader.context, unifiedReader.getFooter(), unifiedReader.readEntry.getRowGroupIndex(), unifiedReader.readEntry.getPath(), unifiedReader.nonVectorizableReaderColumns, unifiedReader.fs,
/**
 * Builds the reader list for this execution path: one
 * {@code DeprecatedParquetVectorizedReader} backed by a direct codec factory,
 * wrapped with a filter when {@code addFilterIfNecessary} decides one applies.
 */
@Override
public List<RecordReader> getReaders(UnifiedParquetReader unifiedReader) throws ExecutionSetupException {
  // direct (off-heap) codec factory using the operator's allocator; 0 = default page size hint
  final CodecFactory directCodecFactory = CodecFactory.createDirectCodecFactory(
      unifiedReader.fs.getConf(),
      new ParquetDirectByteBufferAllocator(unifiedReader.context.getAllocator()),
      0);
  final RecordReader vectorizedReader = new DeprecatedParquetVectorizedReader(
      unifiedReader.context,
      unifiedReader.readEntry.getPath(),
      unifiedReader.readEntry.getRowGroupIndex(),
      unifiedReader.fs,
      directCodecFactory,
      unifiedReader.getFooter(),
      unifiedReader.realFields,
      unifiedReader.schemaHelper,
      unifiedReader.globalDictionaryFieldInfoMap,
      unifiedReader.dictionaries);
  final List<RecordReader> readers = new ArrayList<>();
  readers.add(unifiedReader.addFilterIfNecessary(vectorizedReader));
  return readers;
}
},
/**
 * Measures how much of the target row group's compressed byte range is hosted
 * on this node, and records the result: {@code NUM_REMOTE_READERS} is set to 1
 * when the locally-hosted ranges do not cover the row group's full compressed
 * size, 0 otherwise.
 *
 * @param footer parquet footer used to locate the row group's byte range
 * @throws ExecutionSetupException if block locations or the local host name
 *         cannot be resolved (wraps the underlying {@link IOException})
 */
private void computeLocality(ParquetMetadata footer) throws ExecutionSetupException {
  try {
    final BlockMetaData rowGroup = footer.getBlocks().get(readEntry.getRowGroupIndex());
    final long start = rowGroup.getStartingPos();
    final long size = rowGroup.getCompressedSize();
    final BlockLocation[] locations =
        fs.getFileBlockLocations(new Path(readEntry.getPath()), start, size);
    final String thisHost = InetAddress.getLocalHost().getCanonicalHostName();
    // NOTE(review): the row group range is open at its start while block ranges
    // are closed at theirs; preserved exactly as in the original logic.
    final Range<Long> rowGroupRange = Range.openClosed(start, start + size);
    final List<Range<Long>> localPieces = new ArrayList<>();
    for (BlockLocation location : locations) {
      for (String host : location.getHosts()) {
        if (host.equals(thisHost)) {
          localPieces.add(
              Range.closedOpen(location.getOffset(), location.getOffset() + location.getLength())
                  .intersection(rowGroupRange));
        }
      }
    }
    long localBytes = 0;
    for (Range<Long> piece : localPieces) {
      localBytes += piece.upperEndpoint() - piece.lowerEndpoint();
    }
    context.getStats().addLongStat(Metric.NUM_REMOTE_READERS, localBytes < size ? 1 : 0);
  } catch (IOException e) {
    throw new ExecutionSetupException(e);
  }
}