@Override
public List<RecordReader> getReaders(UnifiedParquetReader unifiedReader) {
  // Row-wise (non-vectorized) read path: one reader covers the row group
  // identified by the read entry.
  final ParquetRowiseReader rowiseReader = new ParquetRowiseReader(
      unifiedReader.context,
      unifiedReader.getFooter(),
      unifiedReader.readEntry.getRowGroupIndex(),
      unifiedReader.readEntry.getPath(),
      unifiedReader.realFields,
      unifiedReader.fs,
      unifiedReader.schemaHelper,
      unifiedReader.inputStreamProvider);
  // The reader is passed through addFilterIfNecessary before being returned,
  // matching the other read paths in this file.
  final List<RecordReader> readers = new ArrayList<>();
  readers.add(unifiedReader.addFilterIfNecessary(rowiseReader));
  return readers;
} },
unifiedReader.context, unifiedReader.vectorizableReaderColumns, unifiedReader.readEntry.getPath(), unifiedReader.codecFactory, unifiedReader.filterConditions, unifiedReader.getFooter(), unifiedReader.readEntry.getRowGroupIndex(), unifiedReader.readEntry.getPath(), unifiedReader.nonVectorizableReaderColumns, unifiedReader.fs,
@Override
public List<RecordReader> getReaders(UnifiedParquetReader unifiedReader) throws ExecutionSetupException {
  // Legacy vectorized read path: a single DeprecatedParquetVectorizedReader
  // handles the whole row group, carrying the global-dictionary metadata held
  // by the unified reader.
  final DeprecatedParquetVectorizedReader vectorizedReader = new DeprecatedParquetVectorizedReader(
      unifiedReader.context,
      unifiedReader.readEntry.getPath(),
      unifiedReader.readEntry.getRowGroupIndex(),
      unifiedReader.fs,
      // Direct codec factory backed by the operator's allocator; the 0 here is
      // the page-size argument of createDirectCodecFactory — TODO confirm the
      // intended semantics of passing 0.
      CodecFactory.createDirectCodecFactory(
          unifiedReader.fs.getConf(),
          new ParquetDirectByteBufferAllocator(unifiedReader.context.getAllocator()),
          0),
      unifiedReader.getFooter(),
      unifiedReader.realFields,
      unifiedReader.schemaHelper,
      unifiedReader.globalDictionaryFieldInfoMap,
      unifiedReader.dictionaries);
  // Same filter-wrapping convention as the row-wise path.
  final List<RecordReader> readers = new ArrayList<>();
  readers.add(unifiedReader.addFilterIfNecessary(vectorizedReader));
  return readers;
} },
/**
 * Records whether this row group's bytes are fully available on the local host.
 * Adds 1 to the {@code NUM_REMOTE_READERS} metric when some bytes of the row
 * group live only on remote hosts, 0 when the row group is fully local.
 *
 * @param footer parquet footer supplying the row-group block metadata
 * @throws ExecutionSetupException wrapping any {@link IOException} raised by
 *         the filesystem calls or the local hostname lookup
 */
private void computeLocality(ParquetMetadata footer) throws ExecutionSetupException {
  try {
    BlockMetaData block = footer.getBlocks().get(readEntry.getRowGroupIndex());
    // HDFS block locations overlapping the row group's byte range in the file.
    BlockLocation[] blockLocations = fs.getFileBlockLocations(new Path(readEntry.getPath()), block.getStartingPos(), block.getCompressedSize());
    String localHost = InetAddress.getLocalHost().getCanonicalHostName();
    List<Range<Long>> intersectingRanges = new ArrayList<>();
    // NOTE(review): rowGroupRange is open-closed while the per-location ranges
    // below are closed-open — asymmetric, though the endpoint subtraction used
    // for the length sum yields the same totals; confirm this was intentional.
    Range<Long> rowGroupRange = Range.openClosed(block.getStartingPos(), block.getStartingPos() + block.getCompressedSize());
    for (BlockLocation loc : blockLocations) {
      for (String host : loc.getHosts()) {
        if (host.equals(localHost)) {
          // Guava Range.intersection throws IllegalArgumentException on
          // disconnected ranges; assumes every location returned for the
          // queried span actually overlaps it — TODO confirm.
          intersectingRanges.add(Range.closedOpen(loc.getOffset(), loc.getOffset() + loc.getLength()).intersection(rowGroupRange));
        }
      }
    }
    // Sum of row-group bytes hosted locally.
    long totalIntersect = 0;
    for (Range<Long> range : intersectingRanges) {
      totalIntersect += (range.upperEndpoint() - range.lowerEndpoint());
    }
    // Any shortfall means at least one byte must be fetched remotely.
    if (totalIntersect < block.getCompressedSize()) {
      context.getStats().addLongStat(Metric.NUM_REMOTE_READERS, 1);
    } else {
      context.getStats().addLongStat(Metric.NUM_REMOTE_READERS, 0);
    }
  } catch (IOException e) {
    // UnknownHostException from getLocalHost() is an IOException and lands here too.
    throw new ExecutionSetupException(e);
  }
}
fileSplit.getLength() < oContext.getOptions().getOption(ExecConstants.PARQUET_MULTI_STREAM_SIZE_LIMIT)); InputStreamProvider inputStreamProvider = new InputStreamProvider(fs, new Path(split.getPath()), useSingleStream);
Path p = new Path(split.getSplitXAttr().getPath()); Long length = split.getSplitXAttr().getFileLength(); if (length == null || !context.getOptions().getOption(ExecConstants.PARQUET_CACHED_ENTITY_SET_FILE_SIZE)) { final ParquetMetadata footer = footerCache.getFooter(inputStreamProvider.stream(), split.getSplitXAttr().getPath(), length, fs); return readerConfig.wrapIfNecessary(context.getAllocator(), inner, split.getDatasetSplit()); } catch (IOException e) { throw UserException.dataReadError(e).addContext("Failure opening parquet file").addContext("File", split.getSplitXAttr().getPath()).build(logger);