/** * @param configuration the Hadoop conf * @param fileMetaData fileMetaData for parquet file * @param filePath Path for the parquet file * @param blocks the blocks to read * @param columns the columns to read (their path) * @throws IOException if the file can not be opened * @deprecated will be removed in 2.0.0. */ @Deprecated public ParquetFileReader( Configuration configuration, FileMetaData fileMetaData, Path filePath, List<BlockMetaData> blocks, List<ColumnDescriptor> columns) throws IOException { this.converter = new ParquetMetadataConverter(configuration); this.file = HadoopInputFile.fromPath(filePath, configuration); this.fileMetaData = fileMetaData; this.f = file.newStream(); this.options = HadoopReadOptions.builder(configuration).build(); this.blocks = filterRowGroups(blocks); this.blockIndexStores = listWithNulls(this.blocks.size()); this.blockRowRanges = listWithNulls(this.blocks.size()); for (ColumnDescriptor col : columns) { paths.put(ColumnPath.get(col.getPath()), col); } }
/** * @param configuration the Hadoop conf * @param fileMetaData fileMetaData for parquet file * @param filePath Path for the parquet file * @param blocks the blocks to read * @param columns the columns to read (their path) * @throws IOException if the file can not be opened * @deprecated will be removed in 2.0.0. */ @Deprecated public ParquetFileReader( Configuration configuration, FileMetaData fileMetaData, Path filePath, List<BlockMetaData> blocks, List<ColumnDescriptor> columns) throws IOException { this.converter = new ParquetMetadataConverter(configuration); this.file = HadoopInputFile.fromPath(filePath, configuration); this.fileMetaData = fileMetaData; this.f = file.newStream(); this.options = HadoopReadOptions.builder(configuration).build(); this.blocks = filterRowGroups(blocks); for (ColumnDescriptor col : columns) { paths.put(ColumnPath.get(col.getPath()), col); } }
public ParquetFileReader(InputFile file, ParquetReadOptions options) throws IOException { this.converter = new ParquetMetadataConverter(options); this.file = file; this.f = file.newStream(); this.options = options; this.footer = readFooter(file, options, f, converter); this.fileMetaData = footer.getFileMetaData(); this.blocks = filterRowGroups(footer.getBlocks()); for (ColumnDescriptor col : footer.getFileMetaData().getSchema().getColumns()) { paths.put(ColumnPath.get(col.getPath()), col); } }
/** * Reads the meta data block in the footer of the file using provided input stream * @param file a {@link InputFile} to read * @param filter the filter to apply to row groups * @return the metadata blocks in the footer * @throws IOException if an error occurs while reading the file * @deprecated will be removed in 2.0.0; * use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)} */ @Deprecated public static final ParquetMetadata readFooter(InputFile file, MetadataFilter filter) throws IOException { ParquetReadOptions options; if (file instanceof HadoopInputFile) { options = HadoopReadOptions.builder(((HadoopInputFile) file).getConfiguration()) .withMetadataFilter(filter).build(); } else { options = ParquetReadOptions.builder().withMetadataFilter(filter).build(); } try (SeekableInputStream in = file.newStream()) { return readFooter(file, options, in); } }
/** * @param conf the Hadoop Configuration * @param file Path to a parquet file * @param footer a {@link ParquetMetadata} footer already read from the file * @throws IOException if the file can not be opened * @deprecated will be removed in 2.0.0. */ @Deprecated public ParquetFileReader(Configuration conf, Path file, ParquetMetadata footer) throws IOException { this.converter = new ParquetMetadataConverter(conf); this.file = HadoopInputFile.fromPath(file, conf); this.f = this.file.newStream(); this.options = HadoopReadOptions.builder(conf).build(); this.footer = footer; this.fileMetaData = footer.getFileMetaData(); this.blocks = filterRowGroups(footer.getBlocks()); this.blockIndexStores = listWithNulls(this.blocks.size()); this.blockRowRanges = listWithNulls(this.blocks.size()); for (ColumnDescriptor col : footer.getFileMetaData().getSchema().getColumns()) { paths.put(ColumnPath.get(col.getPath()), col); } }
/** * Reads the meta data block in the footer of the file using provided input stream * @param file a {@link InputFile} to read * @param filter the filter to apply to row groups * @return the metadata blocks in the footer * @throws IOException if an error occurs while reading the file * @deprecated will be removed in 2.0.0; * use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)} */ @Deprecated public static final ParquetMetadata readFooter(InputFile file, MetadataFilter filter) throws IOException { ParquetReadOptions options; if (file instanceof HadoopInputFile) { options = HadoopReadOptions.builder(((HadoopInputFile) file).getConfiguration()) .withMetadataFilter(filter).build(); } else { options = ParquetReadOptions.builder().withMetadataFilter(filter).build(); } try (SeekableInputStream in = file.newStream()) { return readFooter(file, options, in); } }
public ParquetFileReader(InputFile file, ParquetReadOptions options) throws IOException { this.converter = new ParquetMetadataConverter(options); this.file = file; this.f = file.newStream(); this.options = options; try { this.footer = readFooter(file, options, f, converter); } catch (Exception e) { // In case that reading footer throws an exception in the constructor, the new stream // should be closed. Otherwise, there's no way to close this outside. f.close(); throw e; } this.fileMetaData = footer.getFileMetaData(); this.blocks = filterRowGroups(footer.getBlocks()); this.blockIndexStores = listWithNulls(this.blocks.size()); this.blockRowRanges = listWithNulls(this.blocks.size()); for (ColumnDescriptor col : footer.getFileMetaData().getSchema().getColumns()) { paths.put(ColumnPath.get(col.getPath()), col); } }
/** * @param conf the Hadoop Configuration * @param file Path to a parquet file * @param footer a {@link ParquetMetadata} footer already read from the file * @throws IOException if the file can not be opened * @deprecated will be removed in 2.0.0. */ @Deprecated public ParquetFileReader(Configuration conf, Path file, ParquetMetadata footer) throws IOException { this.converter = new ParquetMetadataConverter(conf); this.file = HadoopInputFile.fromPath(file, conf); this.f = this.file.newStream(); this.options = HadoopReadOptions.builder(conf).build(); this.footer = footer; this.fileMetaData = footer.getFileMetaData(); this.blocks = filterRowGroups(footer.getBlocks()); for (ColumnDescriptor col : footer.getFileMetaData().getSchema().getColumns()) { paths.put(ColumnPath.get(col.getPath()), col); } }