org.apache.parquet.io.InputFile.newStream java code examples

/**
 * @param configuration the Hadoop conf
 * @param fileMetaData fileMetaData for parquet file
 * @param filePath Path for the parquet file
 * @param blocks the blocks to read
 * @param columns the columns to read (their path)
 * @throws IOException if the file can not be opened
 * @deprecated will be removed in 2.0.0.
 */
@Deprecated
public ParquetFileReader(
  Configuration configuration, FileMetaData fileMetaData,
  Path filePath, List<BlockMetaData> blocks, List<ColumnDescriptor> columns) throws IOException {
 this.converter = new ParquetMetadataConverter(configuration);
 this.file = HadoopInputFile.fromPath(filePath, configuration);
 this.fileMetaData = fileMetaData;
 this.f = file.newStream();
 this.options = HadoopReadOptions.builder(configuration).build();
 this.blocks = filterRowGroups(blocks);
 this.blockIndexStores = listWithNulls(this.blocks.size());
 this.blockRowRanges = listWithNulls(this.blocks.size());
 for (ColumnDescriptor col : columns) {
  paths.put(ColumnPath.get(col.getPath()), col);
 }
}

/**
 * Builds a reader for {@code filePath} using file metadata, row groups and columns that
 * the caller supplies. The file is opened for reading as part of construction.
 *
 * @param configuration the Hadoop conf
 * @param fileMetaData fileMetaData for parquet file
 * @param filePath Path for the parquet file
 * @param blocks the blocks to read
 * @param columns the columns to read (their path)
 * @throws IOException if the file can not be opened
 * @deprecated will be removed in 2.0.0.
 */
@Deprecated
public ParquetFileReader(
  Configuration configuration, FileMetaData fileMetaData,
  Path filePath, List<BlockMetaData> blocks, List<ColumnDescriptor> columns) throws IOException {
 // Converter and input file are both derived from the Hadoop configuration.
 this.converter = new ParquetMetadataConverter(configuration);
 this.file = HadoopInputFile.fromPath(filePath, configuration);
 this.fileMetaData = fileMetaData;

 // Open the data stream, then set the read options before row groups are filtered.
 this.f = this.file.newStream();
 this.options = HadoopReadOptions.builder(configuration).build();
 this.blocks = filterRowGroups(blocks);

 // Register each requested column under its column path.
 for (ColumnDescriptor descriptor : columns) {
  this.paths.put(ColumnPath.get(descriptor.getPath()), descriptor);
 }
}

/**
 * Opens {@code file}, reads its footer and prepares the reader for the row groups and
 * columns described there.
 *
 * @param file the input file to read
 * @param options the read options used for footer conversion and row-group filtering
 * @throws IOException if the stream cannot be opened or the footer cannot be read
 */
public ParquetFileReader(InputFile file, ParquetReadOptions options) throws IOException {
 this.converter = new ParquetMetadataConverter(options);
 this.file = file;
 this.f = file.newStream();
 this.options = options;
 try {
  this.footer = readFooter(file, options, f, converter);
 } catch (Exception e) {
  // If reading the footer fails, the caller never receives this reader and therefore
  // cannot close the stream opened above; release it here before propagating.
  try {
   f.close();
  } catch (IOException closeFailure) {
   // Keep the footer failure as the primary error; record the close failure on it.
   e.addSuppressed(closeFailure);
  }
  throw e;
 }
 this.fileMetaData = footer.getFileMetaData();
 this.blocks = filterRowGroups(footer.getBlocks());
 // Index every column of the file schema by its column path.
 for (ColumnDescriptor col : footer.getFileMetaData().getSchema().getColumns()) {
  paths.put(ColumnPath.get(col.getPath()), col);
 }
}

/**
 * Reads the footer metadata of {@code file}. A stream is opened on the file for the
 * duration of the read and is closed again before this method returns. When the file is
 * a {@link HadoopInputFile}, the read options are built from its Hadoop configuration;
 * otherwise default options are used. In both cases {@code filter} is applied.
 *
 * @param file a {@link InputFile} to read
 * @param filter the filter to apply to row groups
 * @return the metadata blocks in the footer
 * @throws IOException if an error occurs while reading the file
 * @deprecated will be removed in 2.0.0;
 *             use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)}
 */
@Deprecated
public static final ParquetMetadata readFooter(InputFile file, MetadataFilter filter) throws IOException {
 final ParquetReadOptions options = (file instanceof HadoopInputFile)
   ? HadoopReadOptions.builder(((HadoopInputFile) file).getConfiguration())
     .withMetadataFilter(filter).build()
   : ParquetReadOptions.builder().withMetadataFilter(filter).build();
 try (SeekableInputStream footerStream = file.newStream()) {
  return readFooter(file, options, footerStream);
 }
}

/**
 * @param conf the Hadoop Configuration
 * @param file Path to a parquet file
 * @param footer a {@link ParquetMetadata} footer already read from the file
 * @throws IOException if the file can not be opened
 * @deprecated will be removed in 2.0.0.
 */
@Deprecated
public ParquetFileReader(Configuration conf, Path file, ParquetMetadata footer) throws IOException {
 this.converter = new ParquetMetadataConverter(conf);
 this.file = HadoopInputFile.fromPath(file, conf);
 this.f = this.file.newStream();
 this.options = HadoopReadOptions.builder(conf).build();
 this.footer = footer;
 this.fileMetaData = footer.getFileMetaData();
 this.blocks = filterRowGroups(footer.getBlocks());
 this.blockIndexStores = listWithNulls(this.blocks.size());
 this.blockRowRanges = listWithNulls(this.blocks.size());
 for (ColumnDescriptor col : footer.getFileMetaData().getSchema().getColumns()) {
  paths.put(ColumnPath.get(col.getPath()), col);
 }
}

/**
 * Reads the footer metadata of {@code file}, opening a stream on it for the read and
 * closing that stream before returning. Read options come from the file's Hadoop
 * configuration when it is a {@link HadoopInputFile} and from the defaults otherwise;
 * {@code filter} is set on the options either way.
 *
 * @param file a {@link InputFile} to read
 * @param filter the filter to apply to row groups
 * @return the metadata blocks in the footer
 * @throws IOException if an error occurs while reading the file
 * @deprecated will be removed in 2.0.0;
 *             use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)}
 */
@Deprecated
public static final ParquetMetadata readFooter(InputFile file, MetadataFilter filter) throws IOException {
 final ParquetReadOptions options;
 if (!(file instanceof HadoopInputFile)) {
  // No Hadoop configuration available: start from the default read options.
  options = ParquetReadOptions.builder().withMetadataFilter(filter).build();
 } else {
  HadoopInputFile hadoopFile = (HadoopInputFile) file;
  options = HadoopReadOptions.builder(hadoopFile.getConfiguration())
    .withMetadataFilter(filter)
    .build();
 }
 try (SeekableInputStream stream = file.newStream()) {
  return readFooter(file, options, stream);
 }
}

/**
 * Opens {@code file}, reads its footer and prepares the reader for the row groups and
 * columns described there. If the footer cannot be read, the stream opened here is
 * closed before the failure is propagated.
 *
 * @param file the input file to read
 * @param options the read options used for footer conversion and row-group filtering
 * @throws IOException if the stream cannot be opened or the footer cannot be read
 */
public ParquetFileReader(InputFile file, ParquetReadOptions options) throws IOException {
 this.converter = new ParquetMetadataConverter(options);
 this.file = file;
 this.f = file.newStream();
 this.options = options;
 try {
  this.footer = readFooter(file, options, f, converter);
 } catch (Exception e) {
  // In case that reading footer throws an exception in the constructor, the new stream
  // should be closed. Otherwise, there's no way to close this outside.
  try {
   f.close();
  } catch (IOException closeFailure) {
   // A failing close must not replace the footer error, which is the real cause;
   // attach it as a suppressed exception instead.
   e.addSuppressed(closeFailure);
  }
  throw e;
 }
 this.fileMetaData = footer.getFileMetaData();
 this.blocks = filterRowGroups(footer.getBlocks());
 // Both per-row-group lists are sized to the number of row groups kept after filtering.
 this.blockIndexStores = listWithNulls(this.blocks.size());
 this.blockRowRanges = listWithNulls(this.blocks.size());
 // Index every column of the file schema by its column path.
 for (ColumnDescriptor col : footer.getFileMetaData().getSchema().getColumns()) {
  paths.put(ColumnPath.get(col.getPath()), col);
 }
}

/**
 * @param conf the Hadoop Configuration
 * @param file Path to a parquet file
 * @param footer a {@link ParquetMetadata} footer already read from the file
 * @throws IOException if the file can not be opened
 * @deprecated will be removed in 2.0.0.
 */
@Deprecated
public ParquetFileReader(Configuration conf, Path file, ParquetMetadata footer) throws IOException {
 this.converter = new ParquetMetadataConverter(conf);
 this.file = HadoopInputFile.fromPath(file, conf);
 this.f = this.file.newStream();
 this.options = HadoopReadOptions.builder(conf).build();
 this.footer = footer;
 this.fileMetaData = footer.getFileMetaData();
 this.blocks = filterRowGroups(footer.getBlocks());
 for (ColumnDescriptor col : footer.getFileMetaData().getSchema().getColumns()) {
  paths.put(ColumnPath.get(col.getPath()), col);
 }
}

Javadoc

Open a new SeekableInputStream for the underlying data file.

Popular methods of InputFile

getLength

Popular in Java

Finding current android device location
getResourceAsStream (ClassLoader)
scheduleAtFixedRate (ScheduledExecutorService)
findViewById (Activity)
ConnectException (java.net)
A ConnectException is thrown if a connection cannot be established to a remote host on a specific port.
TreeSet (java.util)
TreeSet is an implementation of SortedSet. All optional operations (adding and removing) are supported.
ExecutorService (java.util.concurrent)
An Executor that provides methods to manage termination and methods that can produce a Future for tracking progress of one or more asynchronous tasks.
JarFile (java.util.jar)
JarFile is used to read jar entries and their associated data from jar files.
Color (java.awt)
The Color class is used to encapsulate colors in the default sRGB color space or colors in arbitrary color spaces identified by a ColorSpace.
JTable (javax.swing)
Top 12 Jupyter Notebook extensions

How to use the newStream method in org.apache.parquet.io.InputFile

Best Java code snippets using org.apache.parquet.io.InputFile.newStream (Showing top 8 results out of 315)

How to use
newStream
method
in
org.apache.parquet.io.InputFile