/**
 * Builds a metadata holder by eagerly reading the footer of the Parquet file
 * at {@code hdfsFilePath}. No row-group filtering is applied.
 *
 * @param conf Hadoop configuration used to resolve the file system
 * @param hdfsFilePath path of the Parquet file whose footer is read
 * @throws IOException if the footer cannot be read
 */
public ParquetFileMetadata(Configuration conf, Path hdfsFilePath) throws IOException {
  this.metaData = ParquetFileReader.readFooter(conf, hdfsFilePath, NO_FILTER);
}
/**
 * Reads the footer metadata of every file in {@code fileStatuses}, in order.
 * No row-group filtering is applied to any footer.
 *
 * @param fileStatuses the files whose footers are read
 * @param conf Hadoop configuration used to open each file
 * @return one {@link ParquetMetadata} per input file, positionally aligned
 * @throws IOException if any footer cannot be read
 */
public static ParquetMetadata[] getMetadatas(FileStatus[] fileStatuses, Configuration conf) throws IOException {
  final int count = fileStatuses.length;
  ParquetMetadata[] footers = new ParquetMetadata[count];
  for (int i = 0; i < count; i++) {
    footers[i] = ParquetFileReader.readFooter(conf, fileStatuses[i].getPath(), NO_FILTER);
  }
  return footers;
}
/**
 * Reads the meta data block in the footer of the file.
 *
 * @param configuration the Hadoop configuration used to open the file
 * @param file the parquet File
 * @return the metadata blocks in the footer
 * @throws IOException if an error occurs while reading the file
 * @deprecated use {@link ParquetFileReader#readFooter(Configuration, Path, MetadataFilter)}
 *             with an explicit filter (this overload always uses {@code NO_FILTER})
 */
@Deprecated public static final ParquetMetadata readFooter(Configuration configuration, Path file) throws IOException { return readFooter(configuration, file, NO_FILTER); }
/**
 * Reads the footer of {@code file} without any row-group filtering.
 *
 * @param configuration the Hadoop configuration used to open the file
 * @param file status of the Parquet file whose footer is read
 * @return the metadata found in the footer
 * @throws IOException if an error occurs while reading the file
 * @deprecated use {@link ParquetFileReader#readFooter(Configuration, FileStatus, MetadataFilter)}
 */
@Deprecated public static final ParquetMetadata readFooter(Configuration configuration, FileStatus file) throws IOException { return readFooter(configuration, file, NO_FILTER); }
/**
 * Reads the metadata in the footer of the file, skipping row groups (or not)
 * according to the provided filter.
 *
 * @param configuration the Hadoop configuration used to resolve the file system
 * @param file the Parquet File
 * @param filter the filter to apply to row groups
 * @return the metadata with row groups filtered
 * @throws IOException if an error occurs while reading the file
 */
public static ParquetMetadata readFooter(Configuration configuration, Path file, MetadataFilter filter) throws IOException {
  FileStatus status = file.getFileSystem(configuration).getFileStatus(file);
  return readFooter(configuration, status, filter);
}
// Callable body: reads the footer of currentFile with no row-group filtering.
// Any failure (not just IOException) is wrapped in a ParquetDecodingException,
// preserving the original cause for diagnosis.
// NOTE(review): the trailing "} }));" closes an anonymous class and an enclosing
// call that start outside this view — do not reformat this span in isolation.
@Override public ParquetMetadata call() throws Exception { try { ParquetMetadata footer = ParquetFileReader.readFooter(configuration, currentFile, NO_FILTER); return footer; } catch (Exception e) { throw new ParquetDecodingException("could not read footer", e); } } }));
// Callable body: reads the footer of currentFile (optionally skipping row groups,
// per filter(skipRowGroups)) and pairs it with the file path as a Footer.
// IOExceptions are rethrown with the file path added to the message, keeping the
// original exception as the cause.
// NOTE(review): the trailing "});" closes an enclosing call outside this view.
@Override public Footer call() throws Exception { try { return new Footer(currentFile.getPath(), readFooter(configuration, currentFile, filter(skipRowGroups))); } catch (IOException e) { throw new IOException("Could not read footer for file " + currentFile, e); } } });
/**
 * Reads the Parquet schema from the footer of a data file.
 *
 * @param parquetFilePath location of the Parquet data file
 * @return the file's message type (schema)
 * @throws IOException if the footer cannot be read
 * @throws IllegalArgumentException if the file does not exist
 */
private MessageType readSchemaFromDataFile(Path parquetFilePath) throws IOException {
  LOG.info("Reading schema from " + parquetFilePath);
  if (!fs.exists(parquetFilePath)) {
    throw new IllegalArgumentException(
        "Failed to read schema from data file " + parquetFilePath + ". File does not exist.");
  }
  ParquetMetadata footer =
      ParquetFileReader.readFooter(fs.getConf(), parquetFilePath, ParquetMetadataConverter.NO_FILTER);
  return footer.getFileMetaData().getSchema();
}
/**
 * Specifically reads a given summary file, translating its merged footer into
 * one {@link Footer} per data file it describes.
 *
 * @param configuration the Hadoop configuration used to open the summary file
 * @param summaryStatus status of the summary file to read
 * @return the metadata translated for each file
 * @throws IOException if the summary file cannot be read
 */
public static List<Footer> readSummaryFile(Configuration configuration, FileStatus summaryStatus) throws IOException {
  // Row groups are kept (filter(false)): the summary footer carries them.
  ParquetMetadata mergedFooters = readFooter(configuration, summaryStatus, filter(false));
  Path parent = summaryStatus.getPath().getParent();
  return footersFromSummaryFile(parent, mergedFooters);
}
static ParquetMetadata readSummaryMetadata(Configuration configuration, Path basePath, boolean skipRowGroups) throws IOException { Path metadataFile = new Path(basePath, PARQUET_METADATA_FILE); Path commonMetaDataFile = new Path(basePath, PARQUET_COMMON_METADATA_FILE); FileSystem fileSystem = basePath.getFileSystem(configuration); if (skipRowGroups && fileSystem.exists(commonMetaDataFile)) { // reading the summary file that does not contain the row groups if (Log.INFO) LOG.info("reading summary file: " + commonMetaDataFile); return readFooter(configuration, commonMetaDataFile, filter(skipRowGroups)); } else if (fileSystem.exists(metadataFile)) { if (Log.INFO) LOG.info("reading summary file: " + metadataFile); return readFooter(configuration, metadataFile, filter(skipRowGroups)); } else { return null; } }
/**
 * Reads the schema from the parquet file. This is different from ParquetUtils
 * as it uses the twitter parquet to support hive 1.1.0.
 *
 * @param conf the Hadoop configuration used to open the file
 * @param parquetFilePath location of the Parquet file
 * @return the file's message type (schema)
 * @throws HoodieIOException if the footer cannot be read
 */
private static MessageType readSchema(Configuration conf, Path parquetFilePath) {
  try {
    ParquetMetadata footer = ParquetFileReader.readFooter(conf, parquetFilePath);
    return footer.getFileMetaData().getSchema();
  } catch (IOException e) {
    throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath, e);
  }
}
// NOTE(review): fragment of a larger method — the enclosing definition is not
// visible here; `file`, `metaData`, `conf`, and `path` are declared elsewhere.
file = path;
// Read the whole footer via the deprecated no-filter overload, then extract the schema.
metaData = ParquetFileReader.readFooter(conf, file);
MessageType schema = metaData.getFileMetaData().getSchema();
/**
 * CLI entry point: reads the footer of the Parquet file named by the first
 * positional argument and dumps its content to stdout.
 *
 * <p>Options: {@code -m} suppresses metadata, {@code -d} suppresses data,
 * {@code -c} restricts output to the named columns.
 *
 * @param options parsed command line (first positional arg is the input path)
 * @throws Exception if the footer cannot be read or the dump fails
 */
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);

  String input = options.getArgs()[0];
  Configuration conf = new Configuration();
  Path inpath = new Path(input);
  ParquetMetadata metaData = ParquetFileReader.readFooter(conf, inpath);
  MessageType schema = metaData.getFileMetaData().getSchema();

  PrettyPrintWriter out = PrettyPrintWriter.stdoutPrettyPrinter()
      .withAutoColumn()
      .withAutoCrop()
      .withWhitespaceHandler(WhiteSpaceHandler.ELIMINATE_NEWLINES)
      .withColumnPadding(1)
      .withMaxBufferedLines(1000000)
      .withFlushOnTab()
      .build();

  // Flags are inverted: presence of -m / -d disables that section of the dump.
  boolean showmd = !options.hasOption('m');
  boolean showdt = !options.hasOption('d');

  // null means "show all columns"; -c narrows to an explicit set.
  Set<String> showColumns = null;
  if (options.hasOption('c')) {
    showColumns = new HashSet<String>(Arrays.asList(options.getOptionValues('c')));
  }

  dump(out, metaData, schema, inpath, showmd, showdt, showColumns);
}
// NOTE(review): fragment of a larger method — `configuration` and `path` are
// declared outside this view.
// Read the full footer (no row-group filtering) and split it into its two parts:
// the per-row-group block metadata and the file-level metadata (schema, key/values).
ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(configuration, path, NO_FILTER);
List<BlockMetaData> blocks = parquetMetadata.getBlocks();
FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
path));
// NOTE(review): the leading "path));" above closes a statement that begins
// outside this view — this span is a fragment of a larger method.
// Read the footer without row-group filtering, then let filterBlocks decide
// which row groups to keep.
ParquetMetadata footer = ParquetFileReader.readFooter( hadoopConfiguration, path, ParquetMetadataConverter.NO_FILTER);
List<BlockMetaData> blocks = filterBlocks(footer.getBlocks());
// NOTE(review): fragment — the "if" matching the "} else {" below, and the end of
// the "for" loop, lie outside this view. Do not reformat this span in isolation.
// Split-aware branch: only read footer entries for row groups overlapping
// the byte range [split.getStart(), split.getEnd()].
footer = readFooter(configuration, path, range(split.getStart(), split.getEnd()));
MessageType fileSchema = footer.getFileMetaData().getSchema();
Filter filter = getFilter(configuration);
} else {
// Explicit row-group offsets were supplied: read the full footer and select
// matching row groups by offset.
footer = readFooter(configuration, path, NO_FILTER);
Set<Long> offsets = new HashSet<Long>();
for (long offset : rowGroupOffsets) {
// NOTE(review): fragment of a larger method — `jobConf` and `finalPath` are
// declared outside this view.
// Build the row filter from the job configuration, then read the full footer
// (deprecated no-filter overload) and split it into block and file metadata.
FilterCompat.Filter filter = setFilter(jobConf);
final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(jobConf, finalPath);
final List<BlockMetaData> blocks = parquetMetadata.getBlocks();
final FileMetaData fileMetaData = parquetMetadata.getFileMetaData();