/**
 * Builds a metadata holder by eagerly reading the footer of the Parquet file
 * at {@code hdfsFilePath}. No row-group filtering is applied.
 *
 * @param conf Hadoop configuration used to resolve the file system
 * @param hdfsFilePath path of the Parquet file whose footer is read
 * @throws IOException if the footer cannot be read
 */
public ParquetFileMetadata(Configuration conf, Path hdfsFilePath) throws IOException {
  this.metaData = ParquetFileReader.readFooter(conf, hdfsFilePath, NO_FILTER);
}
/**
 * Reads the footer metadata of every file in {@code fileStatuses}, in order.
 * No row-group filtering is applied to any footer.
 *
 * @param fileStatuses the files whose footers are read
 * @param conf Hadoop configuration used to open each file
 * @return one {@link ParquetMetadata} per input file, positionally aligned
 * @throws IOException if any footer cannot be read
 */
public static ParquetMetadata[] getMetadatas(FileStatus[] fileStatuses, Configuration conf) throws IOException {
  final int count = fileStatuses.length;
  ParquetMetadata[] footers = new ParquetMetadata[count];
  for (int i = 0; i < count; i++) {
    footers[i] = ParquetFileReader.readFooter(conf, fileStatuses[i].getPath(), NO_FILTER);
  }
  return footers;
}
/**
 * Reads the meta data block in the footer of the file.
 *
 * @param configuration the Hadoop configuration used to open the file
 * @param file the parquet File
 * @return the metadata blocks in the footer
 * @throws IOException if an error occurs while reading the file
 * @deprecated use {@link ParquetFileReader#readFooter(Configuration, Path, MetadataFilter)}
 *             with an explicit filter (this overload always uses {@code NO_FILTER})
 */
@Deprecated public static final ParquetMetadata readFooter(Configuration configuration, Path file) throws IOException { return readFooter(configuration, file, NO_FILTER); }
/**
 * Reads the footer of {@code file} without any row-group filtering.
 *
 * @param configuration the Hadoop configuration used to open the file
 * @param file status of the Parquet file whose footer is read
 * @return the metadata found in the footer
 * @throws IOException if an error occurs while reading the file
 * @deprecated use {@link ParquetFileReader#readFooter(Configuration, FileStatus, MetadataFilter)}
 */
@Deprecated public static final ParquetMetadata readFooter(Configuration configuration, FileStatus file) throws IOException { return readFooter(configuration, file, NO_FILTER); }
/**
 * Reads the metadata in the footer of the file, skipping row groups (or not)
 * according to the provided filter.
 *
 * @param configuration the Hadoop configuration used to resolve the file system
 * @param file the Parquet File
 * @param filter the filter to apply to row groups
 * @return the metadata with row groups filtered
 * @throws IOException if an error occurs while reading the file
 */
public static ParquetMetadata readFooter(Configuration configuration, Path file, MetadataFilter filter) throws IOException {
  FileStatus status = file.getFileSystem(configuration).getFileStatus(file);
  return readFooter(configuration, status, filter);
}
// Callable body: reads the footer of currentFile with no row-group filtering.
// Any failure (not just IOException) is wrapped in a ParquetDecodingException,
// preserving the original cause for diagnosis.
// NOTE(review): the trailing "} }));" closes an anonymous class and an enclosing
// call that start outside this view — do not reformat this span in isolation.
@Override public ParquetMetadata call() throws Exception { try { ParquetMetadata footer = ParquetFileReader.readFooter(configuration, currentFile, NO_FILTER); return footer; } catch (Exception e) { throw new ParquetDecodingException("could not read footer", e); } } }));
// Callable body: reads the footer of currentFile (optionally skipping row groups,
// per filter(skipRowGroups)) and pairs it with the file path as a Footer.
// IOExceptions are rethrown with the file path added to the message, keeping the
// original exception as the cause.
// NOTE(review): the trailing "});" closes an enclosing call outside this view.
@Override public Footer call() throws Exception { try { return new Footer(currentFile.getPath(), readFooter(configuration, currentFile, filter(skipRowGroups))); } catch (IOException e) { throw new IOException("Could not read footer for file " + currentFile, e); } } });
/**
 * Reads the Parquet schema from the footer of a data file.
 *
 * @param parquetFilePath location of the Parquet data file
 * @return the file's message type (schema)
 * @throws IOException if the footer cannot be read
 * @throws IllegalArgumentException if the file does not exist
 */
private MessageType readSchemaFromDataFile(Path parquetFilePath) throws IOException {
  LOG.info("Reading schema from " + parquetFilePath);
  if (!fs.exists(parquetFilePath)) {
    throw new IllegalArgumentException(
        "Failed to read schema from data file " + parquetFilePath + ". File does not exist.");
  }
  ParquetMetadata footer =
      ParquetFileReader.readFooter(fs.getConf(), parquetFilePath, ParquetMetadataConverter.NO_FILTER);
  return footer.getFileMetaData().getSchema();
}
/**
 * Specifically reads a given summary file, translating its merged footer into
 * one {@link Footer} per data file it describes.
 *
 * @param configuration the Hadoop configuration used to open the summary file
 * @param summaryStatus status of the summary file to read
 * @return the metadata translated for each file
 * @throws IOException if the summary file cannot be read
 */
public static List<Footer> readSummaryFile(Configuration configuration, FileStatus summaryStatus) throws IOException {
  // Row groups are kept (filter(false)): the summary footer carries them.
  ParquetMetadata mergedFooters = readFooter(configuration, summaryStatus, filter(false));
  Path parent = summaryStatus.getPath().getParent();
  return footersFromSummaryFile(parent, mergedFooters);
}
static ParquetMetadata readSummaryMetadata(Configuration configuration, Path basePath, boolean skipRowGroups) throws IOException { Path metadataFile = new Path(basePath, PARQUET_METADATA_FILE); Path commonMetaDataFile = new Path(basePath, PARQUET_COMMON_METADATA_FILE); FileSystem fileSystem = basePath.getFileSystem(configuration); if (skipRowGroups && fileSystem.exists(commonMetaDataFile)) { // reading the summary file that does not contain the row groups if (Log.INFO) LOG.info("reading summary file: " + commonMetaDataFile); return readFooter(configuration, commonMetaDataFile, filter(skipRowGroups)); } else if (fileSystem.exists(metadataFile)) { if (Log.INFO) LOG.info("reading summary file: " + metadataFile); return readFooter(configuration, metadataFile, filter(skipRowGroups)); } else { return null; } }
/**
 * Reads the schema from the parquet file. This is different from ParquetUtils
 * as it uses the twitter parquet to support hive 1.1.0.
 *
 * @param conf the Hadoop configuration used to open the file
 * @param parquetFilePath location of the Parquet file
 * @return the file's message type (schema)
 * @throws HoodieIOException if the footer cannot be read
 */
private static MessageType readSchema(Configuration conf, Path parquetFilePath) {
  try {
    ParquetMetadata footer = ParquetFileReader.readFooter(conf, parquetFilePath);
    return footer.getFileMetaData().getSchema();
  } catch (IOException e) {
    throw new HoodieIOException("Failed to read footer for parquet " + parquetFilePath, e);
  }
}
// NOTE(review): fragment of a larger method — the enclosing definition is not
// visible here; `file`, `metaData`, `conf`, and `path` are declared elsewhere.
file = path;
// Read the whole footer via the deprecated no-filter overload, then extract the schema.
metaData = ParquetFileReader.readFooter(conf, file);
MessageType schema = metaData.getFileMetaData().getSchema();
/**
 * CLI entry point: reads the footer of the Parquet file named by the first
 * positional argument and dumps its content to stdout.
 *
 * <p>Options: {@code -m} suppresses metadata, {@code -d} suppresses data,
 * {@code -c} restricts output to the named columns.
 *
 * @param options parsed command line (first positional arg is the input path)
 * @throws Exception if the footer cannot be read or the dump fails
 */
@Override
public void execute(CommandLine options) throws Exception {
  super.execute(options);

  String input = options.getArgs()[0];
  Configuration conf = new Configuration();
  Path inpath = new Path(input);
  ParquetMetadata metaData = ParquetFileReader.readFooter(conf, inpath);
  MessageType schema = metaData.getFileMetaData().getSchema();

  PrettyPrintWriter out = PrettyPrintWriter.stdoutPrettyPrinter()
      .withAutoColumn()
      .withAutoCrop()
      .withWhitespaceHandler(WhiteSpaceHandler.ELIMINATE_NEWLINES)
      .withColumnPadding(1)
      .withMaxBufferedLines(1000000)
      .withFlushOnTab()
      .build();

  // Flags are inverted: presence of -m / -d disables that section of the dump.
  boolean showmd = !options.hasOption('m');
  boolean showdt = !options.hasOption('d');

  // null means "show all columns"; -c narrows to an explicit set.
  Set<String> showColumns = null;
  if (options.hasOption('c')) {
    showColumns = new HashSet<String>(Arrays.asList(options.getOptionValues('c')));
  }

  dump(out, metaData, schema, inpath, showmd, showdt, showColumns);
}
// NOTE(review): fragment of a larger method — `configuration` and `path` are
// declared outside this view.
// Read the full footer (no row-group filtering) and split it into its two parts:
// the per-row-group block metadata and the file-level metadata (schema, key/values).
ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(configuration, path, NO_FILTER);
List<BlockMetaData> blocks = parquetMetadata.getBlocks();
FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
path));
// NOTE(review): the leading "path));" above closes a statement that begins
// outside this view — this span is a fragment of a larger method.
// Read the footer without row-group filtering, then let filterBlocks decide
// which row groups to keep.
ParquetMetadata footer = ParquetFileReader.readFooter( hadoopConfiguration, path, ParquetMetadataConverter.NO_FILTER);
List<BlockMetaData> blocks = filterBlocks(footer.getBlocks());
// NOTE(review): fragment — the "if" matching the "} else {" below, and the end of
// the "for" loop, lie outside this view. Do not reformat this span in isolation.
// Split-aware branch: only read footer entries for row groups overlapping
// the byte range [split.getStart(), split.getEnd()].
footer = readFooter(configuration, path, range(split.getStart(), split.getEnd()));
MessageType fileSchema = footer.getFileMetaData().getSchema();
Filter filter = getFilter(configuration);
} else {
// Explicit row-group offsets were supplied: read the full footer and select
// matching row groups by offset.
footer = readFooter(configuration, path, NO_FILTER);
Set<Long> offsets = new HashSet<Long>();
for (long offset : rowGroupOffsets) {
// NOTE(review): fragment of a larger method — `jobConf` and `finalPath` are
// declared outside this view.
// Build the row filter from the job configuration, then read the full footer
// (deprecated no-filter overload) and split it into block and file metadata.
FilterCompat.Filter filter = setFilter(jobConf);
final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(jobConf, finalPath);
final List<BlockMetaData> blocks = parquetMetadata.getBlocks();
final FileMetaData fileMetaData = parquetMetadata.getFileMetaData();