readContext = new DataWritableReadSupport().init(new InitContext(jobConf, null, fileMetaData.getSchema()));
Map<String, String> fileMetadata = footer.getFileMetaData().getKeyValueMetaData(); ReadSupport<T> readSupport = getReadSupportInstance(getReadSupportClass(configuration)); ReadSupport.ReadContext readContext = readSupport.init(new InitContext( taskAttemptContext.getConfiguration(), toSetMultiMap(fileMetadata), fileSchema)); this.requestedSchema = readContext.getRequestedSchema();
Map<String, String> fileMetadata = footer.getFileMetaData().getKeyValueMetaData(); ReadSupport<T> readSupport = getReadSupportInstance(getReadSupportClass(configuration)); ReadSupport.ReadContext readContext = readSupport.init(new InitContext( taskAttemptContext.getConfiguration(), toSetMultiMap(fileMetadata), fileSchema)); this.requestedSchema = readContext.getRequestedSchema();
Map<String, String> fileMetadata = footer.getFileMetaData().getKeyValueMetaData(); ReadSupport<T> readSupport = getReadSupportInstance(getReadSupportClass(configuration)); ReadSupport.ReadContext readContext = readSupport.init(new InitContext( taskAttemptContext.getConfiguration(), toSetMultiMap(fileMetadata), fileSchema)); this.requestedSchema = readContext.getRequestedSchema();
/** * @param configuration the configuration to connect to the file system * @param footers the footers of the files to read * @return the splits for the footers * @throws IOException if there is an error while reading * @deprecated split planning using file footers will be removed */ @Deprecated public List<ParquetInputSplit> getSplits(Configuration configuration, List<Footer> footers) throws IOException { boolean strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true); final long maxSplitSize = configuration.getLong("mapred.max.split.size", Long.MAX_VALUE); final long minSplitSize = Math.max(getFormatMinSplitSize(), configuration.getLong("mapred.min.split.size", 0L)); if (maxSplitSize < 0 || minSplitSize < 0) { throw new ParquetDecodingException("maxSplitSize or minSplitSize should not be negative: maxSplitSize = " + maxSplitSize + "; minSplitSize = " + minSplitSize); } GlobalMetaData globalMetaData = ParquetFileWriter.getGlobalMetaData(footers, strictTypeChecking); ReadContext readContext = getReadSupport(configuration).init(new InitContext( configuration, globalMetaData.getKeyValueMetaData(), globalMetaData.getSchema())); return new ClientSideMetadataSplitStrategy().getSplits( configuration, footers, maxSplitSize, minSplitSize, readContext); }
/** * @param configuration the configuration to connect to the file system * @param footers the footers of the files to read * @return the splits for the footers * @throws IOException if there is an error while reading * @deprecated split planning using file footers will be removed */ @Deprecated public List<ParquetInputSplit> getSplits(Configuration configuration, List<Footer> footers) throws IOException { boolean strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true); final long maxSplitSize = configuration.getLong("mapred.max.split.size", Long.MAX_VALUE); final long minSplitSize = Math.max(getFormatMinSplitSize(), configuration.getLong("mapred.min.split.size", 0L)); if (maxSplitSize < 0 || minSplitSize < 0) { throw new ParquetDecodingException("maxSplitSize or minSplitSize should not be negative: maxSplitSize = " + maxSplitSize + "; minSplitSize = " + minSplitSize); } GlobalMetaData globalMetaData = ParquetFileWriter.getGlobalMetaData(footers, strictTypeChecking); ReadContext readContext = getReadSupport(configuration).init(new InitContext( configuration, globalMetaData.getKeyValueMetaData(), globalMetaData.getSchema())); return new ClientSideMetadataSplitStrategy().getSplits( configuration, footers, maxSplitSize, minSplitSize, readContext); }
public void initialize(ParquetFileReader reader, Configuration configuration) throws IOException { // initialize a ReadContext for this file this.reader = reader; FileMetaData parquetFileMetadata = reader.getFooter().getFileMetaData(); this.fileSchema = parquetFileMetadata.getSchema(); Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData(); ReadSupport.ReadContext readContext = readSupport.init(new InitContext( configuration, toSetMultiMap(fileMetadata), fileSchema)); this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy()); this.requestedSchema = readContext.getRequestedSchema(); this.columnCount = requestedSchema.getPaths().size(); this.recordConverter = readSupport.prepareForRead( configuration, fileMetadata, fileSchema, readContext); this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true); this.total = reader.getRecordCount(); this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(configuration, total); this.filterRecords = configuration.getBoolean(RECORD_FILTERING_ENABLED, true); reader.setRequestedSchema(requestedSchema); LOG.info("RecordReader initialized will read a total of {} records.", total); }
public void initialize(ParquetFileReader reader, Configuration configuration) throws IOException { // initialize a ReadContext for this file this.reader = reader; FileMetaData parquetFileMetadata = reader.getFooter().getFileMetaData(); this.fileSchema = parquetFileMetadata.getSchema(); Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData(); ReadSupport.ReadContext readContext = readSupport.init(new InitContext( configuration, toSetMultiMap(fileMetadata), fileSchema)); this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy()); this.requestedSchema = readContext.getRequestedSchema(); this.columnCount = requestedSchema.getPaths().size(); this.recordConverter = readSupport.prepareForRead( configuration, fileMetadata, fileSchema, readContext); this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true); this.total = reader.getFilteredRecordCount(); this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(configuration, total); this.filterRecords = configuration.getBoolean(RECORD_FILTERING_ENABLED, true); reader.setRequestedSchema(requestedSchema); LOG.info("RecordReader initialized will read a total of {} records.", total); }
} catch (UnsupportedOperationException e) { context = wrapped.init(new InitContext( configuration, makeMultimap(keyValueMetaData), projection));
public void initialize(ParquetFileReader reader, ParquetReadOptions options) { // copy custom configuration to the Configuration passed to the ReadSupport Configuration conf = new Configuration(); if (options instanceof HadoopReadOptions) { conf = ((HadoopReadOptions) options).getConf(); } for (String property : options.getPropertyNames()) { conf.set(property, options.getProperty(property)); } // initialize a ReadContext for this file this.reader = reader; FileMetaData parquetFileMetadata = reader.getFooter().getFileMetaData(); this.fileSchema = parquetFileMetadata.getSchema(); Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData(); ReadSupport.ReadContext readContext = readSupport.init(new InitContext(conf, toSetMultiMap(fileMetadata), fileSchema)); this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy()); this.requestedSchema = readContext.getRequestedSchema(); this.columnCount = requestedSchema.getPaths().size(); this.recordConverter = readSupport.prepareForRead(conf, fileMetadata, fileSchema, readContext); this.strictTypeChecking = options.isEnabled(STRICT_TYPE_CHECKING, true); this.total = reader.getRecordCount(); this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(options, total); this.filterRecords = options.useRecordFilter(); reader.setRequestedSchema(requestedSchema); LOG.info("RecordReader initialized will read a total of {} records.", total); }
public void initialize(FileMetaData parquetFileMetadata, Path file, List<BlockMetaData> blocks, Configuration configuration) throws IOException { // initialize a ReadContext for this file Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData(); ReadSupport.ReadContext readContext = readSupport.init(new InitContext( configuration, toSetMultiMap(fileMetadata), fileSchema)); this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy()); this.requestedSchema = readContext.getRequestedSchema(); this.fileSchema = parquetFileMetadata.getSchema(); this.file = file; this.columnCount = requestedSchema.getPaths().size(); this.recordConverter = readSupport.prepareForRead( configuration, fileMetadata, fileSchema, readContext); this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true); List<ColumnDescriptor> columns = requestedSchema.getColumns(); reader = new ParquetFileReader(configuration, parquetFileMetadata, file, blocks, columns); for (BlockMetaData block : blocks) { total += block.getRowCount(); } this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(configuration, total); LOG.info("RecordReader initialized will read a total of " + total + " records."); }
public void initialize(ParquetFileReader reader, ParquetReadOptions options) { // copy custom configuration to the Configuration passed to the ReadSupport Configuration conf = new Configuration(); if (options instanceof HadoopReadOptions) { conf = ((HadoopReadOptions) options).getConf(); } for (String property : options.getPropertyNames()) { conf.set(property, options.getProperty(property)); } // initialize a ReadContext for this file this.reader = reader; FileMetaData parquetFileMetadata = reader.getFooter().getFileMetaData(); this.fileSchema = parquetFileMetadata.getSchema(); Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData(); ReadSupport.ReadContext readContext = readSupport.init(new InitContext(conf, toSetMultiMap(fileMetadata), fileSchema)); this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy()); this.requestedSchema = readContext.getRequestedSchema(); this.columnCount = requestedSchema.getPaths().size(); this.recordConverter = readSupport.prepareForRead(conf, fileMetadata, fileSchema, readContext); this.strictTypeChecking = options.isEnabled(STRICT_TYPE_CHECKING, true); this.total = reader.getFilteredRecordCount(); this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(options, total); this.filterRecords = options.useRecordFilter(); reader.setRequestedSchema(requestedSchema); LOG.info("RecordReader initialized will read a total of {} records.", total); }
Map<String, String> fileMetadata = footer.getFileMetaData().getKeyValueMetaData(); ReadSupport<T> readSupport = getReadSupportInstance(getReadSupportClass(configuration)); ReadSupport.ReadContext readContext = readSupport.init(new InitContext( taskAttemptContext.getConfiguration(), toSetMultiMap(fileMetadata), fileSchema)); this.requestedSchema = readContext.getRequestedSchema();
public void initialize(FileMetaData parquetFileMetadata, Path file, List<BlockMetaData> blocks, Configuration configuration) throws IOException { // initialize a ReadContext for this file Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData(); ReadSupport.ReadContext readContext = readSupport.init(new InitContext( configuration, toSetMultiMap(fileMetadata), fileSchema)); this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy()); this.requestedSchema = readContext.getRequestedSchema(); this.fileSchema = parquetFileMetadata.getSchema(); this.file = file; this.columnCount = requestedSchema.getPaths().size(); this.recordConverter = readSupport.prepareForRead( configuration, fileMetadata, fileSchema, readContext); this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true); List<ColumnDescriptor> columns = requestedSchema.getColumns(); reader = new ParquetFileReader(configuration, parquetFileMetadata, file, blocks, columns); for (BlockMetaData block : blocks) { total += block.getRowCount(); } this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(configuration, total); LOG.info("RecordReader initialized will read a total of " + total + " records."); }