org.apache.parquet.hadoop.api.InitContext.<init> java code examples

// Initialize a fresh DataWritableReadSupport with the job configuration and the file's schema.
// The key/value metadata argument of InitContext is passed as null here.
readContext = new DataWritableReadSupport().init(new InitContext(jobConf,
null, fileMetaData.getSchema()));

// Key/value metadata stored in the file footer.
Map<String, String> fileMetadata = footer.getFileMetaData().getKeyValueMetaData();
// Instantiate the ReadSupport implementation selected through the configuration.
ReadSupport<T> readSupport = getReadSupportInstance(getReadSupportClass(configuration));
// Initialize the ReadSupport with the task configuration, the footer metadata
// (converted by toSetMultiMap) and the file schema.
ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
  taskAttemptContext.getConfiguration(), toSetMultiMap(fileMetadata), fileSchema));
// Remember the schema the ReadSupport asked for.
this.requestedSchema = readContext.getRequestedSchema();

// Key/value metadata stored in the file footer.
Map<String, String> fileMetadata = footer.getFileMetaData().getKeyValueMetaData();
// Instantiate the ReadSupport implementation selected through the configuration.
ReadSupport<T> readSupport = getReadSupportInstance(getReadSupportClass(configuration));
// Initialize the ReadSupport with the task configuration, the footer metadata
// (converted by toSetMultiMap) and the file schema.
ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
  taskAttemptContext.getConfiguration(), toSetMultiMap(fileMetadata), fileSchema));
// Remember the schema the ReadSupport asked for.
this.requestedSchema = readContext.getRequestedSchema();

// Key/value metadata stored in the file footer.
Map<String, String> fileMetadata = footer.getFileMetaData().getKeyValueMetaData();
// Instantiate the ReadSupport implementation selected through the configuration.
ReadSupport<T> readSupport = getReadSupportInstance(getReadSupportClass(configuration));
// Initialize the ReadSupport with the task configuration, the footer metadata
// (converted by toSetMultiMap) and the file schema.
ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
  taskAttemptContext.getConfiguration(), toSetMultiMap(fileMetadata), fileSchema));
// Remember the schema the ReadSupport asked for.
this.requestedSchema = readContext.getRequestedSchema();

/**
 * Computes the input splits for files whose footers have already been read.
 *
 * <p>The split size bounds are read from {@code mapred.max.split.size} and
 * {@code mapred.min.split.size}; the lower bound is never smaller than the value returned by
 * {@code getFormatMinSplitSize()}. The footers are merged into one {@code GlobalMetaData},
 * which is used to initialize the configured read support before the actual planning is
 * delegated to {@code ClientSideMetadataSplitStrategy}.
 *
 * @param configuration the configuration to connect to the file system
 * @param footers the footers of the files to read
 * @return the splits for the footers
 * @throws IOException if there is an error while reading
 * @throws ParquetDecodingException if either split size bound is negative
 * @deprecated split planning using file footers will be removed
 */
@Deprecated
public List<ParquetInputSplit> getSplits(Configuration configuration, List<Footer> footers) throws IOException {
 final boolean strict = configuration.getBoolean(STRICT_TYPE_CHECKING, true);

 // Resolve the upper and lower split size bounds.
 final long upperBound = configuration.getLong("mapred.max.split.size", Long.MAX_VALUE);
 final long formatFloor = getFormatMinSplitSize();
 final long configuredFloor = configuration.getLong("mapred.min.split.size", 0L);
 final long lowerBound = Math.max(formatFloor, configuredFloor);
 if (upperBound < 0 || lowerBound < 0) {
  throw new ParquetDecodingException("maxSplitSize or minSplitSize should not be negative: maxSplitSize = " + upperBound + "; minSplitSize = " + lowerBound);
 }

 // Merge all footers, then let the read support build its context from the merged view.
 final GlobalMetaData merged = ParquetFileWriter.getGlobalMetaData(footers, strict);
 final ReadContext context = getReadSupport(configuration).init(
   new InitContext(configuration, merged.getKeyValueMetaData(), merged.getSchema()));

 final ClientSideMetadataSplitStrategy strategy = new ClientSideMetadataSplitStrategy();
 return strategy.getSplits(configuration, footers, upperBound, lowerBound, context);
}

/**
 * Computes the input splits for files whose footers have already been read.
 *
 * <p>The split size bounds are read from {@code mapred.max.split.size} and
 * {@code mapred.min.split.size}; the lower bound is never smaller than the value returned by
 * {@code getFormatMinSplitSize()}. The footers are merged into one {@code GlobalMetaData},
 * which is used to initialize the configured read support before the actual planning is
 * delegated to {@code ClientSideMetadataSplitStrategy}.
 *
 * @param configuration the configuration to connect to the file system
 * @param footers the footers of the files to read
 * @return the splits for the footers
 * @throws IOException if there is an error while reading
 * @throws ParquetDecodingException if either split size bound is negative
 * @deprecated split planning using file footers will be removed
 */
@Deprecated
public List<ParquetInputSplit> getSplits(Configuration configuration, List<Footer> footers) throws IOException {
 final boolean strict = configuration.getBoolean(STRICT_TYPE_CHECKING, true);

 // Resolve the upper and lower split size bounds.
 final long upperBound = configuration.getLong("mapred.max.split.size", Long.MAX_VALUE);
 final long formatFloor = getFormatMinSplitSize();
 final long configuredFloor = configuration.getLong("mapred.min.split.size", 0L);
 final long lowerBound = Math.max(formatFloor, configuredFloor);
 if (upperBound < 0 || lowerBound < 0) {
  throw new ParquetDecodingException("maxSplitSize or minSplitSize should not be negative: maxSplitSize = " + upperBound + "; minSplitSize = " + lowerBound);
 }

 // Merge all footers, then let the read support build its context from the merged view.
 final GlobalMetaData merged = ParquetFileWriter.getGlobalMetaData(footers, strict);
 final ReadContext context = getReadSupport(configuration).init(
   new InitContext(configuration, merged.getKeyValueMetaData(), merged.getSchema()));

 final ClientSideMetadataSplitStrategy strategy = new ClientSideMetadataSplitStrategy();
 return strategy.getSplits(configuration, footers, upperBound, lowerBound, context);
}

/**
 * Binds this record reader to an already opened Parquet file and prepares it for reading.
 *
 * <p>The file schema and key/value metadata are taken from the reader's footer, the read
 * support is initialized with them, and the schema it requests is pushed back to the reader.
 * The expected record total is taken from {@code reader.getRecordCount()}.
 *
 * @param reader the open file reader to read from
 * @param configuration the configuration handed to the read support and used for the
 *        {@code STRICT_TYPE_CHECKING} and {@code RECORD_FILTERING_ENABLED} flags
 * @throws IOException declared by the signature; propagated from the calls made here
 */
public void initialize(ParquetFileReader reader, Configuration configuration)
  throws IOException {
 this.reader = reader;

 // Footer-level information: schema, creator string and key/value pairs.
 final FileMetaData footerMetaData = reader.getFooter().getFileMetaData();
 this.fileSchema = footerMetaData.getSchema();
 final Map<String, String> keyValues = footerMetaData.getKeyValueMetaData();

 // Let the read support decide which schema it wants to read from this file.
 final InitContext initContext =
   new InitContext(configuration, toSetMultiMap(keyValues), fileSchema);
 final ReadSupport.ReadContext context = readSupport.init(initContext);

 this.columnIOFactory = new ColumnIOFactory(footerMetaData.getCreatedBy());
 this.requestedSchema = context.getRequestedSchema();
 this.columnCount = this.requestedSchema.getPaths().size();
 this.recordConverter =
   readSupport.prepareForRead(configuration, keyValues, fileSchema, context);
 this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true);

 this.total = reader.getRecordCount();
 this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(configuration, total);
 this.filterRecords = configuration.getBoolean(RECORD_FILTERING_ENABLED, true);

 // Restrict the underlying reader to the columns of the requested schema.
 reader.setRequestedSchema(requestedSchema);
 LOG.info("RecordReader initialized will read a total of {} records.", total);
}

/**
 * Binds this record reader to an already opened Parquet file and prepares it for reading.
 *
 * <p>The file schema and key/value metadata are taken from the reader's footer, the read
 * support is initialized with them, and the schema it requests is pushed back to the reader.
 * The expected record total is taken from {@code reader.getFilteredRecordCount()}.
 *
 * @param reader the open file reader to read from
 * @param configuration the configuration handed to the read support and used for the
 *        {@code STRICT_TYPE_CHECKING} and {@code RECORD_FILTERING_ENABLED} flags
 * @throws IOException declared by the signature; propagated from the calls made here
 */
public void initialize(ParquetFileReader reader, Configuration configuration)
  throws IOException {
 this.reader = reader;

 // Footer-level information: schema, creator string and key/value pairs.
 final FileMetaData footerMetaData = reader.getFooter().getFileMetaData();
 this.fileSchema = footerMetaData.getSchema();
 final Map<String, String> keyValues = footerMetaData.getKeyValueMetaData();

 // Let the read support decide which schema it wants to read from this file.
 final InitContext initContext =
   new InitContext(configuration, toSetMultiMap(keyValues), fileSchema);
 final ReadSupport.ReadContext context = readSupport.init(initContext);

 this.columnIOFactory = new ColumnIOFactory(footerMetaData.getCreatedBy());
 this.requestedSchema = context.getRequestedSchema();
 this.columnCount = this.requestedSchema.getPaths().size();
 this.recordConverter =
   readSupport.prepareForRead(configuration, keyValues, fileSchema, context);
 this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true);

 this.total = reader.getFilteredRecordCount();
 this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(configuration, total);
 this.filterRecords = configuration.getBoolean(RECORD_FILTERING_ENABLED, true);

 // Restrict the underlying reader to the columns of the requested schema.
 reader.setRequestedSchema(requestedSchema);
 LOG.info("RecordReader initialized will read a total of {} records.", total);
}

} catch (UnsupportedOperationException e) {
 context = wrapped.init(new InitContext(
   configuration, makeMultimap(keyValueMetaData), projection));

/**
 * Binds this record reader to an already opened Parquet file, driven by read options.
 *
 * <p>A {@code Configuration} is built for the read support: the one carried by
 * {@code HadoopReadOptions} when the options are of that type, otherwise a new one. Every
 * property exposed by the options is then set on that configuration. The expected record
 * total is taken from {@code reader.getRecordCount()}.
 *
 * @param reader the open file reader to read from
 * @param options the read options supplying properties, the strict type checking flag and
 *        the record filter flag
 */
public void initialize(ParquetFileReader reader, ParquetReadOptions options) {
 // Pick the Configuration that will be handed to the ReadSupport.
 Configuration readSupportConf = new Configuration();
 if (options instanceof HadoopReadOptions) {
  readSupportConf = ((HadoopReadOptions) options).getConf();
 }
 // Overlay the option properties; they are set directly on the Configuration chosen above.
 for (String key : options.getPropertyNames()) {
  readSupportConf.set(key, options.getProperty(key));
 }

 this.reader = reader;

 // Footer-level information: schema, creator string and key/value pairs.
 final FileMetaData footerMetaData = reader.getFooter().getFileMetaData();
 this.fileSchema = footerMetaData.getSchema();
 final Map<String, String> keyValues = footerMetaData.getKeyValueMetaData();

 // Let the read support decide which schema it wants to read from this file.
 final InitContext initContext =
   new InitContext(readSupportConf, toSetMultiMap(keyValues), fileSchema);
 final ReadSupport.ReadContext context = readSupport.init(initContext);

 this.columnIOFactory = new ColumnIOFactory(footerMetaData.getCreatedBy());
 this.requestedSchema = context.getRequestedSchema();
 this.columnCount = this.requestedSchema.getPaths().size();
 this.recordConverter =
   readSupport.prepareForRead(readSupportConf, keyValues, fileSchema, context);
 this.strictTypeChecking = options.isEnabled(STRICT_TYPE_CHECKING, true);

 this.total = reader.getRecordCount();
 this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(options, total);
 this.filterRecords = options.useRecordFilter();

 // Restrict the underlying reader to the columns of the requested schema.
 reader.setRequestedSchema(requestedSchema);
 LOG.info("RecordReader initialized will read a total of {} records.", total);
}

/**
 * Prepares this record reader to read the given row groups of a Parquet file.
 *
 * <p>The file schema is taken from {@code parquetFileMetadata} and stored before the read
 * support is initialized, so that the {@code InitContext} handed to
 * {@code ReadSupport.init} carries this file's schema rather than whatever the
 * {@code fileSchema} field held previously (nothing on a first call).
 *
 * @param parquetFileMetadata the file-level metadata from the footer (schema, key/value
 *        metadata, creator string)
 * @param file the path of the file to read
 * @param blocks the row groups to read; their row counts are added to {@code total}
 * @param configuration the configuration handed to the read support and the file reader
 * @throws IOException declared by the signature; propagated from the calls made here
 */
public void initialize(FileMetaData parquetFileMetadata,
            Path file, List<BlockMetaData> blocks, Configuration configuration)
  throws IOException {
 // The schema must be assigned first: the InitContext below is built from this field.
 this.fileSchema = parquetFileMetadata.getSchema();
 this.file = file;
 // initialize a ReadContext for this file
 Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData();
 ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
   configuration, toSetMultiMap(fileMetadata), fileSchema));
 this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy());
 this.requestedSchema = readContext.getRequestedSchema();
 this.columnCount = requestedSchema.getPaths().size();
 this.recordConverter = readSupport.prepareForRead(
   configuration, fileMetadata, fileSchema, readContext);
 this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true);
 // Open a reader limited to the columns of the requested schema.
 List<ColumnDescriptor> columns = requestedSchema.getColumns();
 reader = new ParquetFileReader(configuration, parquetFileMetadata, file, blocks, columns);
 // Row counts are accumulated onto the current value of total.
 for (BlockMetaData block : blocks) {
  total += block.getRowCount();
 }
 this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(configuration, total);
 LOG.info("RecordReader initialized will read a total of " + total + " records.");
}

/**
 * Binds this record reader to an already opened Parquet file, driven by read options.
 *
 * <p>A {@code Configuration} is built for the read support: the one carried by
 * {@code HadoopReadOptions} when the options are of that type, otherwise a new one. Every
 * property exposed by the options is then set on that configuration. The expected record
 * total is taken from {@code reader.getFilteredRecordCount()}.
 *
 * @param reader the open file reader to read from
 * @param options the read options supplying properties, the strict type checking flag and
 *        the record filter flag
 */
public void initialize(ParquetFileReader reader, ParquetReadOptions options) {
 // Pick the Configuration that will be handed to the ReadSupport.
 Configuration readSupportConf = new Configuration();
 if (options instanceof HadoopReadOptions) {
  readSupportConf = ((HadoopReadOptions) options).getConf();
 }
 // Overlay the option properties; they are set directly on the Configuration chosen above.
 for (String key : options.getPropertyNames()) {
  readSupportConf.set(key, options.getProperty(key));
 }

 this.reader = reader;

 // Footer-level information: schema, creator string and key/value pairs.
 final FileMetaData footerMetaData = reader.getFooter().getFileMetaData();
 this.fileSchema = footerMetaData.getSchema();
 final Map<String, String> keyValues = footerMetaData.getKeyValueMetaData();

 // Let the read support decide which schema it wants to read from this file.
 final InitContext initContext =
   new InitContext(readSupportConf, toSetMultiMap(keyValues), fileSchema);
 final ReadSupport.ReadContext context = readSupport.init(initContext);

 this.columnIOFactory = new ColumnIOFactory(footerMetaData.getCreatedBy());
 this.requestedSchema = context.getRequestedSchema();
 this.columnCount = this.requestedSchema.getPaths().size();
 this.recordConverter =
   readSupport.prepareForRead(readSupportConf, keyValues, fileSchema, context);
 this.strictTypeChecking = options.isEnabled(STRICT_TYPE_CHECKING, true);

 this.total = reader.getFilteredRecordCount();
 this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(options, total);
 this.filterRecords = options.useRecordFilter();

 // Restrict the underlying reader to the columns of the requested schema.
 reader.setRequestedSchema(requestedSchema);
 LOG.info("RecordReader initialized will read a total of {} records.", total);
}

// Key/value metadata stored in the file footer.
Map<String, String> fileMetadata = footer.getFileMetaData().getKeyValueMetaData();
// Instantiate the ReadSupport implementation selected through the configuration.
ReadSupport<T> readSupport = getReadSupportInstance(getReadSupportClass(configuration));
// Initialize the ReadSupport with the task configuration, the footer metadata
// (converted by toSetMultiMap) and the file schema.
ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
  taskAttemptContext.getConfiguration(), toSetMultiMap(fileMetadata), fileSchema));
// Remember the schema the ReadSupport asked for.
this.requestedSchema = readContext.getRequestedSchema();

/**
 * Prepares this record reader to read the given row groups of a Parquet file.
 *
 * <p>The file schema is taken from {@code parquetFileMetadata} and stored before the read
 * support is initialized, so that the {@code InitContext} handed to
 * {@code ReadSupport.init} carries this file's schema rather than whatever the
 * {@code fileSchema} field held previously (nothing on a first call).
 *
 * @param parquetFileMetadata the file-level metadata from the footer (schema, key/value
 *        metadata, creator string)
 * @param file the path of the file to read
 * @param blocks the row groups to read; their row counts are added to {@code total}
 * @param configuration the configuration handed to the read support and the file reader
 * @throws IOException declared by the signature; propagated from the calls made here
 */
public void initialize(FileMetaData parquetFileMetadata,
            Path file, List<BlockMetaData> blocks, Configuration configuration)
  throws IOException {
 // The schema must be assigned first: the InitContext below is built from this field.
 this.fileSchema = parquetFileMetadata.getSchema();
 this.file = file;
 // initialize a ReadContext for this file
 Map<String, String> fileMetadata = parquetFileMetadata.getKeyValueMetaData();
 ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
   configuration, toSetMultiMap(fileMetadata), fileSchema));
 this.columnIOFactory = new ColumnIOFactory(parquetFileMetadata.getCreatedBy());
 this.requestedSchema = readContext.getRequestedSchema();
 this.columnCount = requestedSchema.getPaths().size();
 this.recordConverter = readSupport.prepareForRead(
   configuration, fileMetadata, fileSchema, readContext);
 this.strictTypeChecking = configuration.getBoolean(STRICT_TYPE_CHECKING, true);
 // Open a reader limited to the columns of the requested schema.
 List<ColumnDescriptor> columns = requestedSchema.getColumns();
 reader = new ParquetFileReader(configuration, parquetFileMetadata, file, blocks, columns);
 // Row counts are accumulated onto the current value of total.
 for (BlockMetaData block : blocks) {
  total += block.getRowCount();
 }
 this.unmaterializableRecordCounter = new UnmaterializableRecordCounter(configuration, total);
 LOG.info("RecordReader initialized will read a total of " + total + " records.");
}

Popular methods of InitContext

getFileSchema
this is the union of all the schemas when reading multiple files.
getConfiguration
getMergedKeyValueMetaData
If there is a conflicting value when reading from multiple files, an exception will be thrown
getKeyValueMetadata
each key is associated with the list of distinct values found in footers

Popular in Java

Reading from database using SQL prepared statement
setRequestProperty (URLConnection)
setScale (BigDecimal)
putExtra (Intent)
BigDecimal (java.math)
An immutable arbitrary-precision signed decimal. A value is represented by an arbitrary-precision "un…
TimeZone (java.util)
TimeZone represents a time zone offset, and also figures out daylight savings. Typically, you get a
Callable (java.util.concurrent)
A task that returns a result and may throw an exception. Implementors define a single method with no
LogFactory (org.apache.commons.logging)
Factory for creating Log instances, with discovery and configuration features similar to that employ
IsNull (org.hamcrest.core)
Is the value null?
JComboBox (javax.swing)
Top PhpStorm plugins

How to use the org.apache.parquet.hadoop.api.InitContext constructor

Best Java code snippets using org.apache.parquet.hadoop.api.InitContext.<init> (Showing top 14 results out of 315)

How to use
org.apache.parquet.hadoop.api.InitContext
constructor