@Override
public ReadContext init(InitContext context) {
  MessageType requestedProjection =
      getSchemaForRead(context.getFileSchema(), getPartialReadSchema(context));
  return new ReadContext(requestedProjection);
}
@Override
public WriteContext init(Configuration configuration) {
  return new WriteContext(type, metadata);
}
/**
 * Creates the ReadContext for the Parquet side, carrying the schema requested
 * during the init phase.
 *
 * @param context the init context, carrying the job configuration and the file schema
 * @return the parquet ReadContext
 */
@Override
public org.apache.parquet.hadoop.api.ReadSupport.ReadContext init(InitContext context) {
  Configuration configuration = context.getConfiguration();
  MessageType fileSchema = context.getFileSchema();
  String columnNames = configuration.get(IOConstants.COLUMNS);
  Map<String, String> contextMetadata = new HashMap<String, String>();
  boolean indexAccess = configuration.getBoolean(PARQUET_COLUMN_INDEX_ACCESS, false);

  if (columnNames != null) {
    List<String> columnNamesList = getColumnNames(columnNames);
    String columnTypes = configuration.get(IOConstants.COLUMNS_TYPES);
    List<TypeInfo> columnTypesList = getColumnTypes(columnTypes);

    MessageType tableSchema =
        getRequestedSchemaForIndexAccess(indexAccess, columnNamesList, columnTypesList, fileSchema);

    contextMetadata.put(HIVE_TABLE_AS_PARQUET_SCHEMA, tableSchema.toString());
    contextMetadata.put(PARQUET_COLUMN_INDEX_ACCESS, String.valueOf(indexAccess));
    this.hiveTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNamesList, columnTypesList);

    return new ReadContext(
        getRequestedPrunedSchema(columnNamesList, tableSchema, configuration), contextMetadata);
  } else {
    contextMetadata.put(HIVE_TABLE_AS_PARQUET_SCHEMA, fileSchema.toString());
    return new ReadContext(fileSchema, contextMetadata);
  }
}
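// The same pattern can be reproduced in a standalone ReadSupport. Below is a minimal
// sketch, not taken from the Hive code above: the class name, the
// "parquet.example.projected.columns" property, and the use of Group records are all
// assumptions for illustration. init() keeps only the columns named in the
// Configuration; prepareForRead() materializes records against that pruned schema.
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.example.data.simple.convert.GroupRecordConverter;
import org.apache.parquet.hadoop.api.InitContext;
import org.apache.parquet.hadoop.api.ReadSupport;
import org.apache.parquet.io.api.RecordMaterializer;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.Type;

public class ProjectingGroupReadSupport extends ReadSupport<Group> {

  // Hypothetical configuration key holding a comma-separated column list.
  public static final String PROJECTED_COLUMNS = "parquet.example.projected.columns";

  @Override
  public ReadContext init(InitContext context) {
    MessageType fileSchema = context.getFileSchema();
    String columns = context.getConfiguration().get(PROJECTED_COLUMNS);
    if (columns == null) {
      // No projection requested: read the full file schema.
      return new ReadContext(fileSchema);
    }
    List<Type> kept = new ArrayList<>();
    for (String name : columns.split(",")) {
      if (fileSchema.containsField(name.trim())) {
        kept.add(fileSchema.getType(name.trim()));
      }
    }
    return new ReadContext(new MessageType(fileSchema.getName(), kept));
  }

  @Override
  public RecordMaterializer<Group> prepareForRead(Configuration configuration,
      Map<String, String> keyValueMetaData, MessageType fileSchema, ReadContext readContext) {
    // Materialize records against the pruned schema chosen in init().
    return new GroupRecordConverter(readContext.getRequestedSchema());
  }
}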
Map<String, String> fileMetadata = footer.getFileMetaData().getKeyValueMetaData();
ReadSupport<T> readSupport = getReadSupportInstance(getReadSupportClass(configuration));
ReadSupport.ReadContext readContext = readSupport.init(new InitContext(
    taskAttemptContext.getConfiguration(), toSetMultiMap(fileMetadata), fileSchema));
this.requestedSchema = readContext.getRequestedSchema();
String sparkRequestedSchemaString =
    configuration.get(ParquetReadSupport$.MODULE$.SPARK_ROW_REQUESTED_SCHEMA());
/**
 * called in {@link org.apache.hadoop.mapreduce.InputFormat#getSplits(org.apache.hadoop.mapreduce.JobContext)} in the front end
 *
 * @param context the initialisation context
 * @return the readContext that defines how to read the file
 */
public ReadContext init(InitContext context) {
  return init(context.getConfiguration(), context.getMergedKeyValueMetaData(), context.getFileSchema());
}
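// The Javadoc above notes that init() is driven from InputFormat#getSplits on the front
// end. A hedged sketch of how a MapReduce job typically registers a ReadSupport so that
// ParquetInputFormat can instantiate it and call init(): MyReadSupport is a placeholder
// class, and the job is otherwise left unconfigured (no mapper or output settings shown).
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.parquet.hadoop.ParquetInputFormat;

public class ParquetJobSetup {
  public static void main(String[] args) throws Exception {
    Job job = Job.getInstance(new Configuration(), "parquet-read");
    job.setInputFormatClass(ParquetInputFormat.class);
    // Tell ParquetInputFormat which ReadSupport to use; its init() decides the
    // requested projection before records are materialized.
    ParquetInputFormat.setReadSupportClass(job, MyReadSupport.class); // placeholder ReadSupport
    FileInputFormat.addInputPath(job, new Path(args[0]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}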
readContext = new DataWritableReadSupport().init(new InitContext(jobConf, null, fileMetaData.getSchema()));
schemaSize = MessageTypeParser.parseMessageType(readContext.getReadSupportMetadata()
    .get(DataWritableReadSupport.HIVE_TABLE_AS_PARQUET_SCHEMA)).getFieldCount();
final List<BlockMetaData> splitGroup = new ArrayList<BlockMetaData>();
// ... (intervening code elided in this excerpt)
    oldSplit.getLocations(),
    filtedBlocks,
    readContext.getRequestedSchema().toString(),
    fileMetaData.getSchema().toString(),
    fileMetaData.getKeyValueMetaData(),
    readContext.getReadSupportMetadata());
return split;
} else {
/**
 * Creates the Hive read support used to materialize Parquet data as Hive records.
 *
 * @param configuration // unused
 * @param keyValueMetaData
 * @param fileSchema // unused
 * @param readContext containing the requested schema and the schema of the hive table
 * @return Record Materializer for Hive
 */
@Override
public RecordMaterializer<ArrayWritable> prepareForRead(final Configuration configuration,
    final Map<String, String> keyValueMetaData, final MessageType fileSchema,
    final org.apache.parquet.hadoop.api.ReadSupport.ReadContext readContext) {
  final Map<String, String> metadata = readContext.getReadSupportMetadata();
  if (metadata == null) {
    throw new IllegalStateException("ReadContext not initialized properly. " +
        "Don't know the Hive Schema.");
  }
  String key = HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION.varname;
  if (!metadata.containsKey(key)) {
    metadata.put(key, String.valueOf(HiveConf.getBoolVar(
        configuration, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION)));
  }
  return new DataWritableRecordConverter(readContext.getRequestedSchema(), metadata, hiveTypeInfo);
}
}
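// Outside of Hive's InputFormat plumbing, the same init()/prepareForRead() pair is
// exercised whenever a ParquetReader is built around a ReadSupport. A minimal sketch
// using the GroupReadSupport example class that ships with parquet-hadoop; the file
// path is made up.
import org.apache.hadoop.fs.Path;
import org.apache.parquet.example.data.Group;
import org.apache.parquet.hadoop.ParquetReader;
import org.apache.parquet.hadoop.example.GroupReadSupport;

public class ReadGroups {
  public static void main(String[] args) throws Exception {
    Path file = new Path("/tmp/example.parquet"); // hypothetical path
    // ParquetReader calls GroupReadSupport.init() to pick the requested schema and
    // prepareForRead() to obtain the RecordMaterializer used for each record.
    try (ParquetReader<Group> reader = ParquetReader.builder(new GroupReadSupport(), file).build()) {
      Group record;
      while ((record = reader.read()) != null) {
        System.out.println(record);
      }
    }
  }
}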
MessageType fullSchema = context.getFileSchema();
HadoopDruidIndexerConfig config = HadoopDruidIndexerConfig.fromConfiguration(context.getConfiguration());
ParseSpec parseSpec = config.getParser().getParseSpec();
@Override
public RecordMaterializer<GenericRecord> prepareForRead(
    Configuration configuration,
    Map<String, String> keyValueMetaData,
    MessageType fileSchema,
    ReadContext readContext
) {
  // coercing this value to false by default here to be friendlier default behavior
  // see https://github.com/apache/incubator-druid/issues/5433#issuecomment-388539306
  String jobProp = "parquet.avro.add-list-element-records";
  Boolean explicitlySet = configuration.getBoolean(jobProp, false);
  if (!explicitlySet) {
    configuration.setBoolean(jobProp, false);
  }

  MessageType parquetSchema = readContext.getRequestedSchema();
  Schema avroSchema = new AvroSchemaConverter(configuration).convert(parquetSchema);

  Class<? extends AvroDataSupplier> suppClass = configuration.getClass(
      AVRO_DATA_SUPPLIER,
      SpecificDataSupplier.class,
      AvroDataSupplier.class
  );
  AvroDataSupplier supplier = ReflectionUtils.newInstance(suppClass, configuration);
  return new AvroRecordMaterializer<>(parquetSchema, avroSchema, supplier.get());
}
}
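// The materializer returned above yields Avro GenericRecords. For comparison, the stock
// parquet-avro reader does the same conversion end to end; a hedged sketch, with the
// file path being an assumption.
import org.apache.avro.generic.GenericRecord;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.avro.AvroParquetReader;
import org.apache.parquet.hadoop.ParquetReader;

public class ReadAvro {
  public static void main(String[] args) throws Exception {
    Path file = new Path("/tmp/example.parquet"); // hypothetical path
    // AvroParquetReader wires in AvroReadSupport, whose prepareForRead converts the
    // Parquet schema to an Avro schema much like the AvroSchemaConverter call above.
    try (ParquetReader<GenericRecord> reader =
             AvroParquetReader.<GenericRecord>builder(file).build()) {
      GenericRecord record;
      while ((record = reader.read()) != null) {
        System.out.println(record);
      }
    }
  }
}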
@Override
public RecordMaterializer<T> prepareForRead(
    Configuration configuration,
    Map<String, String> keyValueMetaData,
    MessageType fileSchema,
    ReadSupport.ReadContext readContext) {
  return delegate.prepareForRead(configuration, keyValueMetaData, fileSchema, readContext);
}
@Override
public void prepareForWrite(RecordConsumer recordConsumer) {
  delegate.prepareForWrite(recordConsumer);
}
@Override
public WriteContext init(final Configuration configuration) {
  schema = getSchema(configuration);
  return new WriteContext(schema, new HashMap<String, String>());
}
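// The write side mirrors the read side: init(Configuration) declares the file schema and
// key/value metadata, prepareForWrite receives the RecordConsumer, and write() emits one
// record at a time. A minimal sketch of a custom WriteSupport; the schema and the
// String[] record type are illustrative, not taken from the snippet above.
import java.util.HashMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.hadoop.api.WriteSupport;
import org.apache.parquet.io.api.Binary;
import org.apache.parquet.io.api.RecordConsumer;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;

public class PairWriteSupport extends WriteSupport<String[]> {

  // Illustrative schema: an int32 id column and a UTF8 name column.
  private static final MessageType SCHEMA = MessageTypeParser.parseMessageType(
      "message pair { required int32 id; required binary name (UTF8); }");

  private RecordConsumer recordConsumer;

  @Override
  public WriteContext init(Configuration configuration) {
    return new WriteContext(SCHEMA, new HashMap<String, String>());
  }

  @Override
  public void prepareForWrite(RecordConsumer recordConsumer) {
    this.recordConsumer = recordConsumer;
  }

  @Override
  public void write(String[] record) {
    // record[0] is the id, record[1] is the name.
    recordConsumer.startMessage();
    recordConsumer.startField("id", 0);
    recordConsumer.addInteger(Integer.parseInt(record[0]));
    recordConsumer.endField("id", 0);
    recordConsumer.startField("name", 1);
    recordConsumer.addBinary(Binary.fromString(record[1]));
    recordConsumer.endField("name", 1);
    recordConsumer.endMessage();
  }
}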
@Override
public ReadContext init(final InitContext context) {
  return new ReadContext(context.getFileSchema());
}
}
@Override
public ReadContext init(InitContext context) {
  return new ReadContext(context.getFileSchema());
}
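// Returning context.getFileSchema() unchanged, as above, requests every column. When only
// some columns are needed, the same one-liner can pass a partial schema string to the
// static ReadSupport.getSchemaForRead helper, which projects the file schema onto it.
// Sketch of such an init() inside a ReadSupport subclass; the column names are
// illustrative and assume the file actually contains an int32 id and a binary name.
@Override
public ReadContext init(InitContext context) {
  String partial = "message projected { required int32 id; required binary name (UTF8); }";
  return new ReadContext(getSchemaForRead(context.getFileSchema(), partial));
}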