private ParquetReader<GenericRecord> initReader() throws IOException {
  Configuration configuration = getFs().getConf();
  if (this.schema != null) {
    AvroReadSupport.setAvroReadSchema(configuration, this.schema);
  }
  if (this.projection != null) {
    AvroReadSupport.setRequestedProjection(configuration, this.projection);
  }
  return AvroParquetReader.<GenericRecord>builder(getFilePath())
      .withConf(configuration)
      .build();
}
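// A hedged sketch (not part of the original snippet) of how a reader built by a method like
// initReader() is typically consumed: ParquetReader.read() returns null at end of input.
// process(...) below is a hypothetical consumer, not an existing method.
try (ParquetReader<GenericRecord> reader = initReader()) {
  GenericRecord record;
  while ((record = reader.read()) != null) {
    process(record);
  }
}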
/**
 * @param conf a configuration
 * @param file a file path
 * @param unboundRecordFilter an unbound record filter (from the old filter API)
 * @throws IOException if there is an error while reading
 * @deprecated will be removed in 2.0.0; use {@link #builder(InputFile)} instead.
 */
@Deprecated
public AvroParquetReader(Configuration conf, Path file, UnboundRecordFilter unboundRecordFilter)
    throws IOException {
  super(conf, file, new AvroReadSupport<T>(), unboundRecordFilter);
}
@Override
public RecordMaterializer<T> prepareForRead(
    Configuration configuration, Map<String, String> keyValueMetaData,
    MessageType fileSchema, ReadContext readContext) {
  Map<String, String> metadata = readContext.getReadSupportMetadata();
  MessageType parquetSchema = readContext.getRequestedSchema();
  Schema avroSchema;

  if (metadata.get(AVRO_READ_SCHEMA_METADATA_KEY) != null) {
    // use the Avro read schema provided by the user
    avroSchema = new Schema.Parser().parse(metadata.get(AVRO_READ_SCHEMA_METADATA_KEY));
  } else if (keyValueMetaData.get(AVRO_SCHEMA_METADATA_KEY) != null) {
    // use the Avro schema from the file metadata if present
    avroSchema = new Schema.Parser().parse(keyValueMetaData.get(AVRO_SCHEMA_METADATA_KEY));
  } else if (keyValueMetaData.get(OLD_AVRO_SCHEMA_METADATA_KEY) != null) {
    // use the Avro schema stored under the old metadata key, if present
    avroSchema = new Schema.Parser().parse(keyValueMetaData.get(OLD_AVRO_SCHEMA_METADATA_KEY));
  } else {
    // default to converting the Parquet schema into an Avro schema
    avroSchema = new AvroSchemaConverter(configuration).convert(parquetSchema);
  }

  GenericData model = getDataModel(configuration);
  String compatEnabled = metadata.get(AvroReadSupport.AVRO_COMPATIBILITY);
  if (compatEnabled != null && Boolean.valueOf(compatEnabled)) {
    return newCompatMaterializer(parquetSchema, avroSchema, model);
  }
  return new AvroRecordMaterializer<T>(parquetSchema, avroSchema, model);
}
/**
 * Override the Avro schema to use for reading. If not set, the Avro schema used for
 * writing is used.
 * <p>
 * Differences between the read and write schemas are resolved using
 * <a href="http://avro.apache.org/docs/current/spec.html#Schema+Resolution">Avro's schema resolution rules</a>.
 *
 * @param job a job
 * @param avroReadSchema the requested schema
 * @see #setRequestedProjection(org.apache.hadoop.mapreduce.Job, org.apache.avro.Schema)
 * @see org.apache.parquet.avro.AvroParquetOutputFormat#setSchema(org.apache.hadoop.mapreduce.Job, org.apache.avro.Schema)
 */
public static void setAvroReadSchema(Job job, Schema avroReadSchema) {
  AvroReadSupport.setAvroReadSchema(ContextUtil.getConfiguration(job), avroReadSchema);
}
/**
 * Set the subset of columns to read (projection pushdown). Specified as an Avro
 * schema, the requested projection is converted into a Parquet schema for Parquet
 * column projection.
 * <p>
 * This is useful if the full schema is large and you only want to read a few
 * columns, since it saves time by not reading unused columns.
 * <p>
 * If a requested projection is set, then the Avro schema used for reading
 * must be compatible with the projection. For instance, if a column is not included
 * in the projection then it must either not be included or be optional in the read
 * schema. Use {@link #setAvroReadSchema(org.apache.hadoop.mapreduce.Job,
 * org.apache.avro.Schema)} to set a read schema, if needed.
 *
 * @param job a job
 * @param requestedProjection the requested projection schema
 * @see #setAvroReadSchema(org.apache.hadoop.mapreduce.Job, org.apache.avro.Schema)
 * @see org.apache.parquet.avro.AvroParquetOutputFormat#setSchema(org.apache.hadoop.mapreduce.Job, org.apache.avro.Schema)
 */
public static void setRequestedProjection(Job job, Schema requestedProjection) {
  AvroReadSupport.setRequestedProjection(ContextUtil.getConfiguration(job), requestedProjection);
}
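// A minimal sketch of combining the two hooks above on a MapReduce job. The "User" record and
// its fields are hypothetical; the projection must be a subset of the columns in the file schema.
Schema projection = SchemaBuilder.record("User").fields()
    .requiredLong("id")
    .optionalString("email")
    .endRecord();

Job job = Job.getInstance();
job.setInputFormatClass(AvroParquetInputFormat.class);
AvroParquetInputFormat.setRequestedProjection(job, projection);
AvroParquetInputFormat.setAvroReadSchema(job, projection);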
@Override
public ReadContext init(Configuration configuration,
                        Map<String, String> keyValueMetaData, MessageType fileSchema) {
  if (avroType != null) {
    setRequestedProjection(configuration, avroType.getSchema());
  }
  return super.init(configuration, keyValueMetaData, fileSchema);
}
private static void setConfigProperties(Configuration conf, Format format,
                                        Schema schema, Class<?> type) {
  GenericData model = DataModelUtil.getDataModelForType(type);
  if (Formats.AVRO.equals(format)) {
    setModel.invoke(conf, model.getClass());
    conf.set(AVRO_SCHEMA_INPUT_KEY, schema.toString());
  } else if (Formats.PARQUET.equals(format)) {
    // TODO: update to a version of Parquet with setAvroDataSupplier
    //AvroReadSupport.setAvroDataSupplier(conf,
    //    DataModelUtil.supplierClassFor(model));
    AvroReadSupport.setAvroReadSchema(conf, schema);
  }
}
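// Sketch of what the commented-out TODO above becomes once the project is on a parquet-avro
// release that ships AvroReadSupport.setAvroDataSupplier (an assumption about the upgrade,
// reusing the snippet's own DataModelUtil helper):
AvroReadSupport.setAvroDataSupplier(conf, DataModelUtil.supplierClassFor(model));
AvroReadSupport.setAvroReadSchema(conf, schema);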
try {
  Schema chukwaAvroSchema = ChukwaAvroSchema.getSchema();
  AvroReadSupport.setRequestedProjection(conf, chukwaAvroSchema);
  reader = new AvroParquetReader<GenericRecord>(conf, new Path(dataSinkFile));
conf.addResource(getFs(filePath.toString(), conf).getConf());
Schema readSchema = HoodieAvroUtils.getRecordKeySchema();
AvroReadSupport.setAvroReadSchema(conf, readSchema);
AvroReadSupport.setRequestedProjection(conf, readSchema);
Set<String> rowKeys = new HashSet<>();
try (ParquetReader reader = AvroParquetReader.builder(filePath).withConf(conf).build()) {
@Override
public void initialize() {
  Preconditions.checkState(state.equals(ReaderWriterState.NEW),
      "A reader may not be opened more than once - current state:%s", state);
  LOG.debug("Opening reader on path:{}", path);

  try {
    final Configuration conf = fileSystem.getConf();
    AvroReadSupport.setAvroReadSchema(conf, readerSchema);
    reader = new AvroParquetReader<E>(conf, fileSystem.makeQualified(path));
  } catch (IOException e) {
    throw new DatasetIOException("Unable to create reader path:" + path, e);
  }

  advance();
  state = ReaderWriterState.OPEN;
}
/**
 * @param file a file path
 * @throws IOException if there is an error while reading
 * @deprecated will be removed in 2.0.0; use {@link #builder(InputFile)} instead.
 */
@Deprecated
public AvroParquetReader(Path file) throws IOException {
  super(file, new AvroReadSupport<T>());
}
AvroReadSupport.setRequestedProjection(conf, projectionSchema);
AvroReadSupport.setAvroReadSchema(conf, readerSchema);
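// A hedged illustration of the difference between the two calls above. projectionSchema lists
// only the columns to fetch from Parquet; readerSchema may add fields for Avro schema
// resolution, but any field absent from the projection must be optional. The "Event" record
// and its fields are made up and would need to match the file's writer schema.
Schema projectionSchema = SchemaBuilder.record("Event").fields()
    .requiredString("id")
    .requiredLong("ts")
    .endRecord();
Schema readerSchema = SchemaBuilder.record("Event").fields()
    .requiredString("id")
    .requiredLong("ts")
    .optionalString("source")   // not in the projection, so it is optional in the read schema
    .endRecord();
AvroReadSupport.setRequestedProjection(conf, projectionSchema);
AvroReadSupport.setAvroReadSchema(conf, readerSchema);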
AvroReadSupport.setAvroReadSchema(jsc.hadoopConfiguration(), new Schema.Parser().parse(schemaStr));
ParquetInputFormat.setReadSupportClass(job, AvroReadSupport.class);
/**
 * @param file a file path
 * @param unboundRecordFilter an unbound record filter (from the old filter API)
 * @throws IOException if there is an error while reading
 * @deprecated will be removed in 2.0.0; use {@link #builder(InputFile)} instead.
 */
@Deprecated
public AvroParquetReader(Path file, UnboundRecordFilter unboundRecordFilter) throws IOException {
  super(file, new AvroReadSupport<T>(), unboundRecordFilter);
}
AvroReadSupport.setRequestedProjection(configuration,
    AvroSchemaUtil.convert(expectedSchema, projection.getName()));

org.apache.avro.Schema avroReadSchema = AvroSchemaUtil.buildAvroProjection(
    AvroSchemaUtil.convert(ParquetSchemaUtil.convert(projection), projection.getName()),
    expectedSchema, ImmutableMap.of());

AvroReadSupport.setAvroReadSchema(configuration, ParquetAvro.parquetAvroSchema(avroReadSchema));
/**
 * @param conf a configuration
 * @param file a file path
 * @throws IOException if there is an error while reading
 * @deprecated will be removed in 2.0.0; use {@link #builder(InputFile)} instead.
 */
@Deprecated
public AvroParquetReader(Configuration conf, Path file) throws IOException {
  super(conf, file, new AvroReadSupport<T>());
}
"Error in finding the old file path at commit " + commitTime + " for fileId: " + fileId); } else { AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getSchema()); BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null; try (ParquetReader<IndexedRecord> reader = AvroParquetReader.<IndexedRecord>builder(upsertHandle.getOldFilePath())
@Override
protected ReadSupport<T> getReadSupport() {
  if (isReflect) {
    conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, false);
  } else {
    conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, enableCompatibility);
  }
  return new AvroReadSupport<T>(model);
}
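// For context, a hedged sketch of setting the same compatibility flag when building a reader
// directly; the file path is a placeholder. Reflect-based data models need the flag turned off,
// which is exactly what the isReflect branch above enforces.
Configuration conf = new Configuration();
conf.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, false);
ParquetReader<GenericRecord> reader =
    AvroParquetReader.<GenericRecord>builder(new Path("/path/to/file.parquet"))
        .withConf(conf)
        .build();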
"Error in finding the old file path at commit " + commitTime + " for fileId: " + fileId); } else { AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getSchema()); ParquetReader<IndexedRecord> reader = AvroParquetReader.builder(upsertHandle.getOldFilePath()) .withConf(getHadoopConf()).build();