/**
 * Opens an Avro-backed Parquet reader for the given log file.
 *
 * <p>The record schema is looked up per-topic from the schema registry client.
 *
 * @param logFilePath descriptor of the log file (path, topic, offset)
 * @param codec compression codec — not used when opening the reader here;
 *        kept for interface compatibility with callers
 * @throws IOException if the Parquet reader cannot be opened
 */
public AvroParquetFileReader(LogFilePath logFilePath, CompressionCodec codec) throws IOException {
    Path path = new Path(logFilePath.getLogFilePath());
    String topic = logFilePath.getTopic();
    Schema schema = schemaRegistryClient.getSchema(topic);
    reader = AvroParquetReader.<GenericRecord>builder(path).build();
    // Typed instead of the raw SpecificDatumWriter to avoid an unchecked warning.
    writer = new SpecificDatumWriter<GenericRecord>(schema);
    offset = logFilePath.getOffset();
}
// Fragment (continues beyond this excerpt): registers the upsert handle's schema as the
// Avro read schema on the shared Hadoop conf, then opens the old base file for reading.
// NOTE(review): the raw builder() call is assigned to ParquetReader<IndexedRecord> via an
// unchecked conversion — AvroParquetReader.<IndexedRecord>builder(...) would be type-safe.
// NOTE(review): mutating getHadoopConf() affects any other reader sharing that conf — confirm
// this is intended.
AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getSchema()); ParquetReader<IndexedRecord> reader = AvroParquetReader.builder(upsertHandle.getOldFilePath()) .withConf(getHadoopConf()).build(); BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null; try {
// Fragment (while-loop body continues beyond this excerpt): requests a column projection
// (readSchema) so only the needed fields are materialized, then iterates the file's records,
// presumably to collect row keys into the set — the loop body is not visible here.
// NOTE(review): the raw ParquetReader local loses the element type; a typed
// ParquetReader<GenericRecord> would avoid the Object-typed read() results.
AvroReadSupport.setRequestedProjection(conf, readSchema); Set<String> rowKeys = new HashSet<>(); try (ParquetReader reader = AvroParquetReader.builder(filePath).withConf(conf).build()) { Object obj = reader.read(); while (obj != null) {
// Fragment (try block continues beyond this excerpt): opens the old base file with a typed
// reader (try-with-resources guarantees close) and wires it through a bounded in-memory
// executor that feeds records from a ParquetReaderIterator into the UpdateHandler.
// The identity transform (x -> x) passes records through unchanged.
BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null; try (ParquetReader<IndexedRecord> reader = AvroParquetReader.<IndexedRecord>builder(upsertHandle.getOldFilePath()) .withConf(getHadoopConf()).build()) { wrapper = new SparkBoundedInMemoryExecutor(config, new ParquetReaderIterator(reader), new UpdateHandler(upsertHandle), x -> x);
/** * NOTE: This literally reads the entire file contents, thus should be used with caution. */ public static List<GenericRecord> readAvroRecords(Configuration configuration, Path filePath) { ParquetReader reader = null; List<GenericRecord> records = new ArrayList<>(); try { reader = AvroParquetReader.builder(filePath).withConf(configuration).build(); Object obj = reader.read(); while (obj != null) { if (obj instanceof GenericRecord) { records.add(((GenericRecord) obj)); } obj = reader.read(); } } catch (IOException e) { throw new HoodieIOException("Failed to read avro records from Parquet " + filePath, e); } finally { if (reader != null) { try { reader.close(); } catch (IOException e) { // ignore } } } return records; }
public Stream<GenericRecord> toStream(Path hadoopPath) throws IOException { Filter filter = makeFilter(); ParquetReader<GenericRecord> reader; try { reader = AvroParquetReader.<GenericRecord>builder(hadoopPath) .withFilter(filter) .withConf(getConfiguration()) .build(); } catch (IOException e) { // Default exception may not refer the input path throw new IOException("Issue on path: " + hadoopPath, e); } catch (RuntimeException e) { // Default exception may not refer the input path throw new IOException("Issue on path: " + hadoopPath, e); } return toStream(reader); }
/**
 * Creates a builder for configuring an Avro Parquet reader over the given input file.
 *
 * @param file the input file to read from
 * @param <T> the Java type of records to read from the file
 * @return an Avro reader builder
 */
public static <T> Builder<T> builder(InputFile file) {
    return new Builder<>(file);
}
/**
 * Builds a Parquet reader over the configured file, honoring the optional read schema
 * and requested column projection when they have been set.
 *
 * @return a reader producing {@link GenericRecord} rows of the configured file
 * @throws IOException if the reader cannot be created
 */
private ParquetReader<GenericRecord> initReader() throws IOException {
    Configuration configuration = getFs().getConf();
    // NOTE(review): these setters mutate the filesystem's shared conf — confirm no other
    // reader depends on it being untouched.
    if (this.schema != null) {
        AvroReadSupport.setAvroReadSchema(configuration, this.schema);
    }
    if (this.projection != null) {
        AvroReadSupport.setRequestedProjection(configuration, this.projection);
    }
    // Return directly; the previous raw-typed local caused an unchecked conversion.
    return AvroParquetReader.<GenericRecord>builder(getFilePath())
        .withConf(configuration)
        .build();
}
/**
 * Creates an HDFS record reader backed by an Avro Parquet reader for the given path.
 *
 * @param context the current process context (unused here)
 * @param flowFile the flow file being processed (unused here)
 * @param conf Hadoop configuration for opening the file
 * @param path path of the Parquet file to read
 * @return a record reader over the file's Avro records
 * @throws IOException if the underlying Parquet reader cannot be built
 */
@Override
public HDFSRecordReader createHDFSRecordReader(final ProcessContext context, final FlowFile flowFile,
        final Configuration conf, final Path path) throws IOException {
    final ParquetReader<GenericRecord> parquetReader =
        AvroParquetReader.<GenericRecord>builder(path).withConf(conf).build();
    return new AvroParquetHDFSRecordReader(parquetReader);
}
/**
 * Creates a builder for configuring an Avro Parquet reader over the given file path.
 *
 * @param file a file path
 * @param <T> the Java type of records to read from the file
 * @return an Avro reader builder
 * @deprecated will be removed in 2.0.0; use {@link #builder(InputFile)} instead.
 */
@Deprecated
public static <T> Builder<T> builder(Path file) {
    return new Builder<>(file);
}
/**
 * Opens the Parquet file at the given path and returns a stream over its records,
 * applying the filter produced by {@code makeFilter()}.
 *
 * @param hadoopPath path of the Parquet file to read
 * @return a stream of the file's records
 * @throws IOException if the reader cannot be opened
 */
public Stream<GenericRecord> toStream(org.apache.hadoop.fs.Path hadoopPath) throws IOException {
    final Filter rowFilter = makeFilter();
    final ParquetReader<GenericRecord> parquetReader =
        AvroParquetReader.<GenericRecord>builder(hadoopPath)
            .withFilter(rowFilter)
            .withConf(getConfiguration())
            .build();
    return toStream(parquetReader);
}
/**
 * (Re)opens the Avro Parquet reader for the given file, closing any previously held
 * reader first. A decimal logical-type conversion is registered on the generic data
 * model before the new reader is built.
 *
 * @param file path of the Parquet file to open
 * @throws RuntimeException wrapping any {@link IOException} from close or open
 */
private void initReader(Path file) {
    try {
        if (reader != null) {
            reader.close();
        }
        final GenericData model = GenericData.get();
        model.addLogicalTypeConversion(new Conversions.DecimalConversion());
        this.reader = AvroParquetReader.<GenericRecord>builder(file).withDataModel(model).build();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Builds an {@code AvroParquetHDFSRecordReader} for the supplied path using the given
 * Hadoop configuration.
 *
 * @param context the current process context (unused here)
 * @param flowFile the flow file being processed (unused here)
 * @param conf Hadoop configuration for opening the file
 * @param path path of the Parquet file to read
 * @return a record reader over the file's Avro records
 * @throws IOException if the underlying Parquet reader cannot be built
 */
@Override
public HDFSRecordReader createHDFSRecordReader(final ProcessContext context, final FlowFile flowFile,
        final Configuration conf, final Path path) throws IOException {
    return new AvroParquetHDFSRecordReader(
        AvroParquetReader.<GenericRecord>builder(path).withConf(conf).build());
}