/**
 * Opens an Avro-backed Parquet reader for the given log file.
 *
 * <p>The record schema is looked up per-topic from the schema registry client.
 *
 * @param logFilePath descriptor of the log file (path, topic, offset)
 * @param codec compression codec — not used when opening the reader here;
 *        kept for interface compatibility with callers
 * @throws IOException if the Parquet reader cannot be opened
 */
public AvroParquetFileReader(LogFilePath logFilePath, CompressionCodec codec) throws IOException {
    Path path = new Path(logFilePath.getLogFilePath());
    String topic = logFilePath.getTopic();
    Schema schema = schemaRegistryClient.getSchema(topic);
    reader = AvroParquetReader.<GenericRecord>builder(path).build();
    // Typed instead of the raw SpecificDatumWriter to avoid an unchecked warning.
    writer = new SpecificDatumWriter<GenericRecord>(schema);
    offset = logFilePath.getOffset();
}
// Fragment (continues beyond this excerpt): registers the upsert handle's schema as the
// Avro read schema on the shared Hadoop conf, then opens the old base file for reading.
// NOTE(review): the raw builder() call is assigned to ParquetReader<IndexedRecord> via an
// unchecked conversion — AvroParquetReader.<IndexedRecord>builder(...) would be type-safe.
// NOTE(review): mutating getHadoopConf() affects any other reader sharing that conf — confirm
// this is intended.
AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getSchema()); ParquetReader<IndexedRecord> reader = AvroParquetReader.builder(upsertHandle.getOldFilePath()) .withConf(getHadoopConf()).build(); BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null; try {
// Fragment (while-loop body continues beyond this excerpt): requests a column projection
// (readSchema) so only the needed fields are materialized, then iterates the file's records,
// presumably to collect row keys into the set — the loop body is not visible here.
// NOTE(review): the raw ParquetReader local loses the element type; a typed
// ParquetReader<GenericRecord> would avoid the Object-typed read() results.
AvroReadSupport.setRequestedProjection(conf, readSchema); Set<String> rowKeys = new HashSet<>(); try (ParquetReader reader = AvroParquetReader.builder(filePath).withConf(conf).build()) { Object obj = reader.read(); while (obj != null) {
// Fragment (try block continues beyond this excerpt): opens the old base file with a typed
// reader (try-with-resources guarantees close) and wires it through a bounded in-memory
// executor that feeds records from a ParquetReaderIterator into the UpdateHandler.
// The identity transform (x -> x) passes records through unchanged.
BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null; try (ParquetReader<IndexedRecord> reader = AvroParquetReader.<IndexedRecord>builder(upsertHandle.getOldFilePath()) .withConf(getHadoopConf()).build()) { wrapper = new SparkBoundedInMemoryExecutor(config, new ParquetReaderIterator(reader), new UpdateHandler(upsertHandle), x -> x);
/** * NOTE: This literally reads the entire file contents, thus should be used with caution. */ public static List<GenericRecord> readAvroRecords(Configuration configuration, Path filePath) { ParquetReader reader = null; List<GenericRecord> records = new ArrayList<>(); try { reader = AvroParquetReader.builder(filePath).withConf(configuration).build(); Object obj = reader.read(); while (obj != null) { if (obj instanceof GenericRecord) { records.add(((GenericRecord) obj)); } obj = reader.read(); } } catch (IOException e) { throw new HoodieIOException("Failed to read avro records from Parquet " + filePath, e); } finally { if (reader != null) { try { reader.close(); } catch (IOException e) { // ignore } } } return records; }
public Stream<GenericRecord> toStream(Path hadoopPath) throws IOException { Filter filter = makeFilter(); ParquetReader<GenericRecord> reader; try { reader = AvroParquetReader.<GenericRecord>builder(hadoopPath) .withFilter(filter) .withConf(getConfiguration()) .build(); } catch (IOException e) { // Default exception may not refer the input path throw new IOException("Issue on path: " + hadoopPath, e); } catch (RuntimeException e) { // Default exception may not refer the input path throw new IOException("Issue on path: " + hadoopPath, e); } return toStream(reader); }
/**
 * Creates a builder for configuring an Avro Parquet reader over the given input file.
 *
 * @param file the input file to read from
 * @param <T> the Java type of records to read from the file
 * @return an Avro reader builder
 */
public static <T> Builder<T> builder(InputFile file) {
    return new Builder<>(file);
}
/**
 * Builds a Parquet reader over the configured file, honoring the optional read schema
 * and requested column projection when they have been set.
 *
 * @return a reader producing {@link GenericRecord} rows of the configured file
 * @throws IOException if the reader cannot be created
 */
private ParquetReader<GenericRecord> initReader() throws IOException {
    Configuration configuration = getFs().getConf();
    // NOTE(review): these setters mutate the filesystem's shared conf — confirm no other
    // reader depends on it being untouched.
    if (this.schema != null) {
        AvroReadSupport.setAvroReadSchema(configuration, this.schema);
    }
    if (this.projection != null) {
        AvroReadSupport.setRequestedProjection(configuration, this.projection);
    }
    // Return directly; the previous raw-typed local caused an unchecked conversion.
    return AvroParquetReader.<GenericRecord>builder(getFilePath())
        .withConf(configuration)
        .build();
}
/**
 * Creates an HDFS record reader backed by an Avro Parquet reader for the given path.
 *
 * @param context the current process context (unused here)
 * @param flowFile the flow file being processed (unused here)
 * @param conf Hadoop configuration for opening the file
 * @param path path of the Parquet file to read
 * @return a record reader over the file's Avro records
 * @throws IOException if the underlying Parquet reader cannot be built
 */
@Override
public HDFSRecordReader createHDFSRecordReader(final ProcessContext context, final FlowFile flowFile,
        final Configuration conf, final Path path) throws IOException {
    final ParquetReader<GenericRecord> parquetReader =
        AvroParquetReader.<GenericRecord>builder(path).withConf(conf).build();
    return new AvroParquetHDFSRecordReader(parquetReader);
}
/**
 * Creates a builder for configuring an Avro Parquet reader over the given file path.
 *
 * @param file a file path
 * @param <T> the Java type of records to read from the file
 * @return an Avro reader builder
 * @deprecated will be removed in 2.0.0; use {@link #builder(InputFile)} instead.
 */
@Deprecated
public static <T> Builder<T> builder(Path file) {
    return new Builder<>(file);
}
/**
 * Opens the Parquet file at the given path and returns a stream over its records,
 * applying the filter produced by {@code makeFilter()}.
 *
 * @param hadoopPath path of the Parquet file to read
 * @return a stream of the file's records
 * @throws IOException if the reader cannot be opened
 */
public Stream<GenericRecord> toStream(org.apache.hadoop.fs.Path hadoopPath) throws IOException {
    final Filter rowFilter = makeFilter();
    final ParquetReader<GenericRecord> parquetReader =
        AvroParquetReader.<GenericRecord>builder(hadoopPath)
            .withFilter(rowFilter)
            .withConf(getConfiguration())
            .build();
    return toStream(parquetReader);
}
/**
 * (Re)opens the Avro Parquet reader for the given file, closing any previously held
 * reader first. A decimal logical-type conversion is registered on the generic data
 * model before the new reader is built.
 *
 * @param file path of the Parquet file to open
 * @throws RuntimeException wrapping any {@link IOException} from close or open
 */
private void initReader(Path file) {
    try {
        if (reader != null) {
            reader.close();
        }
        final GenericData model = GenericData.get();
        model.addLogicalTypeConversion(new Conversions.DecimalConversion());
        this.reader = AvroParquetReader.<GenericRecord>builder(file).withDataModel(model).build();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
/**
 * Builds an {@code AvroParquetHDFSRecordReader} for the supplied path using the given
 * Hadoop configuration.
 *
 * @param context the current process context (unused here)
 * @param flowFile the flow file being processed (unused here)
 * @param conf Hadoop configuration for opening the file
 * @param path path of the Parquet file to read
 * @return a record reader over the file's Avro records
 * @throws IOException if the underlying Parquet reader cannot be built
 */
@Override
public HDFSRecordReader createHDFSRecordReader(final ProcessContext context, final FlowFile flowFile,
        final Configuration conf, final Path path) throws IOException {
    return new AvroParquetHDFSRecordReader(
        AvroParquetReader.<GenericRecord>builder(path).withConf(conf).build());
}