/**
 * Creates an HDFS record reader backed by an Avro Parquet reader over {@code path},
 * configured with the supplied Hadoop {@link Configuration}.
 */
@Override
public HDFSRecordReader createHDFSRecordReader(final ProcessContext context, final FlowFile flowFile,
        final Configuration conf, final Path path) throws IOException {
    // Build the typed Parquet reader in one expression and hand it to the wrapper.
    final ParquetReader<GenericRecord> parquetReader =
            AvroParquetReader.<GenericRecord>builder(path).withConf(conf).build();
    return new AvroParquetHDFSRecordReader(parquetReader);
}
/**
 * Opens an Avro-backed Parquet reader for the given log file and prepares a datum writer
 * for the schema registered for the file's topic.
 *
 * @param logFilePath descriptor carrying the file path, topic, and starting offset
 * @param codec compression codec (unused here; presumably consumed by a matching writer — TODO confirm)
 * @throws IOException if the Parquet reader cannot be opened
 */
public AvroParquetFileReader(LogFilePath logFilePath, CompressionCodec codec) throws IOException {
    Path path = new Path(logFilePath.getLogFilePath());
    String topic = logFilePath.getTopic();
    Schema schema = schemaRegistryClient.getSchema(topic);
    reader = AvroParquetReader.<GenericRecord>builder(path).build();
    // Parameterize the writer: the original raw 'new SpecificDatumWriter(schema)'
    // generated an unchecked warning and bypassed compile-time type checking.
    writer = new SpecificDatumWriter<>(schema);
    offset = logFilePath.getOffset();
}
/**
 * Builds an Avro {@link GenericRecord} Parquet reader over {@code path} (using the given
 * Hadoop configuration) and wraps it in an {@code AvroParquetHDFSRecordReader}.
 */
@Override
public HDFSRecordReader createHDFSRecordReader(final ProcessContext context, final FlowFile flowFile,
        final Configuration conf, final Path path) throws IOException {
    final ParquetReader.Builder<GenericRecord> builder = AvroParquetReader.<GenericRecord>builder(path);
    return new AvroParquetHDFSRecordReader(builder.withConf(conf).build());
}
// Fragment (incomplete at both ends): requests a column projection via readSchema, then
// opens the file with try-with-resources and iterates its records; the while-loop body
// continues beyond this excerpt, so the record handling is not visible here.
// NOTE(review): the raw ParquetReader forces the Object-typed read() seen here — a typed
// <GenericRecord> builder would avoid the untyped loop variable. Presumably rowKeys is
// populated inside the truncated loop body; verify against the full method.
AvroReadSupport.setRequestedProjection(conf, readSchema); Set<String> rowKeys = new HashSet<>(); try (ParquetReader reader = AvroParquetReader.builder(filePath).withConf(conf).build()) { Object obj = reader.read(); while (obj != null) {
public Stream<GenericRecord> toStream(Path hadoopPath) throws IOException { Filter filter = makeFilter(); ParquetReader<GenericRecord> reader; try { reader = AvroParquetReader.<GenericRecord>builder(hadoopPath) .withFilter(filter) .withConf(getConfiguration()) .build(); } catch (IOException e) { // Default exception may not refer the input path throw new IOException("Issue on path: " + hadoopPath, e); } catch (RuntimeException e) { // Default exception may not refer the input path throw new IOException("Issue on path: " + hadoopPath, e); } return toStream(reader); }
/**
 * Builds a Parquet reader for this file, applying the optional Avro read schema and
 * requested column projection to the filesystem's configuration before opening.
 *
 * @return a reader of {@link GenericRecord}s over {@code getFilePath()}
 * @throws IOException if the reader cannot be opened
 */
private ParquetReader<GenericRecord> initReader() throws IOException {
    Configuration configuration = getFs().getConf();
    if (this.schema != null) {
        AvroReadSupport.setAvroReadSchema(configuration, this.schema);
    }
    if (this.projection != null) {
        AvroReadSupport.setRequestedProjection(configuration, this.projection);
    }
    // Return the typed reader directly: the original bound it to a raw ParquetReader
    // local first, which produced an unchecked-conversion warning for no benefit.
    return AvroParquetReader.<GenericRecord>builder(getFilePath())
        .withConf(configuration)
        .build();
}
/** * NOTE: This literally reads the entire file contents, thus should be used with caution. */ public static List<GenericRecord> readAvroRecords(Configuration configuration, Path filePath) { ParquetReader reader = null; List<GenericRecord> records = new ArrayList<>(); try { reader = AvroParquetReader.builder(filePath).withConf(configuration).build(); Object obj = reader.read(); while (obj != null) { if (obj instanceof GenericRecord) { records.add(((GenericRecord) obj)); } obj = reader.read(); } } catch (IOException e) { throw new HoodieIOException("Failed to read avro records from Parquet " + filePath, e); } finally { if (reader != null) { try { reader.close(); } catch (IOException e) { // ignore } } } return records; }
/**
 * Streams the Avro records of the Parquet file at {@code hadoopPath}, applying the
 * record filter produced by {@code makeFilter()} and the current Hadoop configuration.
 */
public Stream<GenericRecord> toStream(org.apache.hadoop.fs.Path hadoopPath) throws IOException {
    Filter recordFilter = makeFilter();
    ParquetReader<GenericRecord> parquetReader =
        AvroParquetReader.<GenericRecord>builder(hadoopPath)
            .withFilter(recordFilter)
            .withConf(getConfiguration())
            .build();
    return toStream(parquetReader);
}
/**
 * (Re)opens the Avro Parquet reader on {@code file}, closing any previously open reader
 * first. Registers a decimal logical-type conversion on the shared generic data model
 * before building the new reader.
 */
private void initReader(Path file) {
    try {
        if (reader != null) {
            reader.close();
        }
        // NOTE(review): this mutates the process-wide GenericData.get() singleton,
        // affecting every user of the default model — confirm that is intended.
        GenericData model = GenericData.get();
        model.addLogicalTypeConversion(new Conversions.DecimalConversion());
        this.reader = AvroParquetReader.<GenericRecord>builder(file)
            .withDataModel(model)
            .build();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
}
// Fragment (incomplete at the end): sets the upsert handle's schema as the Avro read
// schema on the shared Hadoop config, then opens the old file's Parquet reader with
// try-with-resources and begins constructing a SparkBoundedInMemoryExecutor fed by a
// ParquetReaderIterator. The constructor call is cut off at the end of this excerpt.
// NOTE(review): raw 'new SparkBoundedInMemoryExecutor(...)' loses type checking against
// the BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> target — verify in the
// full source.
AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getSchema()); BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null; try (ParquetReader<IndexedRecord> reader = AvroParquetReader.<IndexedRecord>builder(upsertHandle.getOldFilePath()) .withConf(getHadoopConf()).build()) { wrapper = new SparkBoundedInMemoryExecutor(config, new ParquetReaderIterator(reader),
// Fragment (incomplete at both ends): the else-branch of an unseen conditional. Applies
// the upsert handle's schema as the Avro read schema, opens a Parquet reader over the
// old file path, and declares the executor wrapper; the excerpt ends before the reader
// is used or closed.
// NOTE(review): unlike the sibling fragment that uses try-with-resources, this reader is
// opened outside any visible try block — confirm it is closed later in the full method.
} else { AvroReadSupport.setAvroReadSchema(getHadoopConf(), upsertHandle.getSchema()); ParquetReader<IndexedRecord> reader = AvroParquetReader.builder(upsertHandle.getOldFilePath()) .withConf(getHadoopConf()).build(); BoundedInMemoryExecutor<GenericRecord, GenericRecord, Void> wrapper = null;