/**
 * Builds a ParquetWriter that streams Avro records to the supplied output stream.
 *
 * @param context the process context used to resolve common parquet properties
 * @param flowFile the flow file whose attributes may influence writer configuration
 * @param out the destination stream wrapped as a parquet output file
 * @param schema the Avro schema of the records to be written
 * @return a configured parquet writer for {@code GenericRecord}s
 * @throws IOException if the writer cannot be created
 */
private ParquetWriter createParquetWriter(final ProcessContext context, final FlowFile flowFile, final OutputStream out, final Schema schema) throws IOException {
    // Avro compatibility settings are applied up front, before the shared NiFi config.
    final Configuration configuration = new Configuration();
    configuration.setBoolean(AvroReadSupport.AVRO_COMPATIBILITY, true);
    configuration.setBoolean("parquet.avro.add-list-element-records", false);
    configuration.setBoolean("parquet.avro.write-old-list-structure", false);

    final NifiParquetOutputFile outputFile = new NifiParquetOutputFile(out);
    final AvroParquetWriter.Builder<GenericRecord> builder = AvroParquetWriter
            .<GenericRecord>builder(outputFile)
            .withSchema(schema);

    ParquetUtils.applyCommonConfig(builder, context, flowFile, configuration, this);
    return builder.build();
}
/**
 * Creates a parquet writer for the topic encoded in the given log-file path,
 * using the schema registered for that topic.
 *
 * @param logFilePath carries both the output location and the topic name
 * @param codec the Hadoop compression codec to use, or {@code null} for none
 * @throws IOException if the underlying writer cannot be created
 */
public AvroParquetFileWriter(LogFilePath logFilePath, CompressionCodec codec) throws IOException {
    final Path outputPath = new Path(logFilePath.getLogFilePath());
    LOG.debug("Creating Brand new Writer for path {}", outputPath);

    // A null codec maps to the "no compression" codec name.
    Class<?> codecClass = null;
    if (codec != null) {
        codecClass = codec.getClass();
    }
    final CompressionCodecName compression = CompressionCodecName.fromCompressionCodec(codecClass);

    topic = logFilePath.getTopic();
    // Not setting blockSize, pageSize, enableDictionary, and validating
    writer = AvroParquetWriter.builder(outputPath)
            .withSchema(schemaRegistryClient.getSchema(topic))
            .withCompressionCodec(compression)
            .build();
}
/**
 * Parses the given schema string and builds a parquet writer for it.
 *
 * @param schemaString the Avro schema in its JSON text form
 * @param dataModel the Avro data model used to interpret record objects
 * @param out the parquet output file to write to
 * @param <T> the record type accepted by the returned writer
 * @return a parquet writer bound to the parsed schema and data model
 * @throws IOException if the writer cannot be created
 */
private static <T> ParquetWriter<T> createAvroParquetWriter(
        String schemaString, GenericData dataModel, OutputFile out) throws IOException {
    final Schema parsedSchema = new Schema.Parser().parse(schemaString);
    final AvroParquetWriter.Builder<T> builder = AvroParquetWriter.<T>builder(out);
    return builder
            .withSchema(parsedSchema)
            .withDataModel(dataModel)
            .build();
}
try {
    // Build a writer for the record's Avro schema at the given on-disk path.
    writer.set(AvroParquetWriter.<GenericRecord>builder(toHadoopPath(javaPathOnDisk))
            .withSchema(m.getSchema())
            .build());
} catch (NullPointerException e) {
    // NOTE(review): presumably Hadoop's local-filesystem code NPEs when native
    // binaries (e.g. winutils on Windows) are missing — confirm against the
    // environment this runs in. The NPE is kept as the cause for diagnosis.
    throw new IllegalStateException("Are you missing Hadoop binaries?", e);
/**
 * Opens a parquet writer for the given schema and destination, and wraps it in a
 * consumer: each accepted record is written, and closing the consumer closes the
 * underlying writer.
 *
 * @param schema the Avro schema of the records to be consumed
 * @param uri the destination the parquet file is written to
 * @return a consumer that writes every accepted record to the parquet file
 * @throws IOException if the writer cannot be created
 */
@Override
protected IGenericRecordConsumer prepareRecordConsumer(Schema schema, URI uri) throws IOException {
    final ParquetWriter<GenericRecord> parquetWriter = AvroParquetWriter.<GenericRecord>builder(toHadoopPath(uri))
            .withSchema(schema)
            .withConf(getConfiguration())
            .build();
    return new IGenericRecordConsumer() {
        @Override
        public void accept(GenericRecord record) {
            try {
                parquetWriter.write(record);
            } catch (IOException e) {
                // accept cannot throw checked exceptions, so surface I/O failures unchecked.
                throw new UncheckedIOException(e);
            }
        }

        @Override
        public void close() throws IOException {
            parquetWriter.close();
        }
    };
}
/**
 * Returns a ParquetWriter for Avro files. This functionality is provided mostly as a utility for tests
 * where we may have to create parquet files to test ingestion or dispersal.
 *
 * @param dataFilePath the Hadoop path the parquet file will be written to
 * @param schema the Avro schema describing the records to be written
 * @param conf the Hadoop configuration used when creating the writer
 * @return an uncompressed, Parquet-v1 writer for the given path and schema
 * @throws IOException if the underlying writer cannot be created
 */
public static ParquetWriter initializeAvroWriter(final Path dataFilePath, final Schema schema,
        final Configuration conf) throws IOException {
    // NOTE(review): return type is the raw ParquetWriter; callers likely want a
    // parameterized ParquetWriter<GenericRecord> — confirm before tightening.
    final ParquetWriter<Object> writer = AvroParquetWriter.builder(dataFilePath)
            .withSchema(schema)
            .withCompressionCodec(CompressionCodecName.UNCOMPRESSED)
            .withConf(conf)
            .withWriterVersion(ParquetProperties.WriterVersion.PARQUET_1_0)
            .build();
    return writer;
}
}
/**
 * Creates a builder that writes parquet data to the given {@link OutputFile}.
 *
 * @param file the destination output file
 * @param <T> the Java type of the records to write
 * @return a new builder for the supplied output file
 */
public static <T> Builder<T> builder(OutputFile file) {
    return new Builder<>(file);
}
/**
 * Creates a builder that writes parquet data to the given Hadoop {@link Path}.
 *
 * @param file the destination path
 * @param <T> the Java type of the records to write
 * @return a new builder for the supplied path
 * @deprecated prefer {@link #builder(OutputFile)}, which does not depend on
 *     Hadoop's {@code Path} abstraction; this matches upstream parquet-avro,
 *     where the {@code Path}-based factory is deprecated.
 */
@Deprecated
public static <T> Builder<T> builder(Path file) {
    return new Builder<T>(file);
}
/**
 * Creates an HDFS record writer that serializes records as Avro-backed parquet.
 *
 * @param context the process context used to resolve common parquet properties
 * @param flowFile the flow file whose attributes may influence configuration
 * @param conf the Hadoop configuration to apply to the writer
 * @param path the HDFS path the parquet file is written to
 * @param schema the NiFi record schema, converted to Avro before writing
 * @return a record writer wrapping the configured parquet writer
 * @throws IOException if the writer cannot be created
 * @throws SchemaNotFoundException if the schema cannot be resolved
 */
@Override
public HDFSRecordWriter createHDFSRecordWriter(final ProcessContext context, final FlowFile flowFile,
        final Configuration conf, final Path path, final RecordSchema schema)
        throws IOException, SchemaNotFoundException {
    // Translate the NiFi schema to Avro, then build the writer around it.
    final Schema avroSchema = AvroTypeUtil.extractAvroSchema(schema);
    final AvroParquetWriter.Builder<GenericRecord> builder = AvroParquetWriter
            .<GenericRecord>builder(path)
            .withSchema(avroSchema);
    applyCommonConfig(builder, context, flowFile, conf);
    return new AvroParquetHDFSRecordWriter(builder.build(), avroSchema);
}
/**
 * Creates an HDFS record writer that serializes records as Avro-backed parquet,
 * applying the shared parquet configuration from {@code ParquetUtils}.
 *
 * @param context the process context used to resolve common parquet properties
 * @param flowFile the flow file whose attributes may influence configuration
 * @param conf the Hadoop configuration to apply to the writer
 * @param path the HDFS path the parquet file is written to
 * @param schema the NiFi record schema, converted to Avro before writing
 * @return a record writer wrapping the configured parquet writer
 * @throws IOException if the writer cannot be created
 * @throws SchemaNotFoundException if the schema cannot be resolved
 */
@Override
public HDFSRecordWriter createHDFSRecordWriter(final ProcessContext context, final FlowFile flowFile,
        final Configuration conf, final Path path, final RecordSchema schema)
        throws IOException, SchemaNotFoundException {
    // Translate the NiFi schema to Avro, then build the writer around it.
    final Schema avroSchema = AvroTypeUtil.extractAvroSchema(schema);
    final AvroParquetWriter.Builder<GenericRecord> builder = AvroParquetWriter
            .<GenericRecord>builder(path)
            .withSchema(avroSchema);
    ParquetUtils.applyCommonConfig(builder, context, flowFile, conf, this);
    return new AvroParquetHDFSRecordWriter(builder.build(), avroSchema);
}