/** Open a new file for data matching a schema with a random sync. */
public DataFileWriter<D> create(Schema schema, OutputStream outs) throws IOException {
  return create(schema, outs, null);
}
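// The two-argument overload above delegates to create(schema, outs, null), so the writer
// generates a random 16-byte sync marker. In Avro versions where the three-argument
// overload is public, a caller that needs reproducible container bytes can supply its own
// marker. A hedged sketch; the fixed marker bytes and variable names are illustrative only.
byte[] sync = "0123456789abcdef".getBytes(StandardCharsets.US_ASCII); // must be exactly 16 bytes
try (DataFileWriter<GenericRecord> writer =
         new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
  writer.create(schema, outputStream, sync); // same header, but with the supplied sync marker
  writer.append(record);
}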
public AvroGenericRecordHDFSWriter(FileRotationPolicy policy, Path path, FSDataOutputStream stream, Schema schema) throws IOException {
  super(policy, path);
  this.out = stream;
  this.schema = schema;
  DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
  avroWriter = new DataFileWriter<>(datumWriter);
  avroWriter.create(this.schema, this.out);
}
private static byte[] convertRecordToAvro(Schema schema, Map<String, Object> values) {
  ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
  GenericData.Record record = new GenericData.Record(schema);
  values.forEach(record::put);
  try (DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
    dataFileWriter.create(schema, outputStream);
    dataFileWriter.append(record);
  } catch (IOException e) {
    throw new UncheckedIOException("Failed to convert to Avro.", e);
  }
  return outputStream.toByteArray();
}
@Override
public void process(InputStream in, OutputStream out) throws IOException {
  try (DataFileWriter<Record> w = failureWriter.create(inputSchema, out)) {
    for (Record record : badRecords) {
      w.append(record);
    }
  }
}
public AvroWriter(File baseDir, int index, Map<String, Generator> generatorMap, Schema schema) throws IOException {
  _generatorMap = generatorMap;
  _avroSchema = getAvroSchema(schema);
  _recordWriter = new DataFileWriter<>(new GenericDatumWriter<GenericData.Record>(_avroSchema));
  _recordWriter.create(_avroSchema, new File(baseDir, "part-" + index + ".avro"));
}
private byte[] serializeToAvro(Schema avroSchema, List<Record> docList) throws IOException {
  serializationBuffer.reset();
  dataFileWriter.create(avroSchema, serializationBuffer);
  for (Record doc2 : docList) {
    dataFileWriter.append(doc2);
  }
  dataFileWriter.close();
  return serializationBuffer.toByteArray();
}
public WriteAvroResultWithSchema(final Schema schema, final OutputStream out, final CodecFactory codec) throws IOException {
  super(out);
  this.schema = schema;
  final GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
  dataFileWriter = new DataFileWriter<>(datumWriter);
  dataFileWriter.setCodec(codec);
  dataFileWriter.create(schema, out);
}
public FsAuditSink(Config config, ValueAuditRuntimeMetadata auditMetadata) throws IOException {
  this.auditDirPath = new Path(ConfigUtils.getString(config, FS_SINK_AUDIT_OUTPUT_PATH_KEY, FS_SINK_AUDIT_OUTPUT_DEFAULT_PATH));
  this.fs = this.auditDirPath.getFileSystem(new Configuration());
  this.auditMetadata = auditMetadata;
  this.auditFileOutputStream = closer.register(fs.create(getAuditFilePath()));
  DataFileWriter<GenericRecord> dataFileWriter =
      this.closer.register(new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>()));
  this.writer = this.closer.register(
      dataFileWriter.create(this.auditMetadata.getTableMetadata().getTableSchema(), this.auditFileOutputStream));
}
@Override
public void afterCreate() throws IOException {
  // write the Avro container format header
  dataFileWriter.create(getSchema(), getOutputStream());
}
/**
 * Create a new {@link DataFileWriter} for writing Avro records.
 *
 * @param codecFactory a {@link CodecFactory} object for building the compression codec
 * @return the newly created and opened {@link DataFileWriter}
 * @throws IOException if creating the new {@link DataFileWriter} fails
 */
private DataFileWriter<GenericRecord> createDataFileWriter(CodecFactory codecFactory) throws IOException {
  @SuppressWarnings("resource")
  DataFileWriter<GenericRecord> writer = new DataFileWriter<>(this.datumWriter);
  writer.setCodec(codecFactory);

  // Open the file and return the DataFileWriter
  return writer.create(this.schema, this.stagingFileOutputStream);
}
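// A hedged usage sketch for the helper above: org.apache.avro.file.CodecFactory provides
// factory methods for the standard codecs, so the codec can be chosen by name at runtime.
// The property lookup and the "deflate" default are illustrative, not taken from the
// quoted project.
String codecName = props.getProperty("codec", "deflate"); // e.g. "null", "deflate", "snappy"
DataFileWriter<GenericRecord> writer = createDataFileWriter(CodecFactory.fromString(codecName));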
AvroKeyValueWriter(Schema keySchema, Schema valueSchema, CodecFactory compressionCodec,
    OutputStream outputStream, int syncInterval) throws IOException {
  // Create the generic record schema for the key/value pair.
  mKeyValuePairSchema = AvroKeyValue.getSchema(keySchema, valueSchema);

  // Create an Avro container file and a writer to it.
  DatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>(mKeyValuePairSchema);
  mAvroFileWriter = new DataFileWriter<GenericRecord>(genericDatumWriter);
  mAvroFileWriter.setCodec(compressionCodec);
  mAvroFileWriter.setSyncInterval(syncInterval);
  mAvroFileWriter.create(mKeyValuePairSchema, outputStream);

  // Create a reusable output record.
  mOutputRecord = new AvroKeyValue<Object, Object>(new GenericData.Record(mKeyValuePairSchema));
}
private static GenericData.Record buildAvroRecord(Schema schema, ByteArrayOutputStream outputStream, Map<String, Object> values) {
  GenericData.Record record = new GenericData.Record(schema);
  values.forEach(record::put);
  try {
    DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(new GenericDatumWriter<>(schema));
    dataFileWriter.create(schema, outputStream);
    dataFileWriter.append(record);
    dataFileWriter.close();
  } catch (IOException e) {
    throw new RuntimeException("Failed to convert to Avro.", e);
  }
  return record;
}
public AvroFileWriter(LogFilePath logFilePath, CompressionCodec codec) throws IOException {
  file = new File(logFilePath.getLogFilePath());
  file.getParentFile().mkdirs();
  LOG.debug("Creating Brand new Writer for path {}", logFilePath.getLogFilePath());

  topic = logFilePath.getTopic();
  Schema schema = schemaRegistryClient.getSchema(topic);
  SpecificDatumWriter specificDatumWriter = new SpecificDatumWriter(schema);
  writer = new DataFileWriter(specificDatumWriter);
  writer.setCodec(getCodecFactory(codec));
  writer.create(schema, file);
}
public static void createEmptyAvroStream(final OutputStream outStream) throws IOException {
  final FieldAssembler<Schema> builder = SchemaBuilder.record("NiFi_ExecuteSQL_Record").namespace("any.data").fields();
  final Schema schema = builder.endRecord();

  final DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
  try (final DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter)) {
    dataFileWriter.create(schema, outStream);
  }
}
private byte[] writeRecord(Schema schema, GenericData.Record record) throws Exception {
  ByteArrayOutputStream byteStream = new ByteArrayOutputStream();
  GenericDatumWriter<GenericData.Record> datumWriter = new GenericDatumWriter<>(schema);
  DataFileWriter<GenericData.Record> writer = new DataFileWriter<>(datumWriter);
  try {
    writer.create(schema, byteStream);
    writer.append(record);
  } finally {
    writer.close();
  }
  return byteStream.toByteArray();
}
private ByteArrayOutputStream serializeAvroRecord(Schema schema, GenericRecord user2,
    DatumWriter<GenericRecord> datumWriter, Map<String, String> metadata) throws IOException {
  ByteArrayOutputStream out2 = new ByteArrayOutputStream();
  DataFileWriter<GenericRecord> dataFileWriter2 = new DataFileWriter<>(datumWriter);
  if (metadata != null) {
    metadata.forEach(dataFileWriter2::setMeta);
  }
  dataFileWriter2.create(schema, out2);
  dataFileWriter2.append(user2);
  dataFileWriter2.close();
  return out2;
}
public void createAvroFileWithRepeatingRecords(File file, GenericRecord r, int count) throws IOException {
  DataFileWriter<GenericRecord> writer = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>());
  writer.create(getSchema(), new FileOutputStream(file));
  for (int i = 0; i < count; ++i) {
    writer.append(r);
  }
  writer.close();
}
@Test(expected = AvroRuntimeException.class)
public void testUseMetaAfterCreate() throws IOException {
  DataFileWriter<?> w = new DataFileWriter<>(new GenericDatumWriter<>());
  w.create(Schema.create(Type.NULL), new ByteArrayOutputStream());
  w.setMeta("foo", "bar"); // metadata must be set before create(), so this throws
}
@Test()
public void testUseMeta() throws IOException {
  DataFileWriter<?> w = new DataFileWriter<>(new GenericDatumWriter<>());
  File f = new File(DIR.getRoot().getPath(), "testDataFileMeta.avro");
  w.setMeta("hello", "bar");
  w.create(Schema.create(Type.NULL), f);
  w.close();

  DataFileStream<Void> r = new DataFileStream<>(new FileInputStream(f), new GenericDatumReader<>());
  assertTrue(r.getMetaKeys().contains("hello"));
  assertEquals("bar", r.getMetaString("hello"));
}
public static void writeLinesFile(File dir) throws IOException {
  DatumWriter<Utf8> writer = new GenericDatumWriter<>();
  try (DataFileWriter<Utf8> out = new DataFileWriter<>(writer)) {
    out.create(Schema.create(Schema.Type.STRING), dir);
    for (String line : LINES) {
      out.append(new Utf8(line));
    }
  }
}
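// For completeness, a hedged sketch of reading a container file such as the ones written
// above: DataFileReader recovers the schema from the file header, so none has to be
// supplied up front. The file name is illustrative.
try (DataFileReader<GenericRecord> reader =
         new DataFileReader<>(new File("part-0.avro"), new GenericDatumReader<GenericRecord>())) {
  while (reader.hasNext()) {
    GenericRecord record = reader.next();
    System.out.println(record);
  }
}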