/**
 * Records a datum writer for the input schema and hands the schema back in string form.
 *
 * @param inputSchema the Avro schema of records this converter will later serialize
 * @param workUnit    the current work unit state (unused here)
 * @return the schema rendered as a string
 */
@Override
public String convertSchema(Schema inputSchema, WorkUnitState workUnit) throws SchemaConversionException {
  this.writer = new GenericDatumWriter<GenericRecord>(inputSchema);
  return inputSchema.toString();
}
public AvroGenericRecordSerializer() { this.closer =Closer.create(); this.byteArrayOutputStream = new ByteArrayOutputStream(); this.out = this.closer.register(new DataOutputStream(this.byteArrayOutputStream)); this.writer = new GenericDatumWriter<GenericRecord>(); }
/**
 * Builds an {@code AvroFactory} for generic-record data, wiring reader and writer
 * to a {@link GenericData} instance bound to the given class loader.
 *
 * @param cl     class loader handed to {@link GenericData}
 * @param schema schema for both reading and writing; must not be null
 */
private static <T> AvroFactory<T> fromGeneric(ClassLoader cl, Schema schema) {
  checkNotNull(schema, "Unable to create an AvroSerializer with a GenericRecord type without a schema");
  GenericData genericData = new GenericData(cl);
  GenericDatumReader<T> reader = new GenericDatumReader<>(schema, schema, genericData);
  GenericDatumWriter<T> writer = new GenericDatumWriter<>(schema, genericData);
  return new AvroFactory<>(genericData, schema, reader, writer);
}
/**
 * Creates a generic-record schema wrapper with its own serialization buffer,
 * encoder, and datum reader/writer pair.
 *
 * @param schemaInfo schema metadata forwarded to the superclass; the {@code schema}
 *                   field read below is presumably initialized by the super call — TODO confirm
 */
public GenericAvroSchema(SchemaInfo schemaInfo) {
  super(schemaInfo);
  this.byteArrayOutputStream = new ByteArrayOutputStream();
  // Pass null as the reuse argument. The original passed the still-unassigned
  // `encoder` field itself (which was null at that point) — equivalent, but misleading.
  this.encoder = EncoderFactory.get().binaryEncoder(this.byteArrayOutputStream, null);
  // Diamond instead of raw types, so no unchecked-assignment warnings.
  this.datumWriter = new GenericDatumWriter<>(schema);
  this.datumReader = new GenericDatumReader<>(schema);
}
/**
 * Creates a serializer that renders records of the given schema as Avro JSON
 * into an internal buffer.
 *
 * @param schema the Avro schema records must conform to
 * @throws RuntimeException if the JSON encoder cannot be created
 */
public Serializer(Schema schema) {
  try {
    this.writer = new GenericDatumWriter<>(schema);
    this.outputStream = new ByteArrayOutputStream();
    this.encoder = EncoderFactory.get().jsonEncoder(schema, this.outputStream);
  } catch (IOException ioe) {
    // Preserve the underlying cause — the original discarded it, losing the stack trace.
    throw new RuntimeException("Could not initialize avro json encoder.", ioe);
  }
}
/**
 * Serializes the given field values as a single-record Avro container file.
 *
 * @param schema the record schema; every key in {@code values} must name a field of it
 * @param values field name to value mapping copied into the record
 * @return the Avro container-file bytes
 * @throws UncheckedIOException if serialization fails
 */
private static byte[] convertRecordToAvro(Schema schema, Map<String, Object> values) {
  ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
  GenericData.Record record = new GenericData.Record(schema);
  values.forEach(record::put);
  try (DataFileWriter<GenericRecord> dataFileWriter =
      new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
    dataFileWriter.create(schema, outputStream);
    dataFileWriter.append(record);
    // No explicit close(): try-with-resources closes (and flushes) the writer.
    // The original closed it explicitly AND via try-with-resources, a redundant double close.
  } catch (IOException e) {
    throw new UncheckedIOException("Failed to convert to Avro.", e);
  }
  return outputStream.toByteArray();
}
/** * Creates an Avro serialization schema for the given Avro schema string. * * @param avroSchemaString Avro schema string used to serialize Flink's row to Avro's record */ public AvroRowSerializationSchema(String avroSchemaString) { Preconditions.checkNotNull(avroSchemaString, "Avro schema must not be null."); this.recordClazz = null; this.schemaString = avroSchemaString; try { this.schema = new Schema.Parser().parse(avroSchemaString); } catch (SchemaParseException e) { throw new IllegalArgumentException("Could not parse Avro schema string.", e); } this.datumWriter = new GenericDatumWriter<>(schema); this.arrayOutputStream = new ByteArrayOutputStream(); this.encoder = EncoderFactory.get().binaryEncoder(arrayOutputStream, null); }
public AvroWriter(File baseDir, int index, Map<String, Generator> generatorMap, Schema schema) throws IOException { _generatorMap = generatorMap; _avroSchema = getAvroSchema(schema); _recordWriter = new DataFileWriter<>(new GenericDatumWriter<GenericData.Record>(_avroSchema)); _recordWriter.create(_avroSchema, new File(baseDir, "part-" + index + ".avro")); }
/**
 * Convert a GenericRecord to a byte array using Avro's raw binary encoding
 * (no container-file framing).
 *
 * @param record the record to serialize; its own schema is used for writing
 * @return the binary-encoded record bytes
 */
public static byte[] recordToByteArray(GenericRecord record) throws IOException {
  try (ByteArrayOutputStream buffer = new ByteArrayOutputStream()) {
    DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(record.getSchema());
    datumWriter.write(record, EncoderFactory.get().directBinaryEncoder(buffer, null));
    return buffer.toByteArray();
  }
}
/**
 * Creates an HDFS writer that appends Avro generic records to the given open stream.
 *
 * @param policy rotation policy forwarded to the superclass
 * @param path   target path forwarded to the superclass
 * @param stream already-opened HDFS output stream the Avro container is written to
 * @param schema schema for all records appended by this writer
 */
public AvroGenericRecordHDFSWriter(FileRotationPolicy policy, Path path, FSDataOutputStream stream, Schema schema) throws IOException {
  super(policy, path);
  this.out = stream;
  this.schema = schema;
  avroWriter = new DataFileWriter<>(new GenericDatumWriter<GenericRecord>(schema));
  avroWriter.create(this.schema, this.out);
}
/** Prepares a datum writer keyed to the reader's schema and a binary encoder over the output. */
@Override
public void init(final DataFileStream<GenericRecord> reader, final String codec, final OutputStream out) throws IOException {
  final Schema sourceSchema = reader.getSchema();
  writer = new GenericDatumWriter<>(sourceSchema);
  encoder = EncoderFactory.get().binaryEncoder(out, null);
}
/**
 * Creates an audit sink that streams audit records as an Avro container file
 * onto the configured filesystem.
 *
 * <p>Registration order with the closer matters: resources are closed in
 * reverse registration order, so the data-file writer is closed before the
 * underlying output stream.
 *
 * @param config        source of the audit output path (falls back to the default path)
 * @param auditMetadata carries the table schema used for the Avro container header
 * @throws IOException if the output file cannot be created
 */
public FsAuditSink(Config config, ValueAuditRuntimeMetadata auditMetadata) throws IOException {
  // Resolve the audit directory from config, with a default fallback.
  this.auditDirPath = new Path(ConfigUtils.getString(config, FS_SINK_AUDIT_OUTPUT_PATH_KEY, FS_SINK_AUDIT_OUTPUT_DEFAULT_PATH));
  this.fs = this.auditDirPath.getFileSystem(new Configuration());
  this.auditMetadata = auditMetadata;
  // Open the output file and track it for cleanup.
  this.auditFileOutputStream = closer.register(fs.create(getAuditFilePath()));
  DataFileWriter<GenericRecord> dataFileWriter = this.closer.register(new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>()));
  // create(...) writes the container header using the table schema and returns the same writer.
  this.writer = this.closer.register(dataFileWriter.create(this.auditMetadata.getTableMetadata().getTableSchema(), this.auditFileOutputStream));
}
/**
 * Opens the Avro file described by {@code status} and prepares a JSON re-encoding
 * pipeline: records are read with a generic reader and rendered as JSON into an
 * internal buffer.
 *
 * @param status file status pointing at the Avro file to read
 * @throws IOException if the file cannot be opened
 */
public AvroFileInputStream(FileStatus status) throws IOException {
  pos = 0;
  buffer = new byte[0];
  FileContext fileContext = FileContext.getFileContext(new Configuration());
  AvroFSInput input = new AvroFSInput(fileContext, status.getPath());
  fileReader = DataFileReader.openReader(input, new GenericDatumReader<Object>());
  // The writer and JSON encoder both use the schema embedded in the file itself.
  Schema fileSchema = fileReader.getSchema();
  writer = new GenericDatumWriter<Object>(fileSchema);
  output = new ByteArrayOutputStream();
  encoder = EncoderFactory.get().jsonEncoder(fileSchema, output);
}
@Override public void write(DataOutput out) throws IOException { // Write schema since we need it to pull the data out. (see point #1 above) String schemaString = record.getSchema().toString(false); out.writeUTF(schemaString); schemaString = fileSchema.toString(false); out.writeUTF(schemaString); recordReaderID.write(out); // Write record to byte buffer GenericDatumWriter<GenericRecord> gdw = new GenericDatumWriter<GenericRecord>(); BinaryEncoder be = EncoderFactory.get().directBinaryEncoder((DataOutputStream)out, null); gdw.setSchema(record.getSchema()); gdw.write(record, be); }
/**
 * Kryo serialization: emits the schema fingerprint string followed by the record
 * in raw Avro binary encoding.
 *
 * @throws RuntimeException wrapping any {@link IOException} from the Avro writer
 */
@Override
public void write(Kryo kryo, Output output, GenericContainer record) {
  output.writeString(this.getFingerprint(record.getSchema()));
  BinaryEncoder binaryEncoder = EncoderFactory.get().directBinaryEncoder(output, null);
  GenericDatumWriter<GenericContainer> datumWriter = new GenericDatumWriter<>(record.getSchema());
  try {
    datumWriter.write(record, binaryEncoder);
  } catch (IOException e) {
    throw new RuntimeException(e);
  }
}
/**
 * Fills a record from the given values and writes it as an Avro container file
 * into the supplied stream, returning the populated record.
 *
 * @param schema       the record schema; every key in {@code values} must name a field of it
 * @param outputStream destination for the serialized container-file bytes
 * @param values       field name to value mapping copied into the record
 * @return the populated record
 * @throws RuntimeException if serialization fails
 */
private static GenericData.Record buildAvroRecord(Schema schema, ByteArrayOutputStream outputStream, Map<String, Object> values) {
  GenericData.Record record = new GenericData.Record(schema);
  values.forEach(record::put);
  // try-with-resources: the original leaked the writer if create()/append() threw
  // before reaching the explicit close().
  try (DataFileWriter<GenericRecord> dataFileWriter =
      new DataFileWriter<>(new GenericDatumWriter<>(schema))) {
    dataFileWriter.create(schema, outputStream);
    dataFileWriter.append(record);
  } catch (IOException e) {
    throw new RuntimeException("Failed to convert to Avro.", e);
  }
  return record;
}
/**
 * Creates a writer that stores key/value pairs as records of a combined
 * key/value schema in an Avro container file.
 *
 * <p>Codec and sync interval must be configured before {@code create(...)}
 * writes the container header, so the call order below is load-bearing.
 *
 * @param keySchema        schema of the key half of each pair
 * @param valueSchema      schema of the value half of each pair
 * @param compressionCodec codec applied to data blocks
 * @param outputStream     destination stream for the container file
 * @param syncInterval     approximate number of uncompressed bytes between sync markers
 * @throws IOException if the container header cannot be written
 */
AvroKeyValueWriter(Schema keySchema, Schema valueSchema, CodecFactory compressionCodec, OutputStream outputStream, int syncInterval) throws IOException {
  // Create the generic record schema for the key/value pair.
  mKeyValuePairSchema = AvroKeyValue.getSchema(keySchema, valueSchema);
  // Create an Avro container file and a writer to it.
  DatumWriter<GenericRecord> genericDatumWriter = new GenericDatumWriter<GenericRecord>(mKeyValuePairSchema);
  mAvroFileWriter = new DataFileWriter<GenericRecord>(genericDatumWriter);
  mAvroFileWriter.setCodec(compressionCodec);
  mAvroFileWriter.setSyncInterval(syncInterval);
  mAvroFileWriter.create(mKeyValuePairSchema, outputStream);
  // Create a reusable output record.
  mOutputRecord = new AvroKeyValue<Object, Object>(new GenericData.Record(mKeyValuePairSchema));
}
/**
 * Writes given record using specified schema.
 *
 * @param record record to serialize
 * @param schema schema to use for serialization
 * @return serialized record
 */
public static byte[] writeRecord(GenericRecord record, Schema schema) throws IOException {
  ByteArrayOutputStream buffer = new ByteArrayOutputStream();
  BinaryEncoder binaryEncoder = EncoderFactory.get().binaryEncoder(buffer, null);
  GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema);
  datumWriter.write(record, binaryEncoder);
  // binaryEncoder buffers internally — flush before reading the bytes back out.
  binaryEncoder.flush();
  return buffer.toByteArray();
}
}
/**
 * Writes {@code count} copies of the given record into an Avro container file.
 *
 * @param file  destination file (overwritten)
 * @param r     record to repeat
 * @param count number of copies to append
 * @throws IOException if writing fails
 */
public void createAvroFileWithRepeatingRecords(File file, GenericRecord r, int count) throws IOException {
  // try-with-resources on both stream and writer: the original leaked the
  // FileOutputStream (and the writer) if create()/append() threw before close().
  try (FileOutputStream stream = new FileOutputStream(file);
       DataFileWriter<GenericRecord> writer =
           new DataFileWriter<>(new GenericDatumWriter<GenericRecord>())) {
    writer.create(getSchema(), stream);
    for (int i = 0; i < count; ++i) {
      writer.append(r);
    }
  }
}
/**
 * Recreates {@code dir} and writes the {@code LINES} fixture into
 * {@code dir}/lines.avro as an Avro container file of UTF-8 byte buffers.
 *
 * @param dir directory that is wiped and repopulated with the fixture file
 * @throws IOException if the file cannot be written
 */
public static void writeLinesBytesFile(File dir) throws IOException {
  FileUtil.fullyDelete(dir);
  File linesFile = new File(dir, "lines.avro");
  linesFile.getParentFile().mkdirs();
  try (DataFileWriter<ByteBuffer> fileWriter =
      new DataFileWriter<>(new GenericDatumWriter<ByteBuffer>())) {
    fileWriter.create(Schema.create(Schema.Type.BYTES), linesFile);
    for (String line : LINES) {
      fileWriter.append(ByteBuffer.wrap(line.getBytes(StandardCharsets.UTF_8)));
    }
  }
}