/**
 * Returns the fingerprint of the given schema, which here is simply the
 * schema's {@code toString()} rendering.
 *
 * @param schema the schema to fingerprint
 * @return the string produced by {@code schema.toString()}
 */
@Override
public String getFingerprint(Schema schema) {
    final String rendered = schema.toString();
    return rendered;
}
/**
 * Renders this object as {@code GenericRecord("<schema>")}, where
 * {@code <schema>} is the string form of the {@code schema} field.
 *
 * @return the formatted description
 */
@Override
public String toString() {
    final String schemaText = schema.toString();
    return String.format("GenericRecord(\"%s\")", schemaText);
}
/**
 * Writes the string form of the {@code schema} field to the given stream
 * via {@link ObjectOutputStream#writeUTF(String)}.
 *
 * @param oos the stream to write to
 * @throws IOException if writing to the stream fails
 */
private void writeObject(ObjectOutputStream oos) throws IOException {
    final String schemaText = schema.toString();
    // NOTE(review): writeUTF rejects strings whose encoded form exceeds 65535 bytes;
    // confirm schemas handled here stay below that limit.
    oos.writeUTF(schemaText);
}
/**
 * Writes the {@code schema} field to the given stream.
 *
 * <p>A boolean presence flag is written first. When the schema is non-null,
 * the flag is {@code true} and is followed by {@code schema.toString(false)}
 * written with {@code writeUTF}; otherwise only {@code false} is written.
 *
 * @param oos the stream to write to
 * @throws IOException if writing to the stream fails
 */
private void writeObject(ObjectOutputStream oos) throws IOException {
    final boolean hasSchema = schema != null;
    oos.writeBoolean(hasSchema);
    if (hasSchema) {
        oos.writeUTF(schema.toString(false));
    }
}
/**
 * Writes the given schema to the output as a single string, using
 * {@code object.toString(false)} as its representation.
 *
 * @param kryo   the Kryo instance (not used by this method)
 * @param output the output to write the schema string to
 * @param object the schema to write
 */
@Override
public void write(Kryo kryo, Output output, Schema object) {
    output.writeString(object.toString(false));
}
/**
 * Writes this object's state to the given stream.
 *
 * <p>After the default fields, the following are appended in order:
 * <ol>
 *   <li>one byte: {@code codec.getCodecByte()}, or {@code -1} when {@code codec} is null;</li>
 *   <li>one int: the byte length of the user-defined schema, or {@code 0} when
 *       {@code userDefinedSchema} is null;</li>
 *   <li>the schema bytes themselves (only when a schema is present), obtained from
 *       {@code userDefinedSchema.toString()} encoded with
 *       {@code ConfigConstants.DEFAULT_CHARSET}.</li>
 * </ol>
 *
 * @param out the stream to write to
 * @throws IOException if writing to the stream fails
 */
private void writeObject(java.io.ObjectOutputStream out) throws IOException {
    out.defaultWriteObject();

    // -1 marks "no codec configured".
    final int codecByte;
    if (codec == null) {
        codecByte = -1;
    } else {
        codecByte = codec.getCodecByte();
    }
    out.writeByte(codecByte);

    // A zero length marks "no user-defined schema"; nothing follows it.
    if (userDefinedSchema == null) {
        out.writeInt(0);
        return;
    }

    final byte[] schemaBytes = userDefinedSchema.toString().getBytes(ConfigConstants.DEFAULT_CHARSET);
    out.writeInt(schemaBytes.length);
    out.write(schemaBytes);
}
/**
 * Returns the writer key for a tuple: the full string form of the schema of the
 * {@code GenericRecord} stored at index 0 of the tuple.
 *
 * <p>AvroGenericRecordBolt has to override this method because records with different
 * schemas cannot be written to the same file. Using the complete schema as the key makes
 * AbstractHdfsBolt associate a separate writer with every distinct schema.
 *
 * @param tuple the tuple whose first value is a {@code GenericRecord}
 * @return the record schema rendered via {@code toString()}
 */
@Override
protected String getWriterKey(Tuple tuple) {
    final GenericRecord record = (GenericRecord) tuple.getValue(0);
    return record.getSchema().toString();
}
/**
 * Writes this snapshot to the given output: first the name of {@code runtimeType},
 * then {@code schema.toString(false)}, each as a UTF string.
 *
 * <p>Both fields are null-checked before anything is written.
 *
 * @param out the output view to write to
 * @throws IOException if writing fails
 */
@Override
public void writeSnapshot(DataOutputView out) throws IOException {
    // Validate both fields up front so that nothing is written for an incomplete snapshot.
    checkNotNull(runtimeType);
    checkNotNull(schema);

    final String typeName = runtimeType.getName();
    out.writeUTF(typeName);

    final String schemaText = schema.toString(false);
    out.writeUTF(schemaText);
}
/**
 * Creates a ParquetWriterFactory that accepts and writes Avro generic types.
 * The Parquet writers use the given schema to build and write the columnar data.
 *
 * @param schema The schema of the generic type.
 * @return a factory whose writers are created from the schema's string form and
 *         {@code GenericData.get()}
 */
public static ParquetWriterFactory<GenericRecord> forGenericRecord(Schema schema) {
    // The builder captures the schema's string form rather than the Schema object itself.
    final String schemaJson = schema.toString();
    final ParquetBuilder<GenericRecord> writerBuilder =
            out -> createAvroParquetWriter(schemaJson, GenericData.get(), out);
    return new ParquetWriterFactory<>(writerBuilder);
}
/**
 * Creates an Avro serialization schema for the given specific record class.
 *
 * <p>The Avro schema is looked up from the record class via {@code SpecificData}, its
 * string form is kept alongside it, and the datum writer, output buffer and binary
 * encoder used for serialization are set up.
 *
 * @param recordClazz Avro record class used to serialize Flink's row to Avro's record;
 *                    must not be null
 */
public AvroRowSerializationSchema(Class<? extends SpecificRecord> recordClazz) {
    Preconditions.checkNotNull(recordClazz, "Avro record class must not be null.");

    // Schema-related state derived from the record class.
    this.recordClazz = recordClazz;
    this.schema = SpecificData.get().getSchema(recordClazz);
    this.schemaString = this.schema.toString();

    // Serialization machinery: writer, target buffer, and an encoder on top of that buffer.
    this.datumWriter = new SpecificDatumWriter<>(this.schema);
    this.arrayOutputStream = new ByteArrayOutputStream();
    this.encoder = EncoderFactory.get().binaryEncoder(this.arrayOutputStream, null);
}
/**
 * Creates a ParquetWriterFactory for the given type. The Parquet writers use Avro
 * to reflectively create a schema for the type and use that schema to write the
 * columnar data.
 *
 * @param type The class of the type to write.
 * @param <T>  The type of the records to write.
 * @return a factory whose writers are created from the reflected schema's string form
 *         and {@code ReflectData.get()}
 */
public static <T> ParquetWriterFactory<T> forReflectRecord(Class<T> type) {
    // The builder captures the schema's string form, computed once here.
    final String schemaJson = ReflectData.get().getSchema(type).toString();
    final ParquetBuilder<T> writerBuilder =
            out -> createAvroParquetWriter(schemaJson, ReflectData.get(), out);
    return new ParquetWriterFactory<>(writerBuilder);
}
/**
 * Creates a ParquetWriterFactory for an Avro specific type. The Parquet writers use the
 * schema of that specific type to build and write the columnar data.
 *
 * @param type The class of the type to write.
 * @param <T>  The specific record type to write.
 * @return a factory whose writers are created from the type's schema string and
 *         {@code SpecificData.get()}
 */
public static <T extends SpecificRecordBase> ParquetWriterFactory<T> forSpecificRecord(Class<T> type) {
    // The builder captures the schema's string form, computed once here.
    final String schemaJson = SpecificData.get().getSchema(type).toString();
    final ParquetBuilder<T> writerBuilder =
            out -> createAvroParquetWriter(schemaJson, SpecificData.get(), out);
    return new ParquetWriterFactory<>(writerBuilder);
}
/**
 * Round-trips the generic test row through serialization and deserialization schemas
 * built from the schema string, and expects the original row back.
 */
@Test
public void testGenericSerializeDeserialize() throws IOException {
    final Tuple3<GenericRecord, Row, Schema> fixture = AvroTestUtils.getGenericTestData();

    final AvroRowSerializationSchema serializer =
            new AvroRowSerializationSchema(fixture.f2.toString());
    final AvroRowDeserializationSchema deserializer =
            new AvroRowDeserializationSchema(fixture.f2.toString());

    final byte[] serialized = serializer.serialize(fixture.f1);
    final Row roundTripped = deserializer.deserialize(serialized);

    assertEquals(fixture.f1, roundTripped);
}
/**
 * Converts the {@code User} class schema, rendered with {@code toString(true)}, to type
 * information and validates the result.
 */
@Test
public void testAvroSchemaConversion() {
    validateUserSchema(
            AvroSchemaConverter.convertToTypeInfo(User.getClassSchema().toString(true)));
}
/**
 * Deserializes the same bytes three times with one deserialization schema and expects
 * the third result to equal the original generic test row.
 */
@Test
public void testGenericDeserializeSeveralTimes() throws IOException {
    final Tuple3<GenericRecord, Row, Schema> fixture = AvroTestUtils.getGenericTestData();

    final AvroRowSerializationSchema serializer =
            new AvroRowSerializationSchema(fixture.f2.toString());
    final AvroRowDeserializationSchema deserializer =
            new AvroRowDeserializationSchema(fixture.f2.toString());

    final byte[] serialized = serializer.serialize(fixture.f1);

    // Two throwaway passes before the one whose result is checked.
    for (int pass = 0; pass < 2; pass++) {
        deserializer.deserialize(serialized);
    }
    final Row roundTripped = deserializer.deserialize(serialized);

    assertEquals(fixture.f1, roundTripped);
}
/**
 * Serializes the same generic test row three times with one serialization schema and
 * expects the third output to deserialize back to the original row.
 */
@Test
public void testGenericSerializeSeveralTimes() throws IOException {
    final Tuple3<GenericRecord, Row, Schema> fixture = AvroTestUtils.getGenericTestData();

    final AvroRowSerializationSchema serializer =
            new AvroRowSerializationSchema(fixture.f2.toString());
    final AvroRowDeserializationSchema deserializer =
            new AvroRowDeserializationSchema(fixture.f2.toString());

    // Two throwaway passes before the one whose output is checked.
    for (int pass = 0; pass < 2; pass++) {
        serializer.serialize(fixture.f1);
    }
    final byte[] serialized = serializer.serialize(fixture.f1);

    final Row roundTripped = deserializer.deserialize(serialized);
    assertEquals(fixture.f1, roundTripped);
}
/**
 * Round-trips the specific test row through serialization and deserialization schemas
 * built from the specific record's schema string, and expects the original row back.
 */
@Test
public void testSpecificSerializeDeserializeFromSchema() throws IOException {
    final Tuple3<Class<? extends SpecificRecord>, SpecificRecord, Row> fixture =
            AvroTestUtils.getSpecificTestData();
    final String schemaJson = fixture.f1.getSchema().toString();

    final AvroRowSerializationSchema serializer = new AvroRowSerializationSchema(schemaJson);
    final AvroRowDeserializationSchema deserializer = new AvroRowDeserializationSchema(schemaJson);

    final byte[] serialized = serializer.serialize(fixture.f2);
    final Row roundTripped = deserializer.deserialize(serialized);

    assertEquals(fixture.f2, roundTripped);
}
@Test public void testSerializability() throws Exception { final Tuple3<Class<? extends SpecificRecord>, SpecificRecord, Row> testData = AvroTestUtils.getSpecificTestData(); final String schemaString = testData.f1.getSchema().toString(); // from class final AvroRowSerializationSchema classSer = new AvroRowSerializationSchema(testData.f0); final AvroRowDeserializationSchema classDeser = new AvroRowDeserializationSchema(testData.f0); testSerializability(classSer, classDeser, testData.f2); // from schema string final AvroRowSerializationSchema schemaSer = new AvroRowSerializationSchema(schemaString); final AvroRowDeserializationSchema schemaDeser = new AvroRowDeserializationSchema(schemaString); testSerializability(schemaSer, schemaDeser, testData.f2); }
/**
 * Deserializes the same bytes three times with one schema-string-based deserialization
 * schema and expects the third result to equal the original specific test row.
 */
@Test
public void testSpecificDeserializeFromSchemaSeveralTimes() throws IOException {
    final Tuple3<Class<? extends SpecificRecord>, SpecificRecord, Row> fixture =
            AvroTestUtils.getSpecificTestData();
    final String schemaJson = fixture.f1.getSchema().toString();

    final AvroRowSerializationSchema serializer = new AvroRowSerializationSchema(schemaJson);
    final AvroRowDeserializationSchema deserializer = new AvroRowDeserializationSchema(schemaJson);

    final byte[] serialized = serializer.serialize(fixture.f2);

    // Two throwaway passes before the one whose result is checked.
    for (int pass = 0; pass < 2; pass++) {
        deserializer.deserialize(serialized);
    }
    final Row roundTripped = deserializer.deserialize(serialized);

    assertEquals(fixture.f2, roundTripped);
}
/**
 * Serializes the same specific test row three times with one schema-string-based
 * serialization schema and expects the third output to deserialize back to the
 * original row.
 */
@Test
public void testSpecificSerializeFromSchemaSeveralTimes() throws IOException {
    final Tuple3<Class<? extends SpecificRecord>, SpecificRecord, Row> fixture =
            AvroTestUtils.getSpecificTestData();
    final String schemaJson = fixture.f1.getSchema().toString();

    final AvroRowSerializationSchema serializer = new AvroRowSerializationSchema(schemaJson);
    final AvroRowDeserializationSchema deserializer = new AvroRowDeserializationSchema(schemaJson);

    // Two throwaway passes before the one whose output is checked.
    for (int pass = 0; pass < 2; pass++) {
        serializer.serialize(fixture.f2);
    }
    final byte[] serialized = serializer.serialize(fixture.f2);

    final Row roundTripped = deserializer.deserialize(serialized);
    assertEquals(fixture.f2, roundTripped);
}