/**
 * Returns the coder for the elements this source produces.
 *
 * <p>When a parse function is configured, the explicitly supplied {@code outputCoder} is used;
 * otherwise an {@link AvroCoder} is built from {@code type} and the reader schema string.
 */
private Coder<T> getOutputCoder() {
  if (parseFn != null) {
    return outputCoder;
  }
  return AvroCoder.of((Class<T>) type, internOrParseSchemaString(readerSchemaString));
}
/**
 * Encodes {@code value} as Avro onto {@code outputStream}.
 *
 * <p>The converter and the internal {@link AvroCoder} are both created lazily from the first
 * value passed in; when the internal coder is created, the converter's schema is also stored in
 * {@code avroSchemaHolder}.
 *
 * @param value the datum to encode; cast (unchecked) to {@code T}
 * @param outputStream destination for the encoded bytes
 * @throws IOException propagated from the internal coder's {@code encode}
 */
@Override
public void encode(Object value, OutputStream outputStream) throws IOException {
  T datum = (T) value;
  if (converter == null) {
    converter = ConvertToIndexedRecord.getConverter(datum);
  }
  IndexedRecord record = converter.convertToAvro(datum);

  if (internalAvroCoder == null) {
    // Publish the converter's schema before building the delegate coder.
    Schema converterSchema = converter.getSchema();
    avroSchemaHolder.put(converterSchema);

    AvroCoder<? extends IndexedRecord> wildcardCoder = AvroCoder.of(record.getSchema());
    @SuppressWarnings("unchecked")
    AvroCoder<IndexedRecord> recordCoder = (AvroCoder<IndexedRecord>) wildcardCoder;
    internalAvroCoder = recordCoder;
  }

  LOG.debug("Internal AvroCoder's schema is {}", internalAvroCoder.getSchema());
  LOG.debug("Encode value is {}", value);
  // NOTE(review): the value is converted a second time here instead of reusing the record
  // computed above — presumably equivalent; confirm before collapsing the two conversions.
  internalAvroCoder.encode(convertToAvro(value), outputStream);
}
/**
 * Decodes one value from {@code inputStream}.
 *
 * <p>The internal {@link AvroCoder} is created on first use from {@code getSchema()}.
 *
 * @param inputStream stream positioned at the start of an encoded record
 * @return the decoded record, cast (unchecked) to {@code T}
 * @throws CoderException propagated from the internal coder
 * @throws IOException propagated from the internal coder
 */
@Override
public T decode(InputStream inputStream) throws CoderException, IOException {
  if (internalAvroCoder == null) {
    AvroCoder<? extends IndexedRecord> wildcardCoder = AvroCoder.of(getSchema());
    @SuppressWarnings("unchecked")
    AvroCoder<IndexedRecord> recordCoder = (AvroCoder<IndexedRecord>) wildcardCoder;
    internalAvroCoder = recordCoder;
  }
  IndexedRecord decoded = internalAvroCoder.decode(inputStream);
  return (T) decoded;
}
/**
 * Checks that a coder built for a generic class with an explicit schema is reported as
 * non-deterministic, with the reason attributed to the {@code foo} field.
 */
@Test
public void testAvroCoderForGenerics() throws Exception {
  Schema fooSchema = AvroCoder.of(Foo.class).getSchema();

  // Record schema "ns.SomeGeneric" with a single field "foo" typed by Foo's schema.
  String genericSchemaJson =
      "{"
          + "\"type\":\"record\","
          + "\"name\":\"SomeGeneric\","
          + "\"namespace\":\"ns\","
          + "\"fields\":["
          + "  {\"name\":\"foo\", \"type\":"
          + fooSchema.toString()
          + "}"
          + "]}";
  Schema.Parser parser = new Schema.Parser();
  Schema schema = parser.parse(genericSchemaJson);

  @SuppressWarnings("rawtypes")
  AvroCoder<SomeGeneric> coder = AvroCoder.of(SomeGeneric.class, schema);

  assertNonDeterministic(coder, reasonField(SomeGeneric.class, "foo", "erasure"));
}
@Test public void testEncodingNotBuffered() throws Exception { // This test ensures that the coder doesn't read ahead and buffer data. // Reading ahead causes a problem if the stream consists of records of different // types. Pojo before = new Pojo("Hello", 42); AvroCoder<Pojo> coder = AvroCoder.of(Pojo.class); SerializableCoder<Integer> intCoder = SerializableCoder.of(Integer.class); ByteArrayOutputStream outStream = new ByteArrayOutputStream(); Context context = Context.NESTED; coder.encode(before, outStream, context); intCoder.encode(10, outStream, context); ByteArrayInputStream inStream = new ByteArrayInputStream(outStream.toByteArray()); Pojo after = coder.decode(inStream, context); Assert.assertEquals(before, after); Integer intAfter = intCoder.decode(inStream, context); Assert.assertEquals(Integer.valueOf(10), intAfter); }
@Test public void testAvroCoderTreeMapDeterminism() throws Exception, NonDeterministicException { TreeMapField size1 = new TreeMapField(); TreeMapField size2 = new TreeMapField(); // Different order for entries size1.field.put("hello", "world"); size1.field.put("another", "entry"); size2.field.put("another", "entry"); size2.field.put("hello", "world"); AvroCoder<TreeMapField> coder = AvroCoder.of(TreeMapField.class); coder.verifyDeterministic(); ByteArrayOutputStream outStream1 = new ByteArrayOutputStream(); ByteArrayOutputStream outStream2 = new ByteArrayOutputStream(); Context context = Context.NESTED; coder.encode(size1, outStream1, context); coder.encode(size2, outStream2, context); assertTrue(Arrays.equals(outStream1.toByteArray(), outStream2.toByteArray())); }
/** Checks that the coder for {@code Pojo} reports {@code Pojo} as its encoded type. */
@Test
public void testEncodedTypeDescriptor() throws Exception {
  AvroCoder<Pojo> coder = AvroCoder.of(Pojo.class);

  TypeDescriptor<Pojo> actual = coder.getEncodedTypeDescriptor();
  TypeDescriptor<Pojo> expected = TypeDescriptor.of(Pojo.class);

  assertThat(actual, equalTo(expected));
}
/**
 * Builds an Avro-writing sink for the given {@link AvroCoder}.
 *
 * <p>Each element is wrapped in an {@link AvroKey} and paired with a {@link NullWritable} value.
 * Note that {@code conf} is modified: the coder's schema is stored under
 * {@code avro.schema.output.key} so that Avro output is enabled.
 *
 * @param path output location passed through to {@code to(...)}
 * @param coder coder whose schema is written into the configuration
 * @param conf Hadoop configuration; mutated by this call
 * @return the sink, configured with {@code conf}
 */
public static <T> HDFSFileSink<T, AvroKey<T>, NullWritable> toAvro(String path,
                                                                   final AvroCoder<T> coder,
                                                                   Configuration conf) {
  SerializableFunction<T, KV<AvroKey<T>, NullWritable>> wrapInAvroKey =
      new SerializableFunction<T, KV<AvroKey<T>, NullWritable>>() {
        @Override
        public KV<AvroKey<T>, NullWritable> apply(T input) {
          AvroKey<T> key = new AvroKey<>(input);
          return KV.of(key, NullWritable.get());
        }
      };

  String schemaJson = coder.getSchema().toString();
  conf.set("avro.schema.output.key", schemaJson);

  // AvroKey.class is raw; go through Class<?> to obtain the parameterized class token.
  Class<AvroKey<T>> keyClass = (Class<AvroKey<T>>) (Class<?>) AvroKey.class;
  return to(path, AvroKeyOutputFormat.class, keyClass, NullWritable.class, wrapInAvroKey)
      .withConfiguration(conf);
}
/**
 * Decodes a single element from the given bytes using {@code coder} in the nested context.
 *
 * @param message the encoded bytes
 * @return the decoded element
 * @throws IOException propagated from the coder
 */
@Override
public T deserialize(byte[] message) throws IOException {
  ByteArrayInputStream messageStream = new ByteArrayInputStream(message);
  return coder.decode(messageStream, Coder.Context.NESTED);
}
/**
 * Encodes {@code element} with {@code coder} in the nested context and returns the bytes.
 *
 * <p>The output buffer is created on first use and reset before every call, so it is reused
 * across invocations.
 *
 * @param element the element to encode
 * @return a copy of the encoded bytes
 * @throws RuntimeException wrapping any {@link IOException} raised while encoding
 */
@Override
public byte[] serialize(T element) {
  if (out == null) {
    out = new ByteArrayOutputStream();
  }
  // Discard whatever the previous call left in the shared buffer.
  out.reset();

  try {
    coder.encode(element, out, Coder.Context.NESTED);
  } catch (IOException e) {
    throw new RuntimeException("Avro encoding failed.", e);
  }

  return out.toByteArray();
}
// Field initialization; the enclosing constructor/method header is outside this view.
// - encoder: a fresh EmptyOnDeserializationThreadLocal. NOTE(review): presumably a per-thread
//   holder that comes back empty after deserialization — confirm against that class.
// - reflectData: memoized supplier wrapping SerializableReflectDataSupplier(getType()).
//   NOTE(review): assuming Guava's Suppliers.memoize, the delegate is evaluated at most once
//   per memoized instance — confirm which Suppliers is imported.
this.encoder = new EmptyOnDeserializationThreadLocal<>(); this.reflectData = Suppliers.memoize(new SerializableReflectDataSupplier(getType()));
/** * Tests that a parameterized class can have an automatically generated schema if the generic * field is annotated with a union tag. */ @Test public void testGenericClassWithUnionAnnotation() throws Exception { // Cast is safe as long as the same coder is used for encoding and decoding. @SuppressWarnings({"unchecked", "rawtypes"}) AvroCoder<GenericWithAnnotation<String>> coder = (AvroCoder) AvroCoder.of(GenericWithAnnotation.class); assertThat( coder.getSchema().getField("onlySomeTypesAllowed").schema().getType(), equalTo(Schema.Type.UNION)); CoderProperties.coderDecodeEncodeEqual(coder, new GenericWithAnnotation<>("hello")); }
/**
 * Builds an Avro-reading source for the given {@link AvroCoder}.
 *
 * <p>Each record's datum is taken out of its {@link AvroKey} and cloned through {@code coder}.
 * Note that {@code conf} is modified: the coder's schema is stored under
 * {@code avro.schema.input.key} so that Avro input is enabled.
 *
 * @param filepattern input pattern passed through to {@code from(...)}
 * @param coder coder used for cloning and whose schema is written into the configuration
 * @param conf Hadoop configuration; mutated by this call
 * @return the source, configured with {@code conf}
 */
public static <T> HDFSFileSource<T, AvroKey<T>, NullWritable> fromAvro(String filepattern,
                                                                       final AvroCoder<T> coder,
                                                                       Configuration conf) {
  Class<AvroKeyInputFormat<T>> inputFormat = castClass(AvroKeyInputFormat.class);

  SerializableFunction<KV<AvroKey<T>, NullWritable>, T> unwrapAvroKey =
      new SerializableFunction<KV<AvroKey<T>, NullWritable>, T>() {
        @Override
        public T apply(KV<AvroKey<T>, NullWritable> input) {
          try {
            T datum = input.getKey().datum();
            return CoderUtils.clone(coder, datum);
          } catch (CoderException e) {
            throw new RuntimeException(e);
          }
        }
      };

  String schemaJson = coder.getSchema().toString();
  conf.set("avro.schema.input.key", schemaJson);

  return from(filepattern, inputFormat, coder, unwrapAvroKey).withConfiguration(conf);
}
/** Returns an {@link AvroCoder} for {@code CountingSource.CounterMark} checkpoint marks. */
@Override
public Coder<CountingSource.CounterMark> getCheckpointMarkCoder() {
  Coder<CountingSource.CounterMark> markCoder = AvroCoder.of(CountingSource.CounterMark.class);
  return markCoder;
}
@Test public void testGenericRecordEncoding() throws Exception { String schemaString = "{\"namespace\": \"example.avro\",\n" + " \"type\": \"record\",\n" + " \"name\": \"User\",\n" + " \"fields\": [\n" + " {\"name\": \"name\", \"type\": \"string\"},\n" + " {\"name\": \"favorite_number\", \"type\": [\"int\", \"null\"]},\n" + " {\"name\": \"favorite_color\", \"type\": [\"string\", \"null\"]}\n" + " ]\n" + "}"; Schema schema = (new Schema.Parser()).parse(schemaString); GenericRecord before = new GenericData.Record(schema); before.put("name", "Bob"); before.put("favorite_number", 256); // Leave favorite_color null AvroCoder<GenericRecord> coder = AvroCoder.of(GenericRecord.class, schema); CoderProperties.coderDecodeEncodeEqual(coder, before); Assert.assertEquals(schema, coder.getSchema()); }
/**
 * Encodes {@code value} as Avro onto {@code outputStream}.
 *
 * <p>The converter and the internal {@link AvroCoder} are both created lazily from the first
 * value passed in; when the internal coder is created, the converter's schema is also stored in
 * {@code avroSchemaHolder}.
 *
 * @param value the datum to encode; cast (unchecked) to {@code T}
 * @param outputStream destination for the encoded bytes
 * @throws IOException propagated from the internal coder's {@code encode}
 */
@Override
public void encode(Object value, OutputStream outputStream) throws IOException {
  T datum = (T) value;
  if (converter == null) {
    converter = ConvertToIndexedRecord.getConverter(datum);
  }
  IndexedRecord record = converter.convertToAvro(datum);

  if (internalAvroCoder == null) {
    // Publish the converter's schema before building the delegate coder.
    Schema converterSchema = converter.getSchema();
    avroSchemaHolder.put(converterSchema);

    AvroCoder<? extends IndexedRecord> wildcardCoder = AvroCoder.of(record.getSchema());
    @SuppressWarnings("unchecked")
    AvroCoder<IndexedRecord> recordCoder = (AvroCoder<IndexedRecord>) wildcardCoder;
    internalAvroCoder = recordCoder;
  }

  LOG.debug("Internal AvroCoder's schema is {}", internalAvroCoder.getSchema());
  LOG.debug("Encode value is {}", value);
  // NOTE(review): the value is converted a second time here instead of reusing the record
  // computed above — presumably equivalent; confirm before collapsing the two conversions.
  internalAvroCoder.encode(convertToAvro(value), outputStream);
}
/**
 * Decodes one value from {@code inputStream}.
 *
 * <p>The internal {@link AvroCoder} is created on first use from {@code getSchema()}.
 *
 * @param inputStream stream positioned at the start of an encoded record
 * @return the decoded record, cast (unchecked) to {@code T}
 * @throws CoderException propagated from the internal coder
 * @throws IOException propagated from the internal coder
 */
@Override
public T decode(InputStream inputStream) throws CoderException, IOException {
  if (internalAvroCoder == null) {
    AvroCoder<? extends IndexedRecord> wildcardCoder = AvroCoder.of(getSchema());
    @SuppressWarnings("unchecked")
    AvroCoder<IndexedRecord> recordCoder = (AvroCoder<IndexedRecord>) wildcardCoder;
    internalAvroCoder = recordCoder;
  }
  IndexedRecord decoded = internalAvroCoder.decode(inputStream);
  return (T) decoded;
}