/**
 * Builds a sink that writes elements as Avro, using {@link AvroKey} keys paired with
 * {@link NullWritable} values.
 *
 * <p>The supplied configuration is modified in place: the schema of {@code coder} is stored
 * under {@code avro.schema.output.key} before the configuration is attached to the sink.
 *
 * @param path output path, passed through unchanged to {@code to(...)}
 * @param coder Avro coder whose schema becomes the output key schema
 * @param conf configuration to attach to the sink; mutated by this call
 * @return the sink returned by {@code to(...)} with {@code conf} applied
 */
public static <T> HDFSFileSink<T, AvroKey<T>, NullWritable> toAvro(String path,
    final AvroCoder<T> coder, Configuration conf) {
  // Publish the writer schema through the configuration.
  conf.set("avro.schema.output.key", coder.getSchema().toString());

  // AvroKey.class carries no type argument, so it is re-typed through a wildcard cast.
  @SuppressWarnings("unchecked")
  Class<AvroKey<T>> avroKeyClass = (Class<AvroKey<T>>) (Class<?>) AvroKey.class;

  // Wraps every element in an AvroKey and pairs it with the NullWritable singleton.
  SerializableFunction<T, KV<AvroKey<T>, NullWritable>> wrapInAvroKey =
      new SerializableFunction<T, KV<AvroKey<T>, NullWritable>>() {
        @Override
        public KV<AvroKey<T>, NullWritable> apply(T input) {
          AvroKey<T> key = new AvroKey<>(input);
          return KV.of(key, NullWritable.get());
        }
      };

  return to(path, AvroKeyOutputFormat.class, avroKeyClass, NullWritable.class, wrapInAvroKey)
      .withConfiguration(conf);
}
/**
 * Builds a source that reads Avro records as {@link AvroKey} keys paired with
 * {@link NullWritable} values and emits the datum held by each key.
 *
 * <p>The supplied configuration is modified in place: the schema of {@code coder} is stored
 * under {@code avro.schema.input.key} before the configuration is attached to the source.
 *
 * @param filepattern file pattern, passed through unchanged to {@code from(...)}
 * @param coder Avro coder supplying the input schema; also used to copy each datum
 * @param conf configuration to attach to the source; mutated by this call
 * @return the source returned by {@code from(...)} with {@code conf} applied
 */
public static <T> HDFSFileSource<T, AvroKey<T>, NullWritable> fromAvro(String filepattern,
    final AvroCoder<T> coder, Configuration conf) {
  Class<AvroKeyInputFormat<T>> inputFormatClass = castClass(AvroKeyInputFormat.class);

  // Publish the reader schema through the configuration.
  conf.set("avro.schema.input.key", coder.getSchema().toString());

  // Emits a copy of the datum (made via the coder) rather than the object held by the key.
  SerializableFunction<KV<AvroKey<T>, NullWritable>, T> datumExtractor =
      new SerializableFunction<KV<AvroKey<T>, NullWritable>, T>() {
        @Override
        public T apply(KV<AvroKey<T>, NullWritable> input) {
          T datum = input.getKey().datum();
          try {
            return CoderUtils.clone(coder, datum);
          } catch (CoderException e) {
            // The function interface declares no checked exceptions; rethrow unchecked.
            throw new RuntimeException(e);
          }
        }
      };

  return from(filepattern, inputFormatClass, coder, datumExtractor).withConfiguration(conf);
}
/**
 * Serializes an {@link AvroCoder} into a {@link CloudObject} carrying the coder's schema (as a
 * string) and the name of its type.
 *
 * @param target coder to translate
 * @param sdkComponents not used by this implementation
 * @return a cloud object for {@code AvroCoder} with the schema and type fields populated
 */
@Override
public CloudObject toCloudObject(AvroCoder target, SdkComponents sdkComponents) {
  CloudObject cloudObject = CloudObject.forClass(AvroCoder.class);

  String schemaText = target.getSchema().toString();
  Structs.addString(cloudObject, SCHEMA_FIELD, schemaText);

  String typeName = target.getType().getName();
  Structs.addString(cloudObject, TYPE_FIELD, typeName);

  return cloudObject;
}
/**
 * Encodes {@code value} as Avro through an internal {@link AvroCoder} that is created on first
 * use.
 *
 * <p>On the first call the converter is looked up from the value, the converter's schema is put
 * into {@code avroSchemaHolder}, and the internal coder is built from the schema of the converted
 * record. Later calls reuse both.
 *
 * @param value object to encode; cast to {@code T} without a runtime check
 * @param outputStream destination for the encoded bytes
 * @throws IOException if the internal coder fails to write
 */
@Override
public void encode(Object value, OutputStream outputStream) throws IOException {
  // Resolve the converter lazily from the first value seen.
  if (converter == null) {
    converter = ConvertToIndexedRecord.getConverter((T) value);
  }
  IndexedRecord indexedRecord = converter.convertToAvro((T) value);

  // Build the delegate coder lazily, publishing the converter's schema alongside it.
  if (internalAvroCoder == null) {
    avroSchemaHolder.put(converter.getSchema());
    @SuppressWarnings("unchecked")
    AvroCoder<IndexedRecord> delegate =
        (AvroCoder<IndexedRecord>)
            (AvroCoder<? extends IndexedRecord>) AvroCoder.of(indexedRecord.getSchema());
    internalAvroCoder = delegate;
  }

  LOG.debug("Internal AvroCoder's schema is {}", internalAvroCoder.getSchema());
  LOG.debug("Encode value is {}", value);

  // NOTE(review): the value is converted a second time via convertToAvro(value) for the actual
  // write; indexedRecord above is only used for its schema. Confirm whether both conversions
  // are equivalent and one could be reused.
  internalAvroCoder.encode(convertToAvro(value), outputStream);
}
/**
 * Encodes {@code value} as Avro through an internal {@link AvroCoder} that is created on first
 * use.
 *
 * <p>On the first call the converter is looked up from the value, the converter's schema is put
 * into {@code avroSchemaHolder}, and the internal coder is built from the schema of the converted
 * record. Later calls reuse both.
 *
 * @param value object to encode; cast to {@code T} without a runtime check
 * @param outputStream destination for the encoded bytes
 * @throws IOException if the internal coder fails to write
 */
@Override
public void encode(Object value, OutputStream outputStream) throws IOException {
  // Resolve the converter lazily from the first value seen.
  if (converter == null) {
    converter = ConvertToIndexedRecord.getConverter((T) value);
  }
  IndexedRecord indexedRecord = converter.convertToAvro((T) value);

  // Build the delegate coder lazily, publishing the converter's schema alongside it.
  if (internalAvroCoder == null) {
    avroSchemaHolder.put(converter.getSchema());
    @SuppressWarnings("unchecked")
    AvroCoder<IndexedRecord> delegate =
        (AvroCoder<IndexedRecord>)
            (AvroCoder<? extends IndexedRecord>) AvroCoder.of(indexedRecord.getSchema());
    internalAvroCoder = delegate;
  }

  LOG.debug("Internal AvroCoder's schema is {}", internalAvroCoder.getSchema());
  LOG.debug("Encode value is {}", value);

  // NOTE(review): the value is converted a second time via convertToAvro(value) for the actual
  // write; indexedRecord above is only used for its schema. Confirm whether both conversions
  // are equivalent and one could be reused.
  internalAvroCoder.encode(convertToAvro(value), outputStream);
}
/**
 * Checks that a coder built for {@code SomeGeneric} from an explicit record schema (one field
 * {@code foo} typed with the schema of {@code Foo}) is reported as non-deterministic, with the
 * reason attributed to erasure on field {@code foo}.
 */
@Test
public void testAvroCoderForGenerics() throws Exception {
  String fooSchemaJson = AvroCoder.of(Foo.class).getSchema().toString();

  // Hand-written record schema embedding Foo's schema as the type of the single field.
  String recordSchemaJson =
      "{"
          + "\"type\":\"record\","
          + "\"name\":\"SomeGeneric\","
          + "\"namespace\":\"ns\","
          + "\"fields\":["
          + " {\"name\":\"foo\", \"type\":"
          + fooSchemaJson
          + "}"
          + "]}";
  Schema schema = new Schema.Parser().parse(recordSchemaJson);

  @SuppressWarnings("rawtypes")
  AvroCoder<SomeGeneric> coder = AvroCoder.of(SomeGeneric.class, schema);

  assertNonDeterministic(coder, reasonField(SomeGeneric.class, "foo", "erasure"));
}
/** * Tests that a parameterized class can have an automatically generated schema if the generic * field is annotated with a union tag. */ @Test public void testGenericClassWithUnionAnnotation() throws Exception { // Cast is safe as long as the same coder is used for encoding and decoding. @SuppressWarnings({"unchecked", "rawtypes"}) AvroCoder<GenericWithAnnotation<String>> coder = (AvroCoder) AvroCoder.of(GenericWithAnnotation.class); assertThat( coder.getSchema().getField("onlySomeTypesAllowed").schema().getType(), equalTo(Schema.Type.UNION)); CoderProperties.coderDecodeEncodeEqual(coder, new GenericWithAnnotation<>("hello")); }
@Test public void testGenericRecordEncoding() throws Exception { String schemaString = "{\"namespace\": \"example.avro\",\n" + " \"type\": \"record\",\n" + " \"name\": \"User\",\n" + " \"fields\": [\n" + " {\"name\": \"name\", \"type\": \"string\"},\n" + " {\"name\": \"favorite_number\", \"type\": [\"int\", \"null\"]},\n" + " {\"name\": \"favorite_color\", \"type\": [\"string\", \"null\"]}\n" + " ]\n" + "}"; Schema schema = (new Schema.Parser()).parse(schemaString); GenericRecord before = new GenericData.Record(schema); before.put("name", "Bob"); before.put("favorite_number", 256); // Leave favorite_color null AvroCoder<GenericRecord> coder = AvroCoder.of(GenericRecord.class, schema); CoderProperties.coderDecodeEncodeEqual(coder, before); Assert.assertEquals(schema, coder.getSchema()); }
DatumWriter<T> datumWriter = coder.getType().equals(GenericRecord.class) ? new GenericDatumWriter<>(coder.getSchema()) : new ReflectDatumWriter<>(coder.getSchema()); try (DataFileWriter<T> writer = new DataFileWriter<>(datumWriter)) { writer.setCodec(CodecFactory.fromString(codec)); writer.create(coder.getSchema(), os);
for (Schema.Field field : AvroCoder.of(Bird.class).getSchema().getFields()) { Schema schema = field.schema(); if (field.name().equals("birthdayMoney")) { Schema subBirdSchema = AvroCoder.of(Bird.SubBird.class).getSchema(); GenericRecord nestedRecord = new GenericData.Record(subBirdSchema); nestedRecord.put("species", "other");
.apply(ParDo.of(new TableRowToIndexedRecordFn(defaultOutputCoder.getSchema()))) .setCoder(defaultOutputCoder);
.apply(ParDo.of(new TableRowToIndexedRecordFn(defaultOutputCoder.getSchema()))) .setCoder(defaultOutputCoder);