private Coder<T> getOutputCoder() {
  if (parseFn == null) {
    return AvroCoder.of((Class<T>) type, internOrParseSchemaString(readerSchemaString));
  } else {
    return outputCoder;
  }
}
@Override
public Coder<CountingSource.CounterMark> getCheckpointMarkCoder() {
  return AvroCoder.of(CountingSource.CounterMark.class);
}
AvroSerializationDeserializationSchema(Class<T> clazz) {
  this.avroType = clazz;
  this.coder = AvroCoder.of(clazz);
  this.out = new ByteArrayOutputStream();
}
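// A minimal sketch (not taken from the snippet above) of how the serialize/deserialize side of
// such a schema might use these fields, assuming a Flink-style SerializationSchema /
// DeserializationSchema contract; the method names and error handling are assumptions, and the
// bodies require java.io.ByteArrayInputStream and java.io.IOException imports.
public byte[] serialize(T element) {
  try {
    out.reset();                 // reuse the ByteArrayOutputStream between elements
    coder.encode(element, out);  // AvroCoder writes the Avro binary encoding
    return out.toByteArray();
  } catch (IOException e) {
    throw new RuntimeException("Avro serialization failed for " + avroType, e);
  }
}

public T deserialize(byte[] message) throws IOException {
  // AvroCoder.decode reads the same binary encoding back into the target type.
  return coder.decode(new ByteArrayInputStream(message));
}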
public static void runCsvToAvro(SampleOptions options) throws IOException, IllegalArgumentException {
  FileSystems.setDefaultPipelineOptions(options);

  // Get the Avro schema.
  String schemaJson = getSchema(options.getAvroSchema());
  Schema schema = new Schema.Parser().parse(schemaJson);

  // Check schema field types before starting the Dataflow job.
  checkFieldTypes(schema);

  // Create the Pipeline object with the options we defined above.
  Pipeline pipeline = Pipeline.create(options);

  // Convert CSV to Avro.
  pipeline
      .apply("Read CSV files", TextIO.read().from(options.getInputFile()))
      .apply(
          "Convert CSV to Avro formatted data",
          ParDo.of(new ConvertCsvToAvro(schemaJson, options.getCsvDelimiter())))
      .setCoder(AvroCoder.of(GenericRecord.class, schema))
      .apply(
          "Write Avro formatted data",
          AvroIO.writeGenericRecords(schemaJson)
              .to(options.getOutput())
              .withCodec(CodecFactory.snappyCodec())
              .withSuffix(".avro"));

  // Run the pipeline.
  pipeline.run().waitUntilFinish();
}
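// For context, a pipeline like this is typically launched from a main method that parses
// SampleOptions from command-line flags; a minimal sketch (the flag names are implied by the
// getters used above, not confirmed by the snippet):
public static void main(String[] args) throws IOException {
  SampleOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(SampleOptions.class);
  runCsvToAvro(options);
}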
@Override
public <T> Coder<T> coderFor(
    TypeDescriptor<T> typeDescriptor, List<? extends Coder<?>> componentCoders)
    throws CannotProvideCoderException {
  try {
    return AvroCoder.of(typeDescriptor);
  } catch (AvroRuntimeException e) {
    throw new CannotProvideCoderException(
        String.format("%s is not compatible with Avro", typeDescriptor), e);
  }
}
}
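// A provider like this is usually registered with a pipeline's coder registry so that AvroCoder
// becomes a fallback for types the registry cannot otherwise infer. A minimal sketch; the
// provider's class name is not shown above, so "AvroCoderProvider" is an assumption.
Pipeline pipeline = Pipeline.create(options);
pipeline.getCoderRegistry().registerCoderProvider(new AvroCoderProvider());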
@Override
public Coder<IndexedRecord> getDefaultOutputCoder() {
  return AvroCoder.of(IndexedRecord.class, spec.getSchema());
}
}
/**
 * Helper to read from an Avro source given a {@link Schema}. Keep in mind that the configuration
 * object is altered to enable Avro input.
 */
public static HDFSFileSource<GenericRecord, AvroKey<GenericRecord>, NullWritable> fromAvro(
    String filepattern, Schema schema, Configuration conf) {
  return fromAvro(filepattern, AvroCoder.of(schema), conf);
}
@Override
public T decode(InputStream inputStream) throws CoderException, IOException {
  if (internalAvroCoder == null) {
    @SuppressWarnings("unchecked")
    AvroCoder<IndexedRecord> tCoder =
        (AvroCoder<IndexedRecord>)
            (AvroCoder<? extends IndexedRecord>) AvroCoder.of(getSchema());
    internalAvroCoder = tCoder;
  }
  return (T) internalAvroCoder.decode(inputStream);
}
public Coder<List> getAccumulatorCoder() {
  AvroCoder valueCoder = null;
  if (avroSchemaStr != null) {
    valueCoder = AvroCoder.of(new Schema.Parser().parse(avroSchemaStr));
  }
  return (Coder<List>)
      (avroSchemaStr == null
          ? ListCoder.of(NullableCoder.of(StringUtf8Coder.of()))
          : ListCoder.of(NullableCoder.of(valueCoder)));
}
}
@Test
public void testDeterministicInteger() {
  assertDeterministic(AvroCoder.of(Integer.class));
}
@Test
public void testDeterminismCollectionValue() {
  assertNonDeterministic(
      AvroCoder.of(OrderedSetOfNonDetValues.class),
      reasonField(UnorderedMapClass.class, "mapField", "may not be deterministically ordered"));
  assertNonDeterministic(
      AvroCoder.of(ListOfNonDetValues.class),
      reasonField(UnorderedMapClass.class, "mapField", "may not be deterministically ordered"));
}
@Test
public void testDeterministicUnorderedMap() {
  assertNonDeterministic(
      AvroCoder.of(UnorderedMapClass.class),
      reasonField(
          UnorderedMapClass.class,
          "mapField",
          "java.util.Map<java.lang.String, java.lang.String> "
              + "may not be deterministically ordered"));
}
@Test
public void testDeterminismUnorderedMap() {
  // LinkedHashMap is not deterministically ordered, so we should fail.
  assertNonDeterministic(
      AvroCoder.of(LinkedHashMapField.class),
      reasonField(
          LinkedHashMapField.class,
          "nonDeterministicMap",
          "java.util.LinkedHashMap<java.lang.String, java.lang.String> "
              + "may not be deterministically ordered"));
}
@Test
public void testDeterminismHasCustomSchema() {
  assertNonDeterministic(
      AvroCoder.of(HasCustomSchema.class),
      reasonField(
          HasCustomSchema.class,
          "withCustomSchema",
          "Custom schemas are only supported for subtypes of IndexedRecord."));
}
@Test
public void testEncodedTypeDescriptor() throws Exception {
  AvroCoder<Pojo> coder = AvroCoder.of(Pojo.class);
  assertThat(coder.getEncodedTypeDescriptor(), equalTo(TypeDescriptor.of(Pojo.class)));
}
@Test
public void testPojoEncoding() throws Exception {
  Pojo value = new Pojo("Hello", 42);
  AvroCoder<Pojo> coder = AvroCoder.of(Pojo.class);
  CoderProperties.coderDecodeEncodeEqual(coder, value);
}
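// The Pojo under test is not shown. For AvroCoder.of(Class) to encode it via Avro reflection it
// needs a no-argument constructor, and coderDecodeEncodeEqual relies on equals(); a hypothetical
// shape (the field names are assumptions):
static class Pojo {
  String text;
  int count;

  // Empty constructor required for Avro decoding via reflection.
  Pojo() {}

  Pojo(String text, int count) {
    this.text = text;
    this.count = count;
  }

  @Override
  public boolean equals(Object other) {
    if (!(other instanceof Pojo)) {
      return false;
    }
    Pojo that = (Pojo) other;
    return count == that.count && java.util.Objects.equals(text, that.text);
  }

  @Override
  public int hashCode() {
    return java.util.Objects.hash(text, count);
  }
}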
@Test
public void testDeterministicNonDeterministicChild() {
  // The superclass has non-deterministic fields.
  assertNonDeterministic(
      AvroCoder.of(SubclassOfUnorderedMapClass.class),
      reasonField(UnorderedMapClass.class, "mapField", "may not be deterministically ordered"));
}
@Test
public void testDeterminismCollection() {
  assertNonDeterministic(
      AvroCoder.of(StringCollection.class),
      reasonField(
          StringCollection.class,
          "stringCollection",
          "java.util.Collection<java.lang.String> may not be deterministically ordered"));
}
@Override
protected KV<AvroKey, NullWritable> nextPair() throws IOException, InterruptedException {
  // Not only is the AvroKey reused by the file format, but the underlying GenericRecord is as
  // well, so clone the record before handing the pair downstream.
  KV<AvroKey, NullWritable> kv = super.nextPair();
  GenericRecord gr = (GenericRecord) kv.getKey().datum();
  gr = CoderUtils.clone(AvroCoder.of(gr.getSchema()), gr);
  return KV.of(new AvroKey(gr), kv.getValue());
}
@Test
public void testPerKeyOutputCoder() {
  p.enableAbandonedNodeEnforcement(false);

  KvCoder<String, Long> inputCoder =
      KvCoder.of(AvroCoder.of(String.class), AvroCoder.of(Long.class));
  PCollection<KV<String, Long>> output =
      p.apply(Create.of(KV.of("foo", 1L)).withCoder(inputCoder)).apply(Latest.perKey());

  assertEquals("Should use input coder for outputs", inputCoder, output.getCoder());
}