/**
 * Writes two {@code GenericClass} values to a single unsharded file with string, long and
 * byte-array metadata entries attached, then reopens the file and checks that each metadata
 * entry can be read back.
 */
@Test
@SuppressWarnings("unchecked")
@Category(NeedsRunner.class)
public void testMetadata() throws Exception {
  List<GenericClass> values =
      ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
  File outputFile = tmpFolder.newFile("output.avro");

  writePipeline
      .apply(Create.of(values))
      .apply(
          AvroIO.write(GenericClass.class)
              .to(outputFile.getAbsolutePath())
              .withoutSharding()
              .withMetadata(
                  ImmutableMap.of(
                      "stringKey", "stringValue",
                      "longKey", 100L,
                      "bytesKey", "bytesValue".getBytes(Charsets.UTF_8))));
  writePipeline.run();

  // The file stream is declared as its own resource so that it is still closed if constructing
  // the DataFileStream on top of it fails.
  try (FileInputStream fileStream = new FileInputStream(outputFile);
      DataFileStream dataFileStream =
          new DataFileStream(fileStream, new GenericDatumReader())) {
    assertEquals("stringValue", dataFileStream.getMetaString("stringKey"));
    assertEquals(100L, dataFileStream.getMetaLong("longKey"));
    assertArrayEquals(
        "bytesValue".getBytes(Charsets.UTF_8), dataFileStream.getMeta("bytesKey"));
  }
}
/**
 * Writes two {@code GenericClass} values to a single unsharded file using the deflate codec,
 * reads them back through {@code AvroIO.read}, and checks that the file's {@code avro.codec}
 * metadata entry is {@code "deflate"}.
 */
@Test
@SuppressWarnings("unchecked")
@Category(NeedsRunner.class)
public void testCompressedWriteAndReadASingleFile() throws Throwable {
  List<GenericClass> values =
      ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
  File outputFile = tmpFolder.newFile("output.avro");

  writePipeline
      .apply(Create.of(values))
      .apply(
          AvroIO.write(GenericClass.class)
              .to(outputFile.getAbsolutePath())
              .withoutSharding()
              .withCodec(CodecFactory.deflateCodec(9)));
  writePipeline.run();

  PAssert.that(
          readPipeline.apply(AvroIO.read(GenericClass.class).from(outputFile.getAbsolutePath())))
      .containsInAnyOrder(values);
  readPipeline.run();

  // The file stream is declared as its own resource so that it is still closed if constructing
  // the DataFileStream on top of it fails.
  try (FileInputStream fileStream = new FileInputStream(outputFile);
      DataFileStream dataFileStream =
          new DataFileStream(fileStream, new GenericDatumReader())) {
    assertEquals("deflate", dataFileStream.getMetaString("avro.codec"));
  }
}
/**
 * Writes two {@code GenericClass} values to a single unsharded file using the null codec, reads
 * them back through {@code AvroIO.read}, and checks that the file's {@code avro.codec} metadata
 * entry is {@code "null"}.
 */
@Test
@SuppressWarnings("unchecked")
@Category(NeedsRunner.class)
public void testWriteThenReadASingleFileWithNullCodec() throws Throwable {
  List<GenericClass> values =
      ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
  File outputFile = tmpFolder.newFile("output.avro");

  writePipeline
      .apply(Create.of(values))
      .apply(
          AvroIO.write(GenericClass.class)
              .to(outputFile.getAbsolutePath())
              .withoutSharding()
              .withCodec(CodecFactory.nullCodec()));
  writePipeline.run();

  PAssert.that(
          readPipeline.apply(AvroIO.read(GenericClass.class).from(outputFile.getAbsolutePath())))
      .containsInAnyOrder(values);
  readPipeline.run();

  // The file stream is declared as its own resource so that it is still closed if constructing
  // the DataFileStream on top of it fails.
  try (FileInputStream fileStream = new FileInputStream(outputFile);
      DataFileStream dataFileStream =
          new DataFileStream(fileStream, new GenericDatumReader())) {
    assertEquals("null", dataFileStream.getMetaString("avro.codec"));
  }
}
.apply(AvroIO.write(GenericClass.class).to(outputFile.getAbsolutePath()).withoutSharding()); writePipeline.run();
/**
 * Verifies that {@code AvroIO} can read data written with an old class into an upgraded version
 * of that class, provided the schema resolution process succeeds. The case exercised here is an
 * upgrade that adds a new, {@code @Nullable} field.
 *
 * <p>For more information, see http://avro.apache.org/docs/1.7.7/spec.html#Schema+Resolution
 */
@Test
@Category(NeedsRunner.class)
public void testWriteThenReadSchemaUpgrade() throws Throwable {
  List<GenericClass> written =
      ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
  File avroFile = tmpFolder.newFile("output.avro");
  String avroPath = avroFile.getAbsolutePath();

  // Write with the original class.
  AvroIO.Write<GenericClass> v1Sink =
      AvroIO.write(GenericClass.class).to(avroPath).withoutSharding();
  writePipeline.apply(Create.of(written)).apply(v1Sink);
  writePipeline.run();

  // Read back with the upgraded class; the added field is expected to come back as null.
  List<GenericClassV2> upgraded =
      ImmutableList.of(new GenericClassV2(3, "hi", null), new GenericClassV2(5, "bar", null));
  PAssert.that(readPipeline.apply(AvroIO.read(GenericClassV2.class).from(avroPath)))
      .containsInAnyOrder(upgraded);
  readPipeline.run();
}
/**
 * Writes two {@code GenericClass} values to a single unsharded file and reads them back, passing
 * the file path to both the write and the read transform through providers obtained from the
 * respective pipelines' {@code newProvider}.
 */
@Test
@Category(NeedsRunner.class)
public void testWriteThenReadJavaClass() throws Throwable {
  List<GenericClass> records =
      ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
  File avroFile = tmpFolder.newFile("output.avro");
  String avroPath = avroFile.getAbsolutePath();

  writePipeline
      .apply(Create.of(records))
      .apply(
          AvroIO.write(GenericClass.class)
              .to(writePipeline.newProvider(avroPath))
              .withoutSharding());
  writePipeline.run();

  PAssert.that(
          readPipeline.apply(
              "Read", AvroIO.read(GenericClass.class).from(readPipeline.newProvider(avroPath))))
      .containsInAnyOrder(records);
  readPipeline.run();
}
/**
 * Round-trips three {@code AvroGeneratedUser} instances through the supplied write and read
 * transforms, using a single unsharded file whose path is handed to both transforms through
 * providers obtained from the pipelines' {@code newProvider}.
 *
 * @param writeTransform the write transform to configure with the output path and apply
 * @param readTransform the read transform to configure with the same path and apply
 */
private <T extends GenericRecord> void testWriteThenReadGeneratedClass(
    AvroIO.Write<T> writeTransform, AvroIO.Read<T> readTransform) throws Exception {
  File avroFile = tmpFolder.newFile("output.avro");
  String avroPath = avroFile.getAbsolutePath();

  List<T> users =
      ImmutableList.of(
          (T) new AvroGeneratedUser("Bob", 256, null),
          (T) new AvroGeneratedUser("Alice", 128, null),
          (T) new AvroGeneratedUser("Ted", null, "white"));

  writePipeline
      .apply(Create.of(users))
      .apply(writeTransform.to(writePipeline.newProvider(avroPath)).withoutSharding());
  writePipeline.run();

  PAssert.that(
          readPipeline.apply("Read", readTransform.from(readPipeline.newProvider(avroPath))))
      .containsInAnyOrder(users);
  readPipeline.run();
}
@SuppressWarnings("deprecation") // using AvroCoder#createDatumReader for tests. private void runTestWrite(String[] expectedElements, int numShards) throws IOException { File baseOutputFile = new File(tmpFolder.getRoot(), "prefix"); String outputFilePrefix = baseOutputFile.getAbsolutePath(); AvroIO.Write<String> write = AvroIO.write(String.class).to(outputFilePrefix).withSuffix(".avro"); if (numShards > 1) { write = write.withNumShards(numShards); } else { write = write.withoutSharding(); } writePipeline.apply(Create.of(ImmutableList.copyOf(expectedElements))).apply(write); writePipeline.run(); String shardNameTemplate = firstNonNull( write.inner.getShardTemplate(), DefaultFilenamePolicy.DEFAULT_UNWINDOWED_SHARD_TEMPLATE); assertTestOutputs(expectedElements, numShards, outputFilePrefix, shardNameTemplate); }