@Test @SuppressWarnings("unchecked") @Category(NeedsRunner.class) public void testWriteThenReadASingleFileWithNullCodec() throws Throwable { List<GenericClass> values = ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar")); File outputFile = tmpFolder.newFile("output.avro"); writePipeline .apply(Create.of(values)) .apply( AvroIO.write(GenericClass.class) .to(outputFile.getAbsolutePath()) .withoutSharding() .withCodec(CodecFactory.nullCodec())); writePipeline.run(); PAssert.that( readPipeline.apply(AvroIO.read(GenericClass.class).from(outputFile.getAbsolutePath()))) .containsInAnyOrder(values); readPipeline.run(); try (DataFileStream dataFileStream = new DataFileStream(new FileInputStream(outputFile), new GenericDatumReader())) { assertEquals("null", dataFileStream.getMetaString("avro.codec")); } }
readPipeline.apply( "read_" + prefix, AvroIO.readGenericRecords(schemaFromPrefix(prefix)).from(expectedFilepattern)); PAssert.that(records).containsInAnyOrder(expectedElements.get(prefix));
"Read", AvroIO.read(GenericClass.class) .from(tmpFolder.getRoot().getAbsolutePath() + "/first*") .watchForNewFiles( Duration.millis(100), Watch.Growth.afterTimeSinceNewOutput(Duration.standardSeconds(3)))))
PAssert.that( readPipeline.apply( "Read", AvroIO.read(GenericClass.class).from(outputFile.getAbsolutePath()))) .containsInAnyOrder(values); PAssert.that( "Read withHintMatchesManyFiles", AvroIO.read(GenericClass.class) .from(outputFile.getAbsolutePath()) .withHintMatchesManyFiles())) .containsInAnyOrder(values); PAssert.that(
"Read", AvroIO.read(GenericClass.class) .from(readPipeline.newProvider(outputFile.getAbsolutePath()))) .apply( MapElements.via(
/**
 * Tests that {@code AvroIO} can read an upgraded version of an old class, as long as the schema
 * resolution process succeeds. This test covers the case when a new, {@code @Nullable} field has
 * been added.
 *
 * <p>For more information, see http://avro.apache.org/docs/1.7.7/spec.html#Schema+Resolution
 */
@Test
@Category(NeedsRunner.class)
public void testWriteThenReadSchemaUpgrade() throws Throwable {
  List<GenericClass> values =
      ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
  File outputFile = tmpFolder.newFile("output.avro");

  writePipeline
      .apply(Create.of(values))
      .apply(AvroIO.write(GenericClass.class).to(outputFile.getAbsolutePath()).withoutSharding());
  writePipeline.run();

  List<GenericClassV2> expected =
      ImmutableList.of(new GenericClassV2(3, "hi", null), new GenericClassV2(5, "bar", null));

  PAssert.that(
          readPipeline.apply(
              AvroIO.read(GenericClassV2.class).from(outputFile.getAbsolutePath())))
      .containsInAnyOrder(expected);
  readPipeline.run();
}
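// A minimal sketch of the two POJOs the schema-upgrade test above relies on. The field and
// constructor names here are assumptions for illustration (equals/hashCode omitted); the key
// point is that GenericClassV2 adds a new @Nullable field, which Avro schema resolution fills
// with null when reading data written with the old schema. Assumes Beam's
// @DefaultCoder(AvroCoder.class) and org.apache.avro.reflect.Nullable.
@DefaultCoder(AvroCoder.class)
static class GenericClass {
  int intField;
  String stringField;

  GenericClass() {}

  GenericClass(int intField, String stringField) {
    this.intField = intField;
    this.stringField = stringField;
  }
}

@DefaultCoder(AvroCoder.class)
static class GenericClassV2 {
  int intField;
  String stringField;
  @Nullable String nullableField; // the newly added field

  GenericClassV2() {}

  GenericClassV2(int intField, String stringField, @Nullable String nullableField) {
    this.intField = intField;
    this.stringField = stringField;
    this.nullableField = nullableField;
  }
}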
@Test @SuppressWarnings("unchecked") @Category(NeedsRunner.class) public void testCompressedWriteAndReadASingleFile() throws Throwable { List<GenericClass> values = ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar")); File outputFile = tmpFolder.newFile("output.avro"); writePipeline .apply(Create.of(values)) .apply( AvroIO.write(GenericClass.class) .to(outputFile.getAbsolutePath()) .withoutSharding() .withCodec(CodecFactory.deflateCodec(9))); writePipeline.run(); PAssert.that( readPipeline.apply(AvroIO.read(GenericClass.class).from(outputFile.getAbsolutePath()))) .containsInAnyOrder(values); readPipeline.run(); try (DataFileStream dataFileStream = new DataFileStream(new FileInputStream(outputFile), new GenericDatumReader())) { assertEquals("deflate", dataFileStream.getMetaString("avro.codec")); } }
@Test
@Category(NeedsRunner.class)
public void testWriteThenReadJavaClass() throws Throwable {
  List<GenericClass> values =
      ImmutableList.of(new GenericClass(3, "hi"), new GenericClass(5, "bar"));
  File outputFile = tmpFolder.newFile("output.avro");

  writePipeline
      .apply(Create.of(values))
      .apply(
          AvroIO.write(GenericClass.class)
              .to(writePipeline.newProvider(outputFile.getAbsolutePath()))
              .withoutSharding());
  writePipeline.run();

  PAssert.that(
          readPipeline.apply(
              "Read",
              AvroIO.read(GenericClass.class)
                  .from(readPipeline.newProvider(outputFile.getAbsolutePath()))))
      .containsInAnyOrder(values);
  readPipeline.run();
}
@Override
public PCollection<T> expand(PBegin input) {
  checkNotNull(getFilepattern(), "filepattern");
  checkNotNull(getSchema(), "schema");

  if (getMatchConfiguration().getWatchInterval() == null && !getHintMatchesManyFiles()) {
    return input.apply(
        "Read",
        org.apache.beam.sdk.io.Read.from(
            createSource(
                getFilepattern(),
                getMatchConfiguration().getEmptyMatchTreatment(),
                getRecordClass(),
                getSchema())));
  }

  // All other cases go through ReadAll.
  ReadAll<T> readAll =
      (getRecordClass() == GenericRecord.class)
          ? (ReadAll<T>) readAllGenericRecords(getSchema())
          : readAll(getRecordClass());
  readAll = readAll.withMatchConfiguration(getMatchConfiguration());
  return input
      .apply("Create filepattern", Create.ofProvider(getFilepattern(), StringUtf8Coder.of()))
      .apply("Via ReadAll", readAll);
}
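// A minimal usage sketch (hypothetical pipeline `p`, filepattern, and record type MyRecord):
// setting the many-files hint (or a watch interval, see watchForNewFiles below) makes expand()
// above take the ReadAll branch instead of expanding to a single bounded source.
p.apply(
    "Read many small files",
    AvroIO.read(MyRecord.class)
        .from("gs://my-bucket/many-small-files/*.avro")
        .withHintMatchesManyFiles());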
private <T extends GenericRecord> void testWriteThenReadGeneratedClass(
    AvroIO.Write<T> writeTransform, AvroIO.Read<T> readTransform) throws Exception {
  File outputFile = tmpFolder.newFile("output.avro");
  List<T> values =
      ImmutableList.of(
          (T) new AvroGeneratedUser("Bob", 256, null),
          (T) new AvroGeneratedUser("Alice", 128, null),
          (T) new AvroGeneratedUser("Ted", null, "white"));

  writePipeline
      .apply(Create.of(values))
      .apply(
          writeTransform
              .to(writePipeline.newProvider(outputFile.getAbsolutePath()))
              .withoutSharding());
  writePipeline.run();

  PAssert.that(
          readPipeline.apply(
              "Read",
              readTransform.from(readPipeline.newProvider(outputFile.getAbsolutePath()))))
      .containsInAnyOrder(values);
  readPipeline.run();
}
/** Like {@link #from(ValueProvider)}. */
public Read<T> from(String filepattern) {
  return from(StaticValueProvider.of(filepattern));
}
/**
 * Continuously watches for new files matching the filepattern, polling it at the given
 * interval, until the given termination condition is reached. The returned {@link PCollection}
 * is unbounded.
 *
 * <p>This works only in runners supporting {@link Kind#SPLITTABLE_DO_FN}.
 */
@Experimental(Kind.SPLITTABLE_DO_FN)
public Read<T> watchForNewFiles(
    Duration pollInterval, TerminationCondition<String, ?> terminationCondition) {
  return withMatchConfiguration(
      getMatchConfiguration().continuously(pollInterval, terminationCondition));
}
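// A minimal usage sketch (hypothetical bucket path, schema, and pipeline `p`): poll the
// filepattern every 30 seconds and stop watching once no new files have appeared for an hour.
// Assumes org.joda.time.Duration and org.apache.beam.sdk.transforms.Watch are imported.
PCollection<GenericRecord> records =
    p.apply(
        AvroIO.readGenericRecords(schema)
            .from("gs://my-bucket/incoming/*.avro")
            .watchForNewFiles(
                Duration.standardSeconds(30),
                Watch.Growth.afterTimeSinceNewOutput(Duration.standardHours(1))));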
/** Configures whether or not a filepattern matching no files is allowed. */
public Read<T> withEmptyMatchTreatment(EmptyMatchTreatment treatment) {
  return withMatchConfiguration(getMatchConfiguration().withEmptyMatchTreatment(treatment));
}
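// A minimal usage sketch (hypothetical path and pipeline `p`): treat a filepattern that matches
// no files as an empty input instead of failing the read.
// Assumes org.apache.beam.sdk.io.fs.EmptyMatchTreatment is imported.
PCollection<GenericClass> maybeEmpty =
    p.apply(
        AvroIO.read(GenericClass.class)
            .from("gs://my-bucket/optional-input/*.avro")
            .withEmptyMatchTreatment(EmptyMatchTreatment.ALLOW));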
@Override
public void populateDisplayData(DisplayData.Builder builder) {
  super.populateDisplayData(builder);
  builder
      .addIfNotNull(
          DisplayData.item("filePattern", getFilepattern()).withLabel("Input File Pattern"))
      .include("matchConfiguration", getMatchConfiguration());
}
public static PipelineResult run(Options options) {
  Pipeline pipeline = Pipeline.create(options);

  BigtableIO.Write write =
      BigtableIO.write()
          .withProjectId(options.getBigtableProjectId())
          .withInstanceId(options.getBigtableInstanceId())
          .withTableId(options.getBigtableTableId());

  pipeline
      .apply(
          "Read from Avro",
          AvroIO.read(BigtableRow.class).from(options.getInputFilePattern()))
      .apply("Transform to Bigtable", MapElements.via(new AvroToBigtableFn()))
      .apply("Write to Bigtable", write);

  return pipeline.run();
}
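// A minimal sketch of an entry point for the pipeline above, assuming the Options interface
// extends PipelineOptions and declares the getters used in run(). Illustration only; the real
// template's main method may differ.
public static void main(String[] args) {
  Options options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  run(options);
}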
@Test
@Category(ValidatesRunner.class)
public void testPrimitiveReadDisplayData() {
  DisplayDataEvaluator evaluator = DisplayDataEvaluator.create();

  AvroIO.Read<GenericRecord> read =
      AvroIO.readGenericRecords(Schema.create(Schema.Type.STRING)).from("/foo.*");

  Set<DisplayData> displayData = evaluator.displayDataForPrimitiveSourceTransforms(read);
  assertThat(
      "AvroIO.Read should include the file pattern in its primitive transform",
      displayData,
      hasItem(hasDisplayItem("filePattern")));
}
@Test
public void testReadDisplayData() {
  AvroIO.Read<String> read = AvroIO.read(String.class).from("/foo.*");

  DisplayData displayData = DisplayData.from(read);
  assertThat(displayData, hasDisplayItem("filePattern", "/foo.*"));
}
public static void runAvroToCsv(SampleOptions options)
    throws IOException, IllegalArgumentException {
  FileSystems.setDefaultPipelineOptions(options);

  // Get Avro Schema
  String schemaJson = getSchema(options.getAvroSchema());
  Schema schema = new Schema.Parser().parse(schemaJson);

  // Check schema field types before starting the Dataflow job
  checkFieldTypes(schema);

  // Create the Pipeline object with the options we defined above.
  Pipeline pipeline = Pipeline.create(options);

  // Convert Avro To CSV
  pipeline
      .apply(
          "Read Avro files",
          AvroIO.readGenericRecords(schemaJson).from(options.getInputFile()))
      .apply(
          "Convert Avro to CSV formatted data",
          ParDo.of(new ConvertAvroToCsv(schemaJson, options.getCsvDelimiter())))
      .apply(
          "Write CSV formatted data",
          TextIO.write().to(options.getOutput()).withSuffix(".csv"));

  // Run the pipeline.
  pipeline.run().waitUntilFinish();
}
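// A hedged sketch of what the ConvertAvroToCsv DoFn referenced above could look like; the real
// sample's implementation may differ. It emits one CSV line per record by joining the schema's
// top-level field values with the configured delimiter. Assumes org.apache.beam.sdk.transforms.DoFn,
// org.apache.avro.Schema, and org.apache.avro.generic.GenericRecord are imported.
static class ConvertAvroToCsv extends DoFn<GenericRecord, String> {
  private final String schemaJson;
  private final String delimiter;
  private transient Schema schema;

  ConvertAvroToCsv(String schemaJson, String delimiter) {
    this.schemaJson = schemaJson;
    this.delimiter = delimiter;
  }

  @Setup
  public void setup() {
    // Parse the schema once per DoFn instance rather than once per element.
    schema = new Schema.Parser().parse(schemaJson);
  }

  @ProcessElement
  public void processElement(ProcessContext c) {
    GenericRecord record = c.element();
    StringBuilder row = new StringBuilder();
    for (Schema.Field field : schema.getFields()) {
      if (row.length() > 0) {
        row.append(delimiter);
      }
      Object value = record.get(field.name());
      row.append(value == null ? "" : value.toString());
    }
    c.output(row.toString());
  }
}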