eventsInIndia.apply("WriteEventsInIndia", TextIO.write().to(options.getOutput() + "india"));
@Test
@Category(NeedsRunner.class)
public void testReadWatchForNewFiles() throws IOException, InterruptedException {
  final Path basePath = tempFolder.getRoot().toPath().resolve("readWatch");
  basePath.toFile().mkdir();
  p.apply(GenerateSequence.from(0).to(10).withRate(1, Duration.millis(100)))
      .apply(
          Window.<Long>into(FixedWindows.of(Duration.millis(150)))
              .withAllowedLateness(Duration.ZERO)
              .triggering(Repeatedly.forever(AfterPane.elementCountAtLeast(1)))
              .discardingFiredPanes())
      .apply(ToString.elements())
      .apply(
          TextIO.write()
              .to(basePath.resolve("data").toString())
              .withNumShards(1)
              .withWindowedWrites());
  PCollection<String> lines =
      p.apply(
          TextIO.read()
              .from(basePath.resolve("*").toString())
              .watchForNewFiles(
                  Duration.millis(100),
                  Watch.Growth.afterTimeSinceNewOutput(Duration.standardSeconds(3))));
  PAssert.that(lines).containsInAnyOrder("0", "1", "2", "3", "4", "5", "6", "7", "8", "9");
  p.run();
}
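// Note on the test above: watchForNewFiles polls the glob every 100 ms, and the
// Watch.Growth.afterTimeSinceNewOutput condition stops polling once no new file
// has appeared for 3 seconds, which lets the otherwise-unbounded read terminate.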
public static void runAvroToCsv(SampleOptions options)
    throws IOException, IllegalArgumentException {
  FileSystems.setDefaultPipelineOptions(options);

  // Get Avro Schema
  String schemaJson = getSchema(options.getAvroSchema());
  Schema schema = new Schema.Parser().parse(schemaJson);

  // Check schema field types before starting the Dataflow job
  checkFieldTypes(schema);

  // Create the Pipeline object with the options we defined above.
  Pipeline pipeline = Pipeline.create(options);

  // Convert Avro To CSV
  pipeline
      .apply("Read Avro files",
          AvroIO.readGenericRecords(schemaJson).from(options.getInputFile()))
      .apply("Convert Avro to CSV formatted data",
          ParDo.of(new ConvertAvroToCsv(schemaJson, options.getCsvDelimiter())))
      .apply("Write CSV formatted data",
          TextIO.write().to(options.getOutput()).withSuffix(".csv"));

  // Run the pipeline.
  pipeline.run().waitUntilFinish();
}
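// A minimal sketch of how runAvroToCsv above might be invoked; the flag wiring
// is an assumption for illustration. Only SampleOptions and runAvroToCsv come
// from the snippet.
public static void main(String[] args) throws IOException {
  SampleOptions options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(SampleOptions.class);
  runAvroToCsv(options);
}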
format.apply(TextIO.write().to(resultPath));
result.apply("WriteOutput", TextIO.write().to(pipelineOptions.getOutput()).withoutSharding());
result.apply("WriteOutput", TextIO.write().to(pipelineOptions.getOutput()).withoutSharding());
.apply(TextIO.write().to(options.getOutput()).withWindowedWrites().withNumShards(1));
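// Context sketch for the line above (assumed names: `input` is a
// PCollection<String>, `options` supplies the output prefix; not the original
// pipeline): withWindowedWrites() requires the input to be explicitly windowed,
// and an unbounded input also needs a fixed shard count such as withNumShards(1).
input
    .apply(Window.<String>into(FixedWindows.of(Duration.standardMinutes(1))))
    .apply(TextIO.write().to(options.getOutput()).withWindowedWrites().withNumShards(1));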
"WriteErrorFile", TextIO.write() .to(options.getOutputFailureFile()) .withHeader("Filename,Error") .withoutSharding());
.apply( "Write Output Result", TextIO.write() .to(pipelineOptions.getOutput() + "-result") .withoutSharding());
.apply(TextIO.write().to(options.getOutput()));
.apply(TextIO.write().to("wordcounts"));
.apply(Combine.<KV<String, String>, KV<Double, Integer>>perKey(new IBSCalculator()))
.apply(FormatIBSData.class.getSimpleName(), ParDo.of(new FormatIBSData()))
.apply("WriteIBSData", TextIO.write().to(options.getOutput()));
.apply( "Write Output Result", TextIO.write() .to(pipelineOptions.getOutput() + "-result") .withoutSharding());
output.apply(TextIO.write().to(resultPath));
p.run();
.apply(sample)
.apply(Flatten.iterables())
.apply("WriteMyFile", TextIO.write().to(outputFile.getPath()));
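// One way the `sample` transform above could be built (an assumption; its
// construction is not shown in the snippet): Sample.fixedSizeGlobally yields a
// PCollection<Iterable<String>>, which Flatten.iterables() then unwraps into
// individual elements before the write.
PTransform<PCollection<String>, PCollection<Iterable<String>>> sample =
    Sample.fixedSizeGlobally(100); // sample size chosen for illustration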
"Write to storage", TextIO.write().to(options.getTextWritePrefix()).withSuffix(".csv"));