/**
 * Runs this {@link Pipeline} according to the {@link PipelineOptions} used to create the
 * {@link Pipeline} via {@link #create(PipelineOptions)}.
 *
 * @return the {@code PipelineResult} of executing with the default options
 */
public PipelineResult run() { return run(defaultOptions); }
public static void runAvroToCsv(SampleOptions options) throws IOException, IllegalArgumentException { FileSystems.setDefaultPipelineOptions(options); // Get Avro Schema String schemaJson = getSchema(options.getAvroSchema()); Schema schema = new Schema.Parser().parse(schemaJson); // Check schema field types before starting the Dataflow job checkFieldTypes(schema); // Create the Pipeline object with the options we defined above. Pipeline pipeline = Pipeline.create(options); // Convert Avro To CSV pipeline.apply("Read Avro files", AvroIO.readGenericRecords(schemaJson).from(options.getInputFile())) .apply("Convert Avro to CSV formatted data", ParDo.of(new ConvertAvroToCsv(schemaJson, options.getCsvDelimiter()))) .apply("Write CSV formatted data", TextIO.write().to(options.getOutput()) .withSuffix(".csv")); // Run the pipeline. pipeline.run().waitUntilFinish(); }
public static void runCsvToAvro(SampleOptions options) throws IOException, IllegalArgumentException { FileSystems.setDefaultPipelineOptions(options); // Get Avro Schema String schemaJson = getSchema(options.getAvroSchema()); Schema schema = new Schema.Parser().parse(schemaJson); // Check schema field types before starting the Dataflow job checkFieldTypes(schema); // Create the Pipeline object with the options we defined above. Pipeline pipeline = Pipeline.create(options); // Convert CSV to Avro pipeline.apply("Read CSV files", TextIO.read().from(options.getInputFile())) .apply("Convert CSV to Avro formatted data", ParDo.of(new ConvertCsvToAvro(schemaJson, options.getCsvDelimiter()))) .setCoder(AvroCoder.of(GenericRecord.class, schema)) .apply("Write Avro formatted data", AvroIO.writeGenericRecords(schemaJson) .to(options.getOutput()).withCodec(CodecFactory.snappyCodec()).withSuffix(".avro")); // Run the pipeline. pipeline.run().waitUntilFinish(); }
/**
 * Hook invoked after user code completes. When auto-run is enabled and no run was
 * attempted by the user code, executes the pipeline and blocks until it finishes.
 */
protected void afterUserCodeFinished() {
  if (runAttempted || !enableAutoRunIfMissing) {
    // Either the user already ran the pipeline, or auto-run is disabled.
    return;
  }
  pipeline.run().waitUntilFinish();
}
}
// Launch the pipeline and block until execution finishes.
p.run().waitUntilFinish();
public static void main(String[] args) { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); String instanceId = options.getInstanceId(); String databaseId = options.getDatabaseId(); // [START spanner_dataflow_read] // Query for all the columns and rows in the specified Spanner table PCollection<Struct> records = p.apply( SpannerIO.read() .withInstanceId(instanceId) .withDatabaseId(databaseId) .withQuery("SELECT * FROM " + options.getTable())); // [END spanner_dataflow_read] PCollection<Long> tableEstimatedSize = records // Estimate the size of every row .apply(EstimateSize.create()) // Sum all the row sizes to get the total estimated size of the table .apply(Sum.longsGlobally()); // Write the total size to a file tableEstimatedSize .apply(ToString.elements()) .apply(TextIO.write().to(options.getOutput()).withoutSharding()); p.run().waitUntilFinish(); } }
// Write the elements as text to the output location, unsharded (single file).
.apply(TextIO.write().to(options.getOutput()).withoutSharding());
// Launch the pipeline and block until execution finishes.
p.run().waitUntilFinish();
public static void main(String[] args) { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); SpannerConfig spannerConfig = SpannerConfig.create() .withInstanceId(options.getInstanceId()) .withDatabaseId(options.getDatabaseId()); // [START spanner_dataflow_readall] PCollection<Struct> allRecords = p.apply(SpannerIO.read() .withSpannerConfig(spannerConfig) .withQuery("SELECT t.table_name FROM information_schema.tables AS t WHERE t" + ".table_catalog = '' AND t.table_schema = ''")).apply( MapElements.into(TypeDescriptor.of(ReadOperation.class)) .via((SerializableFunction<Struct, ReadOperation>) input -> { String tableName = input.getString(0); return ReadOperation.create().withQuery("SELECT * FROM " + tableName); })).apply(SpannerIO.readAll().withSpannerConfig(spannerConfig)); // [END spanner_dataflow_readall] PCollection<Long> dbEstimatedSize = allRecords.apply(EstimateSize.create()) .apply(Sum.longsGlobally()); dbEstimatedSize.apply(ToString.elements()).apply(TextIO.write().to(options.getOutput()) .withoutSharding()); p.run().waitUntilFinish(); }
// Launch the pipeline and block until execution finishes.
p.run().waitUntilFinish();
// Launch the pipeline and block until execution finishes.
p.run().waitUntilFinish();
/**
 * Entry point for the export job: parses options, builds the export pipeline, starts it,
 * and optionally blocks until the run completes.
 *
 * @param args command-line arguments parsed into {@code ExportOptions}
 */
public static void main(String[] args) {
  PipelineOptionsFactory.register(ExportOptions.class);
  ExportOptions exportOptions =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(ExportOptions.class);

  Pipeline exportPipeline = buildPipeline(exportOptions);
  PipelineResult pipelineResult = exportPipeline.run();

  // Only block for completion when waiting was requested via the options.
  if (exportOptions.getWait()) {
    Utils.waitForPipelineToFinish(pipelineResult);
  }
}
/**
 * Entry point: consumes Avro-encoded {@code MyType} records from a Kafka topic via the
 * Flink Kafka 0.8 consumer and prints each element.
 *
 * @param args pipeline arguments used to initialize the pipeline and Kafka options
 */
public static void main(String[] args) {
  Pipeline pipeline = initializePipeline(args);
  KafkaOptions kafkaOptions = getOptions(pipeline);

  // Flink-native Kafka source that deserializes Avro payloads into MyType.
  FlinkKafkaConsumer08<MyType> kafkaConsumer =
      new FlinkKafkaConsumer08<>(
          kafkaOptions.getKafkaAvroTopic(),
          new AvroSerializationDeserializationSchema<>(MyType.class),
          getKafkaProps(kafkaOptions));

  pipeline
      .apply(Read.from(UnboundedFlinkSource.of(kafkaConsumer)))
      .setCoder(AvroCoder.of(MyType.class))
      .apply(ParDo.of(new PrintFn<>()));

  // Unbounded source: start the streaming job without blocking on completion.
  pipeline.run();
}
/**
 * Entry point for the Flink word-count sample: reads text lines, counts words, formats
 * each count as text, and writes the results.
 *
 * @param args command-line arguments parsed into {@code Options}
 */
public static void main(String[] args) {
  Options options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  // Force execution on the Flink runner regardless of any --runner flag.
  options.setRunner(FlinkRunner.class);

  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply("ReadLines", TextIO.Read.from(options.getInput()))
      .apply(new CountWords())
      .apply(MapElements.via(new FormatAsTextFn()))
      .apply("WriteCounts", TextIO.Write.to(options.getOutput()));
  pipeline.run();
}
/**
 * Verifies that a NullPointerException thrown during encoding surfaces from the pipeline
 * run with its original message intact.
 */
@Test
@Category(NeedsRunner.class)
public void testEncodingNPException() throws Exception {
  CustomTestCoder failingCoder =
      new CustomTestCoder(null, NULL_POINTER_EXCEPTION, null, null, EXCEPTION_MESSAGE);
  Pipeline pipeline = pipelineWith(failingCoder);

  // Expect the wrapped NPE, matched by its fully-qualified name and message.
  thrown.expect(Exception.class);
  thrown.expect(new ExceptionMatcher("java.lang.NullPointerException: Super Unique Message!!!"));

  pipeline.run().waitUntilFinish();
}
/** Verifies that a source which demands splitting still yields every expected element. */
@Test
public void splitsInputs() {
  Pipeline pipeline = getPipeline();
  PCollection<Long> counted =
      pipeline.apply(Read.from(MustSplitSource.of(CountingSource.upTo(3))));

  PAssert.that(counted).containsInAnyOrder(0L, 1L, 2L);
  pipeline.run();
}
/**
 * Verifies that an IOException thrown during decoding surfaces from the pipeline run
 * with its original message intact.
 */
@Test
@Category(NeedsRunner.class)
public void testDecodingIOException() throws Exception {
  CustomTestCoder failingCoder =
      new CustomTestCoder(IO_EXCEPTION, null, null, null, EXCEPTION_MESSAGE);
  Pipeline pipeline = pipelineWith(failingCoder);

  // Expect the wrapped IOException, matched by its fully-qualified name and message.
  thrown.expect(Exception.class);
  thrown.expect(new ExceptionMatcher("java.io.IOException: Super Unique Message!!!"));

  pipeline.run().waitUntilFinish();
}
/**
 * Verifies that an IOException thrown during encoding surfaces from the pipeline run
 * with its original message intact.
 */
@Test
@Category(NeedsRunner.class)
public void testEncodingIOException() throws Exception {
  CustomTestCoder failingCoder =
      new CustomTestCoder(null, IO_EXCEPTION, null, null, EXCEPTION_MESSAGE);
  Pipeline pipeline = pipelineWith(failingCoder);

  // Expect the wrapped IOException, matched by its fully-qualified name and message.
  thrown.expect(Exception.class);
  thrown.expect(new ExceptionMatcher("java.io.IOException: Super Unique Message!!!"));

  pipeline.run().waitUntilFinish();
}
/**
 * Entry point for the word-count sample: reads input text, counts words, formats each
 * count as text, and writes the results.
 *
 * @param args command-line arguments parsed into {@code WordCountOptions}
 */
public static void main(String[] args) {
  WordCountOptions wordCountOptions =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(WordCountOptions.class);

  Pipeline pipeline = Pipeline.create(wordCountOptions);
  pipeline
      .apply("ReadLines", TextIO.read().from(wordCountOptions.getInputFile()))
      .apply(new CountWords())
      .apply(MapElements.via(new FormatAsTextFn()))
      .apply("WriteCounts", TextIO.write().to(wordCountOptions.getOutput()));
  pipeline.run();
}
}
@Test public void testContainsInAnyOrder() throws Exception { ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class); options.setRunner(TestApexRunner.class); Pipeline pipeline = Pipeline.create(options); PCollection<Integer> pcollection = pipeline.apply(Create.of(1, 2, 3, 4)); PAssert.that(pcollection).containsInAnyOrder(2, 1, 4, 3); // TODO: terminate faster based on processed assertion vs. auto-shutdown pipeline.run(); }
/**
 * Verifies that an IOException thrown during deserialization surfaces from the pipeline
 * run with its original message intact.
 */
@Ignore("DirectRunner doesn't decode coders so this test does not pass.")
@Test
@Category(NeedsRunner.class)
public void testDeserializationIOException() throws Exception {
  CustomTestCoder failingCoder =
      new CustomTestCoder(null, null, null, IO_EXCEPTION, EXCEPTION_MESSAGE);
  Pipeline pipeline = pipelineWith(failingCoder);

  // Expect the wrapped IOException, matched by its fully-qualified name and message.
  thrown.expect(Exception.class);
  thrown.expect(new ExceptionMatcher("java.io.IOException: Super Unique Message!!!"));

  pipeline.run().waitUntilFinish();
}