/** Entry point: copies one day's GDELT export CSV to the configured output location. */
public static void main(String[] args) {
  Options opts = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);

  // Default the input to the GDELT daily export for the requested date when none was given.
  if (opts.getInput() == null) {
    opts.setInput(Options.GDELT_EVENTS_URL + opts.getDate() + ".export.csv");
  }

  LOG.info(opts.toString());
  System.out.println(opts.toString());

  Pipeline p = Pipeline.create(opts);
  p.apply("ReadFromGDELTFile", TextIO.read().from(opts.getInput()))
      .apply("WriteToFS", TextIO.write().to(opts.getOutput()));
  p.run();
}
}
@Test public void testPathValidatorOverride() { String[] args = new String[] { "--runner=DataflowRunner", "--tempLocation=/tmp/testing", "--project=test-project", "--credentialFactoryClass=" + NoopCredentialFactory.class.getName(), "--pathValidatorClass=" + NoopPathValidator.class.getName(), }; // Should not crash, because gcpTempLocation should get set from tempLocation TestPipeline.fromOptions(PipelineOptionsFactory.fromArgs(args).create()); }
/** Builds pipeline options from the command line and delegates execution to {@code runPipeline}. */
public static void main(String[] args) throws Exception {
  PipelineOptions pipelineOptions = PipelineOptionsFactory.fromArgs(args).create();
  Pipeline pipeline = Pipeline.create(pipelineOptions);
  runPipeline(pipeline);
}
/** Parses and validates command-line arguments, then launches the read pipeline. */
public static void main(String[] args) {
  Options parsed = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  runReadPipeline(parsed);
}
/**
 * Entry point: deterministically generates ten Avro records from a sequence and writes them out
 * as Parquet files under the configured output path.
 */
public static void main(String[] args) {
  Options opts = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  Pipeline p = Pipeline.create(opts);

  p.apply("Generate sequence", GenerateSequence.from(0).to(10))
      .apply("Produce text lines", ParDo.of(new DeterministicallyConstructTestTextLineFn()))
      .apply("Produce Avro records", ParDo.of(new DeterministicallyConstructAvroRecordsFn()))
      // ParDo output needs an explicit coder for GenericRecord.
      .setCoder(AvroCoder.of(SCHEMA))
      .apply(
          "Write Parquet files",
          FileIO.<GenericRecord>write().via(ParquetIO.sink(SCHEMA)).to(opts.getOutput()));

  p.run();
}
public static void main(String[] args) { WordCountOptions options = PipelineOptionsFactory.fromArgs(args).withValidation().as(WordCountOptions.class); Pipeline p = Pipeline.create(options); // Concepts #2 and #3: Our pipeline applies the composite CountWords transform, and passes the // static FormatAsTextFn() to the ParDo transform. p.apply("ReadLines", TextIO.read().from(options.getInputFile())) .apply(new CountWords()) .apply(MapElements.via(new FormatAsTextFn())) .apply("WriteCounts", TextIO.write().to(options.getOutput())); p.run().waitUntilFinish(); } }
/** * Runs a pipeline to import Avro files in GCS to a Cloud Bigtable table. * * @param args arguments to the pipeline */ public static void main(String[] args) { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); PipelineResult result = run(options); // Wait for pipeline to finish only if it is not constructing a template. if (options.as(DataflowPipelineOptions.class).getTemplateLocation() == null) { result.waitUntilFinish(); } }
/** * Runs a pipeline to export data from a Cloud Bigtable table to Avro files in GCS. * * @param args arguments to the pipeline */ public static void main(String[] args) { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); PipelineResult result = run(options); // Wait for pipeline to finish only if it is not constructing a template. if (options.as(DataflowPipelineOptions.class).getTemplateLocation() == null) { result.waitUntilFinish(); } }
@Test public void testReadWithRuntimeParametersValidationDisabled() { ReadOptions options = PipelineOptionsFactory.fromArgs().withValidation().as(ReadOptions.class); BigtableIO.Read read = BigtableIO.read() .withoutValidation() .withProjectId(options.getBigtableProject()) .withInstanceId(options.getBigtableInstanceId()) .withTableId(options.getBigtableTableId()); // Not running a pipeline therefore this is expected. thrown.expect(PipelineRunMissingException.class); p.apply(read); }
/**
 * Parses the supplied command-line arguments into validated pipeline {@code Options}.
 *
 * @param args raw command-line arguments
 * @return the validated options instance
 */
public static Options getOptions(String[] args) {
  PipelineOptionsFactory.register(Options.class);
  return PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
}
public static void main(String[] args) throws Exception { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); // the following two 'applys' create multiple inputs to our pipeline, one for each // of our two input sources. PCollection<TableRow> eventsTable = p.apply(BigQueryIO.readTableRows().from(GDELT_EVENTS_TABLE)); PCollection<TableRow> countryCodes = p.apply(BigQueryIO.readTableRows().from(COUNTRY_CODES)); PCollection<String> formattedResults = joinEvents(eventsTable, countryCodes); formattedResults.apply(TextIO.write().to(options.getOutput())); p.run().waitUntilFinish(); } }
@Test
public void testFullName() {
  // A fully-qualified runner class name on the command line should resolve to the runner class.
  String[] args = new String[] {String.format("--runner=%s", FlinkRunner.class.getName())};
  PipelineOptions opts = PipelineOptionsFactory.fromArgs(args).create();
  // JUnit convention: expected value first, actual second (arguments were reversed).
  assertEquals(FlinkRunner.class, opts.getRunner());
}
/**
 * The main entry-point for pipeline execution. This method will start the pipeline but will not
 * wait for it's execution to finish. If blocking execution is required, use the {@link
 * KafkaToBigQuery#run(Options)} method to start the pipeline and invoke {@code
 * result.waitUntilFinish()} on the {@link PipelineResult}.
 *
 * @param args The command-line args passed by the executor.
 */
public static void main(String[] args) {
  Options parsed = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  run(parsed);
}
/**
 * Main entry point for executing the pipeline.
 *
 * @param args The command-line arguments to the pipeline.
 */
public static void main(String[] args) {
  Options parsed = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  // This pipeline always runs in streaming mode.
  parsed.setStreaming(true);
  run(parsed);
}
/**
 * Main entry point for executing the pipeline.
 *
 * @param args The command-line arguments to the pipeline.
 */
public static void main(String[] args) {
  Options pipelineOptions =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  // This pipeline always runs in streaming mode.
  pipelineOptions.setStreaming(true);
  run(pipelineOptions);
}
/** Entry point: builds and runs the export pipeline, optionally waiting for completion. */
public static void main(String[] args) {
  PipelineOptionsFactory.register(ExportOptions.class);
  ExportOptions exportOptions =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(ExportOptions.class);

  PipelineResult result = buildPipeline(exportOptions).run();

  // Only block until the pipeline finishes when explicitly requested.
  if (exportOptions.getWait()) {
    Utils.waitForPipelineToFinish(result);
  }
}
@Test
public void testFullName() {
  // A fully-qualified runner class name on the command line should resolve to the runner class.
  String[] args = new String[] {String.format("--runner=%s", GearpumpRunner.class.getName())};
  PipelineOptions opts = PipelineOptionsFactory.fromArgs(args).create();
  // JUnit convention: expected value first, actual second (arguments were reversed).
  assertEquals(GearpumpRunner.class, opts.getRunner());
}
/** Entry point: computes TF-IDF over the input documents on the Flink runner. */
public static void main(String[] args) throws Exception {
  Options opts = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  opts.setRunner(FlinkRunner.class);

  Pipeline p = Pipeline.create(opts);
  // URI has no default coder; encode it via its string form.
  p.getCoderRegistry().registerCoder(URI.class, StringDelegateCoder.of(URI.class));

  p.apply(new ReadDocuments(listInputDocuments(opts)))
      .apply(new ComputeTfIdf())
      .apply(new WriteTfIdf(opts.getOutput()));

  p.run();
}
}
@Test
public void testClassName() {
  // The simple (unqualified) runner class name should also resolve to the runner class.
  String[] args = new String[] {String.format("--runner=%s", FlinkRunner.class.getSimpleName())};
  PipelineOptions opts = PipelineOptionsFactory.fromArgs(args).create();
  // JUnit convention: expected value first, actual second (arguments were reversed).
  assertEquals(FlinkRunner.class, opts.getRunner());
}
}
@Test
public void testClassName() {
  // The simple (unqualified) runner class name should also resolve to the runner class.
  String[] args = new String[] {String.format("--runner=%s", FlinkRunner.class.getSimpleName())};
  PipelineOptions opts = PipelineOptionsFactory.fromArgs(args).create();
  // JUnit convention: expected value first, actual second (arguments were reversed).
  assertEquals(FlinkRunner.class, opts.getRunner());
}