/**
 * Runs this {@link Pipeline} according to the {@link PipelineOptions} used to create the
 * {@link Pipeline} via {@link #create(PipelineOptions)}.
 *
 * @return the {@code PipelineResult} of executing with the default options
 */
public PipelineResult run() { return run(defaultOptions); }
public static void runAvroToCsv(SampleOptions options) throws IOException, IllegalArgumentException { FileSystems.setDefaultPipelineOptions(options); // Get Avro Schema String schemaJson = getSchema(options.getAvroSchema()); Schema schema = new Schema.Parser().parse(schemaJson); // Check schema field types before starting the Dataflow job checkFieldTypes(schema); // Create the Pipeline object with the options we defined above. Pipeline pipeline = Pipeline.create(options); // Convert Avro To CSV pipeline.apply("Read Avro files", AvroIO.readGenericRecords(schemaJson).from(options.getInputFile())) .apply("Convert Avro to CSV formatted data", ParDo.of(new ConvertAvroToCsv(schemaJson, options.getCsvDelimiter()))) .apply("Write CSV formatted data", TextIO.write().to(options.getOutput()) .withSuffix(".csv")); // Run the pipeline. pipeline.run().waitUntilFinish(); }
public static void runCsvToAvro(SampleOptions options) throws IOException, IllegalArgumentException { FileSystems.setDefaultPipelineOptions(options); // Get Avro Schema String schemaJson = getSchema(options.getAvroSchema()); Schema schema = new Schema.Parser().parse(schemaJson); // Check schema field types before starting the Dataflow job checkFieldTypes(schema); // Create the Pipeline object with the options we defined above. Pipeline pipeline = Pipeline.create(options); // Convert CSV to Avro pipeline.apply("Read CSV files", TextIO.read().from(options.getInputFile())) .apply("Convert CSV to Avro formatted data", ParDo.of(new ConvertCsvToAvro(schemaJson, options.getCsvDelimiter()))) .setCoder(AvroCoder.of(GenericRecord.class, schema)) .apply("Write Avro formatted data", AvroIO.writeGenericRecords(schemaJson) .to(options.getOutput()).withCodec(CodecFactory.snappyCodec()).withSuffix(".avro")); // Run the pipeline. pipeline.run().waitUntilFinish(); }
/**
 * Hook invoked after user code completes. When auto-run is enabled and no run was
 * attempted by the user code, executes the pipeline and blocks until it finishes.
 */
protected void afterUserCodeFinished() {
  if (runAttempted || !enableAutoRunIfMissing) {
    // Either the user already ran the pipeline, or auto-run is disabled.
    return;
  }
  pipeline.run().waitUntilFinish();
}
}
// Launch the pipeline and block until execution finishes.
p.run().waitUntilFinish();
public static void main(String[] args) { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); String instanceId = options.getInstanceId(); String databaseId = options.getDatabaseId(); // [START spanner_dataflow_read] // Query for all the columns and rows in the specified Spanner table PCollection<Struct> records = p.apply( SpannerIO.read() .withInstanceId(instanceId) .withDatabaseId(databaseId) .withQuery("SELECT * FROM " + options.getTable())); // [END spanner_dataflow_read] PCollection<Long> tableEstimatedSize = records // Estimate the size of every row .apply(EstimateSize.create()) // Sum all the row sizes to get the total estimated size of the table .apply(Sum.longsGlobally()); // Write the total size to a file tableEstimatedSize .apply(ToString.elements()) .apply(TextIO.write().to(options.getOutput()).withoutSharding()); p.run().waitUntilFinish(); } }
// Write the elements as text to the output location, unsharded (single file).
.apply(TextIO.write().to(options.getOutput()).withoutSharding());
// Launch the pipeline and block until execution finishes.
p.run().waitUntilFinish();
public static void main(String[] args) { Options options = PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class); Pipeline p = Pipeline.create(options); SpannerConfig spannerConfig = SpannerConfig.create() .withInstanceId(options.getInstanceId()) .withDatabaseId(options.getDatabaseId()); // [START spanner_dataflow_readall] PCollection<Struct> allRecords = p.apply(SpannerIO.read() .withSpannerConfig(spannerConfig) .withQuery("SELECT t.table_name FROM information_schema.tables AS t WHERE t" + ".table_catalog = '' AND t.table_schema = ''")).apply( MapElements.into(TypeDescriptor.of(ReadOperation.class)) .via((SerializableFunction<Struct, ReadOperation>) input -> { String tableName = input.getString(0); return ReadOperation.create().withQuery("SELECT * FROM " + tableName); })).apply(SpannerIO.readAll().withSpannerConfig(spannerConfig)); // [END spanner_dataflow_readall] PCollection<Long> dbEstimatedSize = allRecords.apply(EstimateSize.create()) .apply(Sum.longsGlobally()); dbEstimatedSize.apply(ToString.elements()).apply(TextIO.write().to(options.getOutput()) .withoutSharding()); p.run().waitUntilFinish(); }
// Launch the pipeline and block until execution finishes.
p.run().waitUntilFinish();
// Launch the pipeline and block until execution finishes.
p.run().waitUntilFinish();
/**
 * Entry point for the export job: parses options, builds the export pipeline, starts it,
 * and optionally blocks until the run completes.
 *
 * @param args command-line arguments parsed into {@code ExportOptions}
 */
public static void main(String[] args) {
  PipelineOptionsFactory.register(ExportOptions.class);
  ExportOptions exportOptions =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(ExportOptions.class);

  Pipeline exportPipeline = buildPipeline(exportOptions);
  PipelineResult pipelineResult = exportPipeline.run();

  // Only block for completion when waiting was requested via the options.
  if (exportOptions.getWait()) {
    Utils.waitForPipelineToFinish(pipelineResult);
  }
}
/**
 * Entry point: consumes Avro-encoded {@code MyType} records from a Kafka topic via the
 * Flink Kafka 0.8 consumer and prints each element.
 *
 * @param args pipeline arguments used to initialize the pipeline and Kafka options
 */
public static void main(String[] args) {
  Pipeline pipeline = initializePipeline(args);
  KafkaOptions kafkaOptions = getOptions(pipeline);

  // Flink-native Kafka source that deserializes Avro payloads into MyType.
  FlinkKafkaConsumer08<MyType> kafkaConsumer =
      new FlinkKafkaConsumer08<>(
          kafkaOptions.getKafkaAvroTopic(),
          new AvroSerializationDeserializationSchema<>(MyType.class),
          getKafkaProps(kafkaOptions));

  pipeline
      .apply(Read.from(UnboundedFlinkSource.of(kafkaConsumer)))
      .setCoder(AvroCoder.of(MyType.class))
      .apply(ParDo.of(new PrintFn<>()));

  // Unbounded source: start the streaming job without blocking on completion.
  pipeline.run();
}
/**
 * Entry point for the Flink word-count sample: reads text lines, counts words, formats
 * each count as text, and writes the results.
 *
 * @param args command-line arguments parsed into {@code Options}
 */
public static void main(String[] args) {
  Options options =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(Options.class);
  // Force execution on the Flink runner regardless of any --runner flag.
  options.setRunner(FlinkRunner.class);

  Pipeline pipeline = Pipeline.create(options);
  pipeline
      .apply("ReadLines", TextIO.Read.from(options.getInput()))
      .apply(new CountWords())
      .apply(MapElements.via(new FormatAsTextFn()))
      .apply("WriteCounts", TextIO.Write.to(options.getOutput()));
  pipeline.run();
}
/**
 * Verifies that a NullPointerException thrown during encoding surfaces from the pipeline
 * run with its original message intact.
 */
@Test
@Category(NeedsRunner.class)
public void testEncodingNPException() throws Exception {
  CustomTestCoder failingCoder =
      new CustomTestCoder(null, NULL_POINTER_EXCEPTION, null, null, EXCEPTION_MESSAGE);
  Pipeline pipeline = pipelineWith(failingCoder);

  // Expect the wrapped NPE, matched by its fully-qualified name and message.
  thrown.expect(Exception.class);
  thrown.expect(new ExceptionMatcher("java.lang.NullPointerException: Super Unique Message!!!"));

  pipeline.run().waitUntilFinish();
}
/** Verifies that a source which demands splitting still yields every expected element. */
@Test
public void splitsInputs() {
  Pipeline pipeline = getPipeline();
  PCollection<Long> counted =
      pipeline.apply(Read.from(MustSplitSource.of(CountingSource.upTo(3))));

  PAssert.that(counted).containsInAnyOrder(0L, 1L, 2L);
  pipeline.run();
}
/**
 * Verifies that an IOException thrown during decoding surfaces from the pipeline run
 * with its original message intact.
 */
@Test
@Category(NeedsRunner.class)
public void testDecodingIOException() throws Exception {
  CustomTestCoder failingCoder =
      new CustomTestCoder(IO_EXCEPTION, null, null, null, EXCEPTION_MESSAGE);
  Pipeline pipeline = pipelineWith(failingCoder);

  // Expect the wrapped IOException, matched by its fully-qualified name and message.
  thrown.expect(Exception.class);
  thrown.expect(new ExceptionMatcher("java.io.IOException: Super Unique Message!!!"));

  pipeline.run().waitUntilFinish();
}
/**
 * Verifies that an IOException thrown during encoding surfaces from the pipeline run
 * with its original message intact.
 */
@Test
@Category(NeedsRunner.class)
public void testEncodingIOException() throws Exception {
  CustomTestCoder failingCoder =
      new CustomTestCoder(null, IO_EXCEPTION, null, null, EXCEPTION_MESSAGE);
  Pipeline pipeline = pipelineWith(failingCoder);

  // Expect the wrapped IOException, matched by its fully-qualified name and message.
  thrown.expect(Exception.class);
  thrown.expect(new ExceptionMatcher("java.io.IOException: Super Unique Message!!!"));

  pipeline.run().waitUntilFinish();
}
/**
 * Entry point for the word-count sample: reads input text, counts words, formats each
 * count as text, and writes the results.
 *
 * @param args command-line arguments parsed into {@code WordCountOptions}
 */
public static void main(String[] args) {
  WordCountOptions wordCountOptions =
      PipelineOptionsFactory.fromArgs(args).withValidation().as(WordCountOptions.class);

  Pipeline pipeline = Pipeline.create(wordCountOptions);
  pipeline
      .apply("ReadLines", TextIO.read().from(wordCountOptions.getInputFile()))
      .apply(new CountWords())
      .apply(MapElements.via(new FormatAsTextFn()))
      .apply("WriteCounts", TextIO.write().to(wordCountOptions.getOutput()));
  pipeline.run();
}
}
@Test public void testContainsInAnyOrder() throws Exception { ApexPipelineOptions options = PipelineOptionsFactory.create().as(ApexPipelineOptions.class); options.setRunner(TestApexRunner.class); Pipeline pipeline = Pipeline.create(options); PCollection<Integer> pcollection = pipeline.apply(Create.of(1, 2, 3, 4)); PAssert.that(pcollection).containsInAnyOrder(2, 1, 4, 3); // TODO: terminate faster based on processed assertion vs. auto-shutdown pipeline.run(); }
/**
 * Verifies that an IOException thrown during deserialization surfaces from the pipeline
 * run with its original message intact.
 */
@Ignore("DirectRunner doesn't decode coders so this test does not pass.")
@Test
@Category(NeedsRunner.class)
public void testDeserializationIOException() throws Exception {
  CustomTestCoder failingCoder =
      new CustomTestCoder(null, null, null, IO_EXCEPTION, EXCEPTION_MESSAGE);
  Pipeline pipeline = pipelineWith(failingCoder);

  // Expect the wrapped IOException, matched by its fully-qualified name and message.
  thrown.expect(Exception.class);
  thrown.expect(new ExceptionMatcher("java.io.IOException: Super Unique Message!!!"));

  pipeline.run().waitUntilFinish();
}