/** Tests that all reads are consumed by at least one {@link PTransform}. */
@Test
public void testUnconsumedReads() throws IOException {
  DataflowPipelineOptions dataflowOptions = buildPipelineOptions();
  RuntimeTestOptions options = dataflowOptions.as(RuntimeTestOptions.class);
  Pipeline p = buildDataflowPipeline(dataflowOptions);
  p.apply(TextIO.read().from(options.getInput()));
  DataflowRunner.fromOptions(dataflowOptions).replaceTransforms(p);
  final AtomicBoolean unconsumedSeenAsInput = new AtomicBoolean();
  p.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(Node node) {
          unconsumedSeenAsInput.set(true);
        }
      });
  assertThat(unconsumedSeenAsInput.get(), is(true));
}
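// For context, a minimal sketch of the RuntimeTestOptions interface the test above
// relies on (assumed shape; the real definition lives elsewhere in this test class).
// The point is that the input is a ValueProvider, i.e. not known at graph
// construction time, so the read cannot be validated or consumed eagerly.
public interface RuntimeTestOptions extends PipelineOptions {
  ValueProvider<String> getInput();

  void setInput(ValueProvider<String> value);
}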
options.setStreaming(true);
replaceTransforms(pipeline);
@Test
public void testToIterableTranslationWithIsmSideInput() throws Exception {
  // A "change detector" test that makes sure the translation
  // of getting a PCollectionView<Iterable<T>> does not change
  // in bad ways during refactor
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3)).apply(View.asIterable());

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob();
  assertAllStepOutputsHaveUniqueIds(job);

  List<Step> steps = job.getSteps();
  assertEquals(3, steps.size());

  @SuppressWarnings("unchecked")
  List<Map<String, Object>> toIsmRecordOutputs =
      (List<Map<String, Object>>) steps.get(1).getProperties().get(PropertyNames.OUTPUT_INFO);
  assertTrue(
      Structs.getBoolean(Iterables.getOnlyElement(toIsmRecordOutputs), "use_indexed_format"));

  Step collectionToSingletonStep = steps.get(2);
  assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind());
}
@Test
public void testToSingletonTranslationWithIsmSideInput() throws Exception {
  // A "change detector" test that makes sure the translation
  // of getting a PCollectionView<T> does not change
  // in bad ways during refactor
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1)).apply(View.asSingleton());

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob();
  assertAllStepOutputsHaveUniqueIds(job);

  List<Step> steps = job.getSteps();
  assertEquals(9, steps.size());

  @SuppressWarnings("unchecked")
  List<Map<String, Object>> toIsmRecordOutputs =
      (List<Map<String, Object>>) steps.get(7).getProperties().get(PropertyNames.OUTPUT_INFO);
  assertTrue(
      Structs.getBoolean(Iterables.getOnlyElement(toIsmRecordOutputs), "use_indexed_format"));

  Step collectionToSingletonStep = steps.get(8);
  assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind());
}
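// Both ISM tests above end with assertAllStepOutputsHaveUniqueIds, which is defined
// elsewhere in the test class. A plausible sketch of what it checks (the real
// implementation may differ): every output name across all steps must be unique.
private static void assertAllStepOutputsHaveUniqueIds(Job job) throws Exception {
  Set<String> seen = new HashSet<>();
  for (Step step : job.getSteps()) {
    @SuppressWarnings("unchecked")
    List<Map<String, Object>> outputInfoList =
        (List<Map<String, Object>>) step.getProperties().get(PropertyNames.OUTPUT_INFO);
    if (outputInfoList == null) {
      continue;
    }
    for (Map<String, Object> outputInfo : outputInfoList) {
      String outputName = getString(outputInfo, PropertyNames.OUTPUT_NAME);
      // Set.add returns false on a duplicate, failing the assertion.
      assertTrue("Duplicate output id " + outputName, seen.add(outputName));
    }
  }
}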
private Pipeline buildPipeline(DataflowPipelineOptions options) {
  options.setRunner(DataflowRunner.class);
  Pipeline p = Pipeline.create(options);

  p.apply("ReadMyFile", TextIO.read().from("gs://bucket/object"))
      .apply("WriteMyFile", TextIO.write().to("gs://bucket/object"));
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(p);

  return p;
}
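// buildPipelineOptions, used throughout these tests, is also defined elsewhere. A
// rough sketch of the kind of setup it presumably performs; the real helper also
// wires in a mocked Dataflow client and GCS util so no service calls are made, and
// an overload taking String... args presumably parses them as extra options. The
// project and bucket names below are placeholders, not values from the source.
private static DataflowPipelineOptions buildPipelineOptions() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject("some-project"); // placeholder
  options.setTempLocation("gs://somebucket/temp"); // placeholder
  // Stage no files, so no default is computed by scanning the classpath.
  options.setFilesToStage(new ArrayList<>());
  return options;
}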
@Test
public void testSingleOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  DummyStatefulDoFn fn = new DummyStatefulDoFn();
  pipeline.apply(Create.of(KV.of(1, 2))).apply(ParDo.of(fn));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) fn));
}
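// The DummyStatefulDoFn used here is defined elsewhere in the test class. A minimal
// sketch, assuming it only needs to (a) declare state so the batch stateful-ParDo
// override fires, and (b) implement class-based equality so the assertions above can
// compare instances after the override rewrites the graph:
private static class DummyStatefulDoFn extends DoFn<KV<Integer, Integer>, Integer> {
  @StateId("dummy")
  private final StateSpec<ValueState<Integer>> dummyState = StateSpecs.value(VarIntCoder.of());

  @ProcessElement
  public void process(ProcessContext context) {}

  @Override
  public boolean equals(Object other) {
    return other instanceof DummyStatefulDoFn;
  }

  @Override
  public int hashCode() {
    return getClass().hashCode();
  }
}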
@Test
public void testFnApiSingleOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions("--experiments=beam_fn_api");
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  DummyStatefulDoFn fn = new DummyStatefulDoFn();
  pipeline.apply(Create.of(KV.of(1, 2))).apply(ParDo.of(fn));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) fn));
}
/**
 * Test that in translation the name for a collection (in this case just a Create output) is
 * overridden to be what the Dataflow service expects.
 */
@Test
public void testNamesOverridden() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  options.setStreaming(false);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);

  pipeline.apply("Jazzy", Create.of(3)).setName("foobizzle");

  runner.replaceTransforms(pipeline);

  Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob();

  // The Create step
  Step step = job.getSteps().get(0);

  // This is the name that is "set by the user" that the Dataflow translator must override
  String userSpecifiedName =
      getString(
          Structs.getListOfMaps(step.getProperties(), PropertyNames.OUTPUT_INFO, null).get(0),
          PropertyNames.USER_NAME);

  // This is the calculated name that must actually be used
  String calculatedName = getString(step.getProperties(), PropertyNames.USER_NAME) + ".out0";

  assertThat(userSpecifiedName, equalTo(calculatedName));
}
        .withOutputTags(mainOutputTag, TupleTagList.empty()));
runner.replaceTransforms(pipeline);
outputs.get(tag3).setName("froonazzle");
runner.replaceTransforms(pipeline);
runner.replaceTransforms(pipeline);
Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob();
assertAllStepOutputsHaveUniqueIds(job);
@Test
public void testMultiOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  TupleTag<Integer> mainOutputTag = new TupleTag<Integer>() {};
  TupleTag<Integer> sideOutputTag = new TupleTag<Integer>() {};

  DummyStatefulDoFn fn = new DummyStatefulDoFn();
  pipeline
      .apply(Create.of(KV.of(1, 2)))
      .apply(ParDo.of(fn).withOutputTags(mainOutputTag, TupleTagList.of(sideOutputTag)));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) fn));
}
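// findBatchStatefulDoFn is likewise defined elsewhere. One plausible shape,
// assuming it walks the replaced pipeline and returns the user DoFn from the first
// stateful ParDo primitive it finds; this is a hypothetical sketch for illustration
// only, and the real helper likely also unwraps the runner's batch stateful wrapper.
@SuppressWarnings("unchecked")
private static DoFn<KV<Integer, Integer>, Integer> findBatchStatefulDoFn(Pipeline p) {
  AtomicReference<DoFn<KV<Integer, Integer>, Integer>> found = new AtomicReference<>();
  p.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(Node node) {
          if (node.getTransform() instanceof ParDo.MultiOutput) {
            DoFn<?, ?> fn = ((ParDo.MultiOutput<?, ?>) node.getTransform()).getFn();
            // Only keep DoFns that actually declare state.
            if (DoFnSignatures.signatureForDoFn(fn).usesState()) {
              found.compareAndSet(null, (DoFn<KV<Integer, Integer>, Integer>) fn);
            }
          }
        }
      });
  return found.get();
}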
@Test
@Ignore(
    "TODO: BEAM-2902 Add support for user state in a ParDo.Multi once PTransformMatcher "
        + "exposes a way to know when the replacement is not required by checking that the "
        + "preceding ParDos to a GBK are key preserving.")
public void testFnApiMultiOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions("--experiments=beam_fn_api");
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  TupleTag<Integer> mainOutputTag = new TupleTag<Integer>() {};
  TupleTag<Integer> sideOutputTag = new TupleTag<Integer>() {};

  DummyStatefulDoFn fn = new DummyStatefulDoFn();
  pipeline
      .apply(Create.of(KV.of(1, 2)))
      .apply(ParDo.of(fn).withOutputTags(mainOutputTag, TupleTagList.of(sideOutputTag)));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) fn));
}
windowedInput.apply(ParDo.of(new TestSplittableFn()));
runner.replaceTransforms(pipeline);
windowedInput.apply(ParDo.of(new TestSplittableFn()));
runner.replaceTransforms(pipeline);
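// TestSplittableFn, used in both splittable-ParDo hunks above, is defined elsewhere
// in the test class. A minimal sketch of a splittable DoFn that would satisfy these
// translations (assumed shape): the RestrictionTracker parameter and the
// @GetInitialRestriction method are what mark the ParDo as splittable.
private static class TestSplittableFn extends DoFn<String, Integer> {
  @ProcessElement
  public void process(ProcessContext c, RestrictionTracker<OffsetRange, ?> tracker) {
    // Intentionally empty: translation tests only inspect the pipeline graph.
  }

  @GetInitialRestriction
  public OffsetRange getInitialRestriction(String element) {
    return null;
  }
}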