/** Tests that all reads are consumed by at least one {@link PTransform}. */
@Test
public void testUnconsumedReads() throws IOException {
  DataflowPipelineOptions dataflowOptions = buildPipelineOptions();
  RuntimeTestOptions options = dataflowOptions.as(RuntimeTestOptions.class);
  Pipeline p = buildDataflowPipeline(dataflowOptions);
  p.apply(TextIO.read().from(options.getInput()));
  DataflowRunner.fromOptions(dataflowOptions).replaceTransforms(p);
  final AtomicBoolean unconsumedSeenAsInput = new AtomicBoolean();
  p.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(Node node) {
          unconsumedSeenAsInput.set(true);
        }
      });
  assertThat(unconsumedSeenAsInput.get(), is(true));
}
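// For context, a minimal sketch of the RuntimeTestOptions interface the test above
// relies on (assumed shape; the real definition lives elsewhere in this test class).
// The point is that the input is a ValueProvider, i.e. not known at graph
// construction time, so the read cannot be validated or consumed eagerly.
public interface RuntimeTestOptions extends PipelineOptions {
  ValueProvider<String> getInput();

  void setInput(ValueProvider<String> value);
}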
options.setStreaming(true);
replaceTransforms(pipeline);
@Test
public void testToIterableTranslationWithIsmSideInput() throws Exception {
  // A "change detector" test that makes sure the translation
  // of getting a PCollectionView<Iterable<T>> does not change
  // in bad ways during refactor
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1, 2, 3)).apply(View.asIterable());

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob();
  assertAllStepOutputsHaveUniqueIds(job);

  List<Step> steps = job.getSteps();
  assertEquals(3, steps.size());

  @SuppressWarnings("unchecked")
  List<Map<String, Object>> toIsmRecordOutputs =
      (List<Map<String, Object>>) steps.get(1).getProperties().get(PropertyNames.OUTPUT_INFO);
  assertTrue(
      Structs.getBoolean(Iterables.getOnlyElement(toIsmRecordOutputs), "use_indexed_format"));

  Step collectionToSingletonStep = steps.get(2);
  assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind());
}
@Test
public void testToSingletonTranslationWithIsmSideInput() throws Exception {
  // A "change detector" test that makes sure the translation
  // of getting a PCollectionView<T> does not change
  // in bad ways during refactor
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);
  pipeline.apply(Create.of(1)).apply(View.asSingleton());

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob();
  assertAllStepOutputsHaveUniqueIds(job);

  List<Step> steps = job.getSteps();
  assertEquals(9, steps.size());

  @SuppressWarnings("unchecked")
  List<Map<String, Object>> toIsmRecordOutputs =
      (List<Map<String, Object>>) steps.get(7).getProperties().get(PropertyNames.OUTPUT_INFO);
  assertTrue(
      Structs.getBoolean(Iterables.getOnlyElement(toIsmRecordOutputs), "use_indexed_format"));

  Step collectionToSingletonStep = steps.get(8);
  assertEquals("CollectionToSingleton", collectionToSingletonStep.getKind());
}
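// Both ISM tests above end with assertAllStepOutputsHaveUniqueIds, which is defined
// elsewhere in the test class. A plausible sketch of what it checks (the real
// implementation may differ): every output name across all steps must be unique.
private static void assertAllStepOutputsHaveUniqueIds(Job job) throws Exception {
  Set<String> seen = new HashSet<>();
  for (Step step : job.getSteps()) {
    @SuppressWarnings("unchecked")
    List<Map<String, Object>> outputInfoList =
        (List<Map<String, Object>>) step.getProperties().get(PropertyNames.OUTPUT_INFO);
    if (outputInfoList == null) {
      continue;
    }
    for (Map<String, Object> outputInfo : outputInfoList) {
      String outputName = getString(outputInfo, PropertyNames.OUTPUT_NAME);
      // Set.add returns false on a duplicate, failing the assertion.
      assertTrue("Duplicate output id " + outputName, seen.add(outputName));
    }
  }
}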
private Pipeline buildPipeline(DataflowPipelineOptions options) {
  options.setRunner(DataflowRunner.class);
  Pipeline p = Pipeline.create(options);

  p.apply("ReadMyFile", TextIO.read().from("gs://bucket/object"))
      .apply("WriteMyFile", TextIO.write().to("gs://bucket/object"));
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(p);

  return p;
}
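// buildPipelineOptions, used throughout these tests, is also defined elsewhere. A
// rough sketch of the kind of setup it presumably performs; the real helper also
// wires in a mocked Dataflow client and GCS util so no service calls are made, and
// an overload taking String... args presumably parses them as extra options. The
// project and bucket names below are placeholders, not values from the source.
private static DataflowPipelineOptions buildPipelineOptions() {
  DataflowPipelineOptions options = PipelineOptionsFactory.as(DataflowPipelineOptions.class);
  options.setProject("some-project"); // placeholder
  options.setTempLocation("gs://somebucket/temp"); // placeholder
  // Stage no files, so no default is computed by scanning the classpath.
  options.setFilesToStage(new ArrayList<>());
  return options;
}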
@Test
public void testSingleOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  DummyStatefulDoFn fn = new DummyStatefulDoFn();
  pipeline.apply(Create.of(KV.of(1, 2))).apply(ParDo.of(fn));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) fn));
}
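// The DummyStatefulDoFn used here is defined elsewhere in the test class. A minimal
// sketch, assuming it only needs to (a) declare state so the batch stateful-ParDo
// override fires, and (b) implement class-based equality so the assertions above can
// compare instances after the override rewrites the graph:
private static class DummyStatefulDoFn extends DoFn<KV<Integer, Integer>, Integer> {
  @StateId("dummy")
  private final StateSpec<ValueState<Integer>> dummyState = StateSpecs.value(VarIntCoder.of());

  @ProcessElement
  public void process(ProcessContext context) {}

  @Override
  public boolean equals(Object other) {
    return other instanceof DummyStatefulDoFn;
  }

  @Override
  public int hashCode() {
    return getClass().hashCode();
  }
}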
@Test
public void testFnApiSingleOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions("--experiments=beam_fn_api");
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  DummyStatefulDoFn fn = new DummyStatefulDoFn();
  pipeline.apply(Create.of(KV.of(1, 2))).apply(ParDo.of(fn));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) fn));
}
/**
 * Test that in translation the name for a collection (in this case just a Create output) is
 * overridden to be what the Dataflow service expects.
 */
@Test
public void testNamesOverridden() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  DataflowRunner runner = DataflowRunner.fromOptions(options);
  options.setStreaming(false);
  DataflowPipelineTranslator translator = DataflowPipelineTranslator.fromOptions(options);

  Pipeline pipeline = Pipeline.create(options);

  pipeline.apply("Jazzy", Create.of(3)).setName("foobizzle");

  runner.replaceTransforms(pipeline);

  Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob();

  // The Create step
  Step step = job.getSteps().get(0);

  // This is the name that is "set by the user" that the Dataflow translator must override
  String userSpecifiedName =
      getString(
          Structs.getListOfMaps(step.getProperties(), PropertyNames.OUTPUT_INFO, null).get(0),
          PropertyNames.USER_NAME);

  // This is the calculated name that must actually be used
  String calculatedName = getString(step.getProperties(), PropertyNames.USER_NAME) + ".out0";

  assertThat(userSpecifiedName, equalTo(calculatedName));
}
        .withOutputTags(mainOutputTag, TupleTagList.empty()));
runner.replaceTransforms(pipeline);
outputs.get(tag3).setName("froonazzle");
runner.replaceTransforms(pipeline);
runner.replaceTransforms(pipeline);
Job job = translator.translate(pipeline, runner, Collections.emptyList()).getJob();
assertAllStepOutputsHaveUniqueIds(job);
@Test
public void testMultiOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions();
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  TupleTag<Integer> mainOutputTag = new TupleTag<Integer>() {};
  TupleTag<Integer> sideOutputTag = new TupleTag<Integer>() {};

  DummyStatefulDoFn fn = new DummyStatefulDoFn();
  pipeline
      .apply(Create.of(KV.of(1, 2)))
      .apply(ParDo.of(fn).withOutputTags(mainOutputTag, TupleTagList.of(sideOutputTag)));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) fn));
}
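// findBatchStatefulDoFn is likewise defined elsewhere. One plausible shape,
// assuming it walks the replaced pipeline and returns the user DoFn from the first
// stateful ParDo primitive it finds; this is a hypothetical sketch for illustration
// only, and the real helper likely also unwraps the runner's batch stateful wrapper.
@SuppressWarnings("unchecked")
private static DoFn<KV<Integer, Integer>, Integer> findBatchStatefulDoFn(Pipeline p) {
  AtomicReference<DoFn<KV<Integer, Integer>, Integer>> found = new AtomicReference<>();
  p.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(Node node) {
          if (node.getTransform() instanceof ParDo.MultiOutput) {
            DoFn<?, ?> fn = ((ParDo.MultiOutput<?, ?>) node.getTransform()).getFn();
            // Only keep DoFns that actually declare state.
            if (DoFnSignatures.signatureForDoFn(fn).usesState()) {
              found.compareAndSet(null, (DoFn<KV<Integer, Integer>, Integer>) fn);
            }
          }
        }
      });
  return found.get();
}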
@Test
@Ignore(
    "TODO: BEAM-2902 Add support for user state in a ParDo.Multi once PTransformMatcher "
        + "exposes a way to know when the replacement is not required by checking that the "
        + "preceding ParDos to a GBK are key preserving.")
public void testFnApiMultiOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions options = buildPipelineOptions("--experiments=beam_fn_api");
  options.setRunner(DataflowRunner.class);
  Pipeline pipeline = Pipeline.create(options);

  TupleTag<Integer> mainOutputTag = new TupleTag<Integer>() {};
  TupleTag<Integer> sideOutputTag = new TupleTag<Integer>() {};

  DummyStatefulDoFn fn = new DummyStatefulDoFn();
  pipeline
      .apply(Create.of(KV.of(1, 2)))
      .apply(ParDo.of(fn).withOutputTags(mainOutputTag, TupleTagList.of(sideOutputTag)));

  DataflowRunner runner = DataflowRunner.fromOptions(options);
  runner.replaceTransforms(pipeline);
  assertThat(findBatchStatefulDoFn(pipeline), equalTo((DoFn) fn));
}
windowedInput.apply(ParDo.of(new TestSplittableFn()));
runner.replaceTransforms(pipeline);
windowedInput.apply(ParDo.of(new TestSplittableFn()));
runner.replaceTransforms(pipeline);
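// TestSplittableFn, used in both splittable-ParDo hunks above, is defined elsewhere
// in the test class. A minimal sketch of a splittable DoFn that would satisfy these
// translations (assumed shape): the RestrictionTracker parameter and the
// @GetInitialRestriction method are what mark the ParDo as splittable.
private static class TestSplittableFn extends DoFn<String, Integer> {
  @ProcessElement
  public void process(ProcessContext c, RestrictionTracker<OffsetRange, ?> tracker) {
    // Intentionally empty: translation tests only inspect the pipeline graph.
  }

  @GetInitialRestriction
  public OffsetRange getInitialRestriction(String element) {
    return null;
  }
}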