/**
 * Checks which {@code PTransformMatchers} accept a multi-output {@code ParDo} built from the
 * {@code doFnWithState} fixture: only {@code stateOrTimerParDoMulti()} is expected to match.
 */
@Test
public void parDoMultiWithState() {
  AppliedPTransform<?, ?, ?> statefulMulti =
      getAppliedTransform(
          ParDo.of(doFnWithState).withOutputTags(new TupleTag<>(), TupleTagList.empty()));

  // The state/timer multi-output matcher is the one that must accept this application.
  assertThat(PTransformMatchers.stateOrTimerParDoMulti().matches(statefulMulti), is(true));

  // The splittable matchers and the single-output state/timer matcher must reject it.
  assertThat(PTransformMatchers.splittableParDoMulti().matches(statefulMulti), is(false));
  assertThat(PTransformMatchers.splittableParDoSingle().matches(statefulMulti), is(false));
  assertThat(PTransformMatchers.stateOrTimerParDoSingle().matches(statefulMulti), is(false));
}
/**
 * Checks that a multi-output {@code ParDo} built from the {@code doFn} fixture is rejected by
 * all four splittable and state/timer matchers exercised here.
 */
@Test
public void parDoMulti() {
  AppliedPTransform<?, ?, ?> plainMulti =
      getAppliedTransform(ParDo.of(doFn).withOutputTags(new TupleTag<>(), TupleTagList.empty()));

  // Multi-output matchers.
  assertThat(PTransformMatchers.splittableParDoMulti().matches(plainMulti), is(false));
  assertThat(PTransformMatchers.stateOrTimerParDoMulti().matches(plainMulti), is(false));

  // Single-output matchers.
  assertThat(PTransformMatchers.splittableParDoSingle().matches(plainMulti), is(false));
  assertThat(PTransformMatchers.stateOrTimerParDoSingle().matches(plainMulti), is(false));
}
/**
 * Checks that a multi-output {@code ParDo} built from the {@code splittableDoFn} fixture is
 * accepted by {@code splittableParDo()} and rejected by the state/timer matchers and by
 * {@code splittableParDoSingle()}.
 */
@Test
public void parDoSplittable() {
  AppliedPTransform<?, ?, ?> splittableMulti =
      getAppliedTransform(
          ParDo.of(splittableDoFn).withOutputTags(new TupleTag<>(), TupleTagList.empty()));

  // The general splittable matcher must accept the application.
  assertThat(PTransformMatchers.splittableParDo().matches(splittableMulti), is(true));

  // None of the remaining matchers may accept it.
  assertThat(PTransformMatchers.stateOrTimerParDoMulti().matches(splittableMulti), is(false));
  assertThat(PTransformMatchers.splittableParDoSingle().matches(splittableMulti), is(false));
  assertThat(PTransformMatchers.stateOrTimerParDoSingle().matches(splittableMulti), is(false));
}
/**
 * Checks that {@code parDoWithFnType} built from a {@code DoFn}'s runtime class matches both the
 * single-output and the multi-output {@code ParDo} that wrap that same {@code DoFn} instance.
 */
@Test
public void parDoWithFnTypeWithMatchingType() {
  // Anonymous DoFn with an empty process method; only its class is relevant to the matcher.
  DoFn<Object, Object> noOpFn =
      new DoFn<Object, Object>() {
        @ProcessElement
        public void process(ProcessContext ctxt) {}
      };

  AppliedPTransform<?, ?, ?> singleOutput = getAppliedTransform(ParDo.of(noOpFn));
  AppliedPTransform<?, ?, ?> multiOutput =
      getAppliedTransform(
          ParDo.of(noOpFn).withOutputTags(new TupleTag<>(), TupleTagList.empty()));

  PTransformMatcher fnTypeMatcher = PTransformMatchers.parDoWithFnType(noOpFn.getClass());

  assertThat(fnTypeMatcher.matches(singleOutput), is(true));
  assertThat(fnTypeMatcher.matches(multiOutput), is(true));
}
/**
 * Flattens two keyed inputs ("Impolite" and "Polite"), applies a multi-output {@code ParDo} of
 * {@code CallSequenceEnforcingStatefulFn} with no additional outputs, and runs the pipeline.
 */
@Test
@Category({ValidatesRunner.class, UsesStatefulParDo.class, UsesParDoLifecycle.class})
public void testFnCallSequenceStateful() {
  // Name the two in-memory sources up front so the pipeline shape below reads at a glance.
  Create.Values<KV<String, Integer>> impoliteValues =
      Create.of(KV.of("a", 1), KV.of("b", 2), KV.of("a", 4));
  Create.Values<KV<String, Integer>> politeValues =
      Create.of(KV.of("b", 3), KV.of("a", 5), KV.of("c", 6), KV.of("c", 7));

  PCollectionList.of(p.apply("Impolite", impoliteValues))
      .and(p.apply("Polite", politeValues))
      .apply(Flatten.pCollections())
      .apply(
          ParDo.of(new CallSequenceEnforcingStatefulFn<String, Integer>())
              .withOutputTags(new TupleTag<KV<String, Integer>>() {}, TupleTagList.empty()));

  p.run();
}
/**
 * Expands this transform in three named steps: "Drop key" keeps only the value of each input
 * element via {@code Values.create()}, "Reshuffle" applies {@code Reshuffle.of()}, and
 * "NaiveProcess" runs a {@code ParDo} of a {@code NaiveProcessFn} built from
 * {@code original.getFn()}.
 *
 * <p>The side inputs, main output tag and additional output tags are all taken from
 * {@code original}.
 *
 * @param input elements keyed by {@code byte[]} whose values are element/restriction pairs
 * @return the {@code PCollectionTuple} produced by the "NaiveProcess" {@code ParDo}
 */
// NOTE: the final brace on the line below closes an enclosing declaration opened before this
// method.
@Override public PCollectionTuple expand(PCollection<KV<byte[], KV<InputT, RestrictionT>>> input) { return input .apply("Drop key", Values.create()) .apply("Reshuffle", Reshuffle.of()) .apply( "NaiveProcess", ParDo.of( new NaiveProcessFn<InputT, OutputT, RestrictionT, TrackerT>(original.getFn())) .withSideInputs(original.getSideInputs()) .withOutputTags(original.getMainOutputTag(), original.getAdditionalOutputTags())); } }
/**
 * Applies {@code MainOutputDummyFn} with a {@code TestDummy} main output and an {@code Integer}
 * additional output, sets a {@code TestDummyCoder} explicitly on the main output, and runs the
 * pipeline.
 */
@Test
@Category(NeedsRunner.class)
public void testMainOutputUnregisteredExplicitCoder() {
  PCollection<Integer> numbers = pipeline.apply(Create.of(Arrays.asList(1, 2, 3)));

  final TupleTag<TestDummy> mainTag = new TupleTag<>("unregisteredMain");
  // Kept as an anonymous subclass, exactly as in the original declaration.
  final TupleTag<Integer> extraTag = new TupleTag<Integer>("additionalOutput") {};

  PCollectionTuple outputs =
      numbers.apply(
          ParDo.of(new MainOutputDummyFn(mainTag, extraTag))
              .withOutputTags(mainTag, TupleTagList.of(extraTag)));

  // The coder for the main output is supplied by hand before the pipeline is run.
  outputs.get(mainTag).setCoder(new TestDummyCoder());

  pipeline.run();
}
/**
 * Applies {@code TaggedOutputDummyFn} with an {@code Integer} main output and a {@code TestDummy}
 * additional output, sets no coder, and expects an {@code IllegalStateException} whose message
 * contains "Unable to return a default Coder".
 */
@Test
@Category(NeedsRunner.class)
public void testTaggedOutputUnknownCoder() throws Exception {
  PCollection<Integer> numbers = pipeline.apply(Create.of(Arrays.asList(1, 2, 3)));

  final TupleTag<Integer> mainTag = new TupleTag<>("main");
  final TupleTag<TestDummy> unknownTag = new TupleTag<>("unknownSide");

  numbers.apply(
      ParDo.of(new TaggedOutputDummyFn(mainTag, unknownTag))
          .withOutputTags(mainTag, TupleTagList.of(unknownTag)));

  // The expectation is registered only after the transform is applied, so the failure has to
  // come from the statements that follow (presumably `thrown` is a JUnit ExpectedException rule).
  thrown.expect(IllegalStateException.class);
  thrown.expectMessage("Unable to return a default Coder");

  pipeline.run();
}
/**
 * Builds the multi-output {@code ParDo} that runs a {@code PubsubMessageToRow} configured with
 * this object's schema.
 *
 * <p>{@code MAIN_TAG} is always the main output; {@code DLQ_TAG} is added as the only additional
 * output when {@code useDlq()} is true.
 *
 * @return the parser {@code ParDo} with its output tags configured
 */
private ParDo.MultiOutput<PubsubMessage, Row> createParserParDo() {
  // NOTE(review): the DLQ flag is derived in two ways in this method (a null check on
  // getDeadLetterQueue() here, useDlq() below) -- confirm the two always agree.
  ParDo.SingleOutput<PubsubMessage, Row> parser =
      ParDo.of(
          PubsubMessageToRow.builder()
              .messageSchema(getSchema())
              .useDlq(getDeadLetterQueue() != null)
              .build());

  final TupleTagList additionalTags;
  if (useDlq()) {
    additionalTags = TupleTagList.of(DLQ_TAG);
  } else {
    additionalTags = TupleTagList.empty();
  }

  return parser.withOutputTags(MAIN_TAG, additionalTags);
}
/**
 * Checks which {@code PTransformMatchers} accept a multi-output {@code ParDo} built from the
 * {@code splittableDoFn} fixture: only {@code splittableParDoMulti()} is expected to match.
 */
@Test
public void parDoMultiSplittable() {
  AppliedPTransform<?, ?, ?> splittableMulti =
      getAppliedTransform(
          ParDo.of(splittableDoFn).withOutputTags(new TupleTag<>(), TupleTagList.empty()));

  // The splittable multi-output matcher must accept the application.
  assertThat(PTransformMatchers.splittableParDoMulti().matches(splittableMulti), is(true));

  // The state/timer matchers and the splittable single-output matcher must reject it.
  assertThat(PTransformMatchers.stateOrTimerParDoMulti().matches(splittableMulti), is(false));
  assertThat(PTransformMatchers.splittableParDoSingle().matches(splittableMulti), is(false));
  assertThat(PTransformMatchers.stateOrTimerParDoSingle().matches(splittableMulti), is(false));
}
/**
 * Expands by applying {@code original} as a multi-output transform with a freshly created main
 * output tag and no additional outputs, then returning the collection stored under that tag.
 *
 * @param input the collection the transform is applied to
 * @return the main output of the multi-output application
 */
@Override
public PCollection<OutputT> expand(PCollection<? extends InputT> input) {
  // The same tag instance both configures the outputs and looks up the result.
  TupleTag<OutputT> mainTag = new TupleTag<>();
  return input.apply(original.withOutputTags(mainTag, TupleTagList.empty())).get(mainTag);
}
} // closes an enclosing declaration opened before this method
/**
 * Expands by applying a multi-output {@code ParDo} of {@code SnsWriterFn}, using
 * {@code getResultOutputTag()} as the main output and no additional outputs.
 *
 * @param input the publish requests to process
 * @return the tuple produced by the writer {@code ParDo}
 * @throws IllegalArgumentException if {@code getTopicName()} is null (assuming
 *     {@code checkArgument} is Guava-style Preconditions -- confirm against the import)
 */
@Override
public PCollectionTuple expand(PCollection<PublishRequest> input) {
  checkArgument(getTopicName() != null, "withTopicName() is required");

  // Only the result tag is produced; there are no additional outputs.
  TupleTagList noAdditionalOutputs = TupleTagList.empty();
  return input.apply(
      ParDo.of(new SnsWriterFn(this)).withOutputTags(getResultOutputTag(), noAdditionalOutputs));
}
/**
 * Builds a pipeline of a "Read" step followed by a "multi-do" multi-output {@code ParDo},
 * translates it to proto, and checks that the primitive transform ids reported by
 * {@code QueryablePipeline} equal the key set of the components' transforms map.
 */
@Test
public void retainOnlyPrimitivesWithOnlyPrimitivesUnchanged() {
  Pipeline testPipeline = Pipeline.create();
  testPipeline
      .apply("Read", Read.from(CountingSource.unbounded()))
      .apply(
          "multi-do",
          ParDo.of(new TestFn()).withOutputTags(new TupleTag<>(), TupleTagList.empty()));

  Components pipelineComponents = PipelineTranslation.toProto(testPipeline).getComponents();
  Collection<String> primitiveIds =
      QueryablePipeline.getPrimitiveTransformIds(pipelineComponents);

  // Every transform id in the components must be reported as primitive, and nothing else.
  assertThat(primitiveIds, equalTo(pipelineComponents.getTransformsMap().keySet()));
}
/**
 * Checks which {@code PTransformMatchers} accept a multi-output {@code ParDo} built from the
 * {@code doFnWithTimers} fixture: only {@code stateOrTimerParDoMulti()} is expected to match.
 */
@Test
public void parDoMultiWithTimers() {
  AppliedPTransform<?, ?, ?> timerMulti =
      getAppliedTransform(
          ParDo.of(doFnWithTimers).withOutputTags(new TupleTag<>(), TupleTagList.empty()));

  // The state/timer multi-output matcher must accept the application.
  assertThat(PTransformMatchers.stateOrTimerParDoMulti().matches(timerMulti), is(true));

  // The splittable matchers and the single-output state/timer matcher must reject it.
  assertThat(PTransformMatchers.splittableParDoMulti().matches(timerMulti), is(false));
  assertThat(PTransformMatchers.splittableParDoSingle().matches(timerMulti), is(false));
  assertThat(PTransformMatchers.stateOrTimerParDoSingle().matches(timerMulti), is(false));
}
/**
 * Expands by applying a multi-output {@code ParDo} of a {@code SplitFeaturesDoFn} constructed
 * from {@code strategy} and {@code specs}.
 *
 * <p>{@code MAIN_TAG} is passed as the main output tag and {@code strategy.getTags()} as the
 * additional output tags.
 *
 * @param input the feature rows to split
 * @return the tuple produced by the split {@code ParDo}
 */
// NOTE: the final brace on the line below closes an enclosing declaration opened before this
// method.
@Override public PCollectionTuple expand(PCollection<FeatureRowExtended> input) { return input.apply( ParDo.of(new SplitFeaturesDoFn<>(strategy, specs)) .withOutputTags(MAIN_TAG, strategy.getTags())); } }
/**
 * Wraps {@code fn} in a multi-output {@code ParDo}, expands it by hand to build an
 * {@code AppliedPTransform}, and applies {@code SplittableParDo.forAppliedParDo} to
 * {@code input} under the given name.
 *
 * @param name the name given to the {@code SplittableParDo} application
 * @param input the collection the transform is applied to
 * @param fn the {@code DoFn} to wrap
 * @return the collection stored under {@code MAIN_OUTPUT_TAG} in the result
 */
private PCollection<String> applySplittableParDo(
    String name, PCollection<Integer> input, DoFn<Integer, String> fn) {
  ParDo.MultiOutput<Integer, String> parDo =
      ParDo.of(fn).withOutputTags(MAIN_OUTPUT_TAG, TupleTagList.empty());

  // Expand directly (rather than via input.apply) to get outputs for the AppliedPTransform.
  PCollectionTuple expanded = parDo.expand(input);
  expanded.get(MAIN_OUTPUT_TAG).setName("main");

  AppliedPTransform<PCollection<Integer>, PCollectionTuple, ?> applied =
      AppliedPTransform.of("ParDo", input.expand(), expanded.expand(), parDo, pipeline);

  return input.apply(name, SplittableParDo.forAppliedParDo(applied)).get(MAIN_OUTPUT_TAG);
}
/**
 * Flattens two integer inputs ("Impolite" and "Polite"), applies a multi-output {@code ParDo} of
 * {@code CallSequenceEnforcingFn} with no additional outputs, and runs the pipeline.
 */
@Test
@Category({ValidatesRunner.class, UsesParDoLifecycle.class})
public void testFnCallSequenceMulti() {
  // Name the two in-memory sources up front so the pipeline shape below reads at a glance.
  Create.Values<Integer> impoliteValues = Create.of(1, 2, 4);
  Create.Values<Integer> politeValues = Create.of(3, 5, 6, 7);

  PCollectionList.of(p.apply("Impolite", impoliteValues))
      .and(p.apply("Polite", politeValues))
      .apply(Flatten.pCollections())
      .apply(
          ParDo.of(new CallSequenceEnforcingFn<Integer>())
              .withOutputTags(new TupleTag<Integer>() {}, TupleTagList.empty()));

  p.run();
}
/**
 * Creates a three-element integer input on {@code p} and applies a multi-output {@code ParDo} of
 * {@code IdentityDoFn} configured with the supplied tags.
 *
 * @param mainOutputTag tag passed as the main output
 * @param additionalOutputTag tag passed as the single additional output
 * @return the tuple produced by the {@code ParDo}
 */
private PCollectionTuple buildPCollectionTupleWithTags(
    TupleTag<Integer> mainOutputTag, TupleTag<Integer> additionalOutputTag) {
  PCollection<Integer> numbers = p.apply(Create.of(1, 2, 3));
  return numbers.apply(
      ParDo.of(new IdentityDoFn())
          .withOutputTags(mainOutputTag, TupleTagList.of(additionalOutputTag)));
}
/**
 * Checks that {@code stateOrTimerParDo()} accepts a multi-output {@code ParDo} built from the
 * {@code doFnWithState} fixture and rejects one built from the {@code splittableDoFn} fixture.
 */
@Test
public void parDoWithState() {
  // Positive case: the application built from doFnWithState.
  AppliedPTransform<?, ?, ?> withState =
      getAppliedTransform(
          ParDo.of(doFnWithState).withOutputTags(new TupleTag<>(), TupleTagList.empty()));
  assertThat(PTransformMatchers.stateOrTimerParDo().matches(withState), is(true));

  // Negative case: the application built from splittableDoFn.
  AppliedPTransform<?, ?, ?> splittable =
      getAppliedTransform(
          ParDo.of(splittableDoFn).withOutputTags(new TupleTag<>(), TupleTagList.empty()));
  assertThat(PTransformMatchers.stateOrTimerParDo().matches(splittable), is(false));
}
/**
 * Checks that the name of a multi-output {@code ParDo} wrapping {@code TaggedOutputDummyFn}
 * contains "ParMultiDo(TaggedOutputDummy)".
 */
@Test
public void testParDoMultiNameBasedDoFnWithTrimmerSuffix() {
  // Tags are passed as null; this test only reads the transform's name.
  String transformName =
      ParDo.of(new TaggedOutputDummyFn(null, null)).withOutputTags(null, null).getName();

  assertThat(transformName, containsString("ParMultiDo(TaggedOutputDummy)"));
}