/**
 * {@inheritDoc}.
 *
 * <p>The input {@link PCollectionList} that is constructed will have the same values in the same
 * order as the values of the provided input map.
 *
 * @param inputs the expanded inputs of the transform being replaced; every value is expected to
 *     be a {@code PCollection<T>}
 * @param p the pipeline the resulting list is bound to
 * @return a {@link PCollectionList} containing every input value
 */
private PCollectionList<T> getInput(Map<TupleTag<?>, PValue> inputs, Pipeline p) {
  PCollectionList<T> pCollections = PCollectionList.empty(p);
  for (PValue input : inputs.values()) {
    // Flatten inputs are always PCollections of the flattened element type, so this
    // cast cannot fail for well-formed input maps.
    @SuppressWarnings("unchecked")
    PCollection<T> pcollection = (PCollection<T>) input;
    pCollections = pCollections.and(pcollection);
  }
  return pCollections;
}
/**
 * Returns a singleton {@link PCollectionList} containing the given {@link PCollection}.
 *
 * <p>Longer {@link PCollectionList PCollectionLists} can be created by calling {@link #and} on
 * the result.
 */
public static <T> PCollectionList<T> of(PCollection<T> pc) {
  // Start from an empty list on the collection's own pipeline, then append it.
  PCollectionList<T> emptyList = new PCollectionList<>(pc.getPipeline());
  return emptyList.and(pc);
}
/**
 * Verifies that applying the deduplicating replacement to a list with duplicate inputs does not
 * feed the raw (duplicated) expansion directly into the primitive {@link Flatten.PCollections}.
 */
@Test
public void duplicatesInsertsMultipliers() {
  PTransform<PCollectionList<String>, PCollection<String>> replacement =
      new DeduplicatedFlattenFactory.FlattenWithoutDuplicateInputs<>();
  // "first" appears three times; the factory is expected to rewrite these duplicates.
  final PCollectionList<String> inputList =
      PCollectionList.of(first).and(second).and(first).and(first);
  inputList.apply(replacement);
  pipeline.traverseTopologically(
      new Defaults() {
        @Override
        public void visitPrimitiveTransform(TransformHierarchy.Node node) {
          // The primitive Flatten must see deduplicated inputs, not the original expansion.
          if (node.getTransform() instanceof Flatten.PCollections) {
            assertThat(node.getInputs(), not(equalTo(inputList.expand())));
          }
        }
      });
}
/**
 * Runs the deduplicating Flatten replacement end-to-end and checks that duplicated inputs still
 * contribute their elements once per occurrence.
 */
@Test
@Category(NeedsRunner.class)
public void testOverride() {
  PTransform<PCollectionList<String>, PCollection<String>> replacement =
      new FlattenWithoutDuplicateInputs<>();
  final PCollectionList<String> inputList =
      PCollectionList.of(first).and(second).and(first).and(first);
  PCollection<String> flattened = inputList.apply(replacement);
  PAssert.that(flattened).containsInAnyOrder("one", "two", "one", "one");
  pipeline.run();
}
/** Expanding a list that contains the same PCollection several times keeps every occurrence. */
@Test
public void testExpandWithDuplicates() {
  Pipeline testPipeline = TestPipeline.create();
  PCollection<Long> longs = testPipeline.apply("CreateOne", Create.of(1L, 2L, 3L));
  PCollectionList<Long> duplicated = PCollectionList.of(longs).and(longs).and(longs);
  assertThat(duplicated.expand().values(), containsInAnyOrder(longs, longs, longs));
}
/**
 * Checks that the factory maps the original flattened output to the replacement's output so the
 * runner can rewire downstream consumers.
 */
@Test
public void outputMapping() {
  final PCollectionList<String> inputList =
      PCollectionList.of(first).and(second).and(first).and(first);
  PCollection<String> original = inputList.apply(Flatten.pCollections());
  PCollection<String> replacement = inputList.apply(new FlattenWithoutDuplicateInputs<>());
  // The mapping must pair the original output value with the replacement output value.
  assertThat(
      factory.mapOutputs(original.expand(), replacement),
      Matchers.hasEntry(
          replacement,
          ReplacementOutput.of(
              TaggedPValue.ofExpandedValue(original),
              TaggedPValue.ofExpandedValue(replacement))));
}
}
@Override public PCollectionList<T> expand(PCollection<T> in) { final TupleTagList outputTags = partitionDoFn.getOutputTags(); PCollectionTuple outputs = in.apply(ParDo.of(partitionDoFn).withOutputTags(new TupleTag<Void>() {}, outputTags)); PCollectionList<T> pcs = PCollectionList.empty(in.getPipeline()); Coder<T> coder = in.getCoder(); for (TupleTag<?> outputTag : outputTags.getAll()) { // All the tuple tags are actually TupleTag<T> // And all the collections are actually PCollection<T> @SuppressWarnings("unchecked") TupleTag<T> typedOutputTag = (TupleTag<T>) outputTag; pcs = pcs.and(outputs.get(typedOutputTag).setCoder(coder)); } return pcs; }
/**
 * Mapping outputs must fail when the original expansion contains more than one output value;
 * the factory only supports single-output replacements.
 */
@Test
public void testMapOutputsMultipleOriginalOutputsFails() {
  PCollection<Integer> input = pipeline.apply(Create.of(1, 2, 3));
  PCollection<Integer> output = input.apply("Map", MapElements.via(fn));
  PCollection<Integer> reappliedOutput = input.apply("ReMap", MapElements.via(fn));
  // Expectation must be registered before the call that is expected to throw.
  thrown.expect(IllegalArgumentException.class);
  factory.mapOutputs(
      PCollectionList.of(output).and(input).and(reappliedOutput).expand(), reappliedOutput);
}
}
/**
 * A primitive read executes in the SDK harness environment, while a runner-implemented flatten
 * reports no environment.
 */
@Test
public void getEnvironmentWithEnvironment() {
  Pipeline pipeline = Pipeline.create();
  PCollection<Long> counts = pipeline.apply("BoundedRead", Read.from(CountingSource.upTo(100L)));
  PCollectionList.of(counts).and(counts).and(counts).apply("flatten", Flatten.pCollections());
  Components components = PipelineTranslation.toProto(pipeline).getComponents();
  QueryablePipeline queryable = QueryablePipeline.forPrimitivesIn(components);
  PTransformNode readNode =
      PipelineNode.pTransform("BoundedRead", components.getTransformsOrThrow("BoundedRead"));
  PTransformNode flattenNode =
      PipelineNode.pTransform("flatten", components.getTransformsOrThrow("flatten"));
  assertThat(queryable.getEnvironment(readNode).isPresent(), is(true));
  assertThat(
      queryable.getEnvironment(readNode).get(),
      equalTo(Environments.JAVA_SDK_HARNESS_ENVIRONMENT));
  assertThat(queryable.getEnvironment(flattenNode).isPresent(), is(false));
}
/**
 * The empty-Flatten-as-Create factory only handles Flattens with no inputs; handing it a
 * non-empty input list must raise an {@link IllegalArgumentException} naming the inputs and the
 * factory.
 */
@Test
public void getInputNonEmptyThrows() {
  PCollectionList<Long> nonEmpty =
      PCollectionList.of(pipeline.apply("unbounded", GenerateSequence.from(0)))
          .and(pipeline.apply("bounded", GenerateSequence.from(0).to(100)));
  // Expectations must be registered before the throwing call.
  thrown.expect(IllegalArgumentException.class);
  thrown.expectMessage(nonEmpty.expand().toString());
  thrown.expectMessage(EmptyFlattenAsCreateFactory.class.getSimpleName());
  factory.getReplacementTransform(
      AppliedPTransform.of(
          "nonEmptyInput",
          nonEmpty.expand(),
          Collections.emptyMap(),
          Flatten.pCollections(),
          pipeline));
}
/**
 * Splits an unbounded counting source, reads a bounded number of records from each split, and
 * verifies the flattened union contains every expected element.
 */
@Test
@Category({ValidatesRunner.class, DataflowPortabilityApiUnsupported.class})
public void testUnboundedSourceSplits() throws Exception {
  long numElements = 1000;
  int numSplits = 10;
  UnboundedSource<Long, ?> initial = CountingSource.unbounded();
  List<? extends UnboundedSource<Long, ?>> splits = initial.split(numSplits, p.getOptions());
  assertEquals("Expected exact splitting", numSplits, splits.size());
  long elementsPerSplit = numElements / numSplits;
  assertEquals("Expected even splits", numElements, elementsPerSplit * numSplits);
  PCollectionList<Long> shards = PCollectionList.empty(p);
  for (int idx = 0; idx < splits.size(); ++idx) {
    shards =
        shards.and(
            p.apply(
                "split" + idx, Read.from(splits.get(idx)).withMaxNumRecords(elementsPerSplit)));
  }
  PCollection<Long> flattened = shards.apply(Flatten.pCollections());
  addCountingAsserts(flattened, numElements);
  p.run();
}
/**
 * When the same PCollection feeds a Flatten twice, the graph records the flatten producer as a
 * per-element consumer once per occurrence.
 */
@Test
public void getValueToConsumersWithDuplicateInputSucceeds() {
  PCollection<String> source = p.apply(Create.of("1", "2", "3"));
  PCollection<String> merged =
      PCollectionList.of(source).and(source).apply(Flatten.pCollections());
  p.traverseTopologically(visitor);
  DirectGraph graph = visitor.getGraph();
  AppliedPTransform<?, ?, ?> mergedProducer = graph.getProducer(merged);
  assertThat(
      graph.getPerElementConsumers(source),
      Matchers.containsInAnyOrder(new Object[] {mergedProducer, mergedProducer}));
  assertThat(graph.getPerElementConsumers(merged), emptyIterable());
}
/**
 * Flattening inputs whose fixed windows have different sizes must fail, since the merged
 * collection would have no single consistent windowing strategy.
 */
@Test
public void testIncompatibleWindowFnPropagationFailure() {
  // The pipeline is never run, so disable the abandoned-node enforcement.
  p.enableAbandonedNodeEnforcement(false);
  PCollection<String> input1 =
      p.apply("CreateInput1", Create.of("Input1"))
          .apply("Window1", Window.into(FixedWindows.of(Duration.standardMinutes(1))));
  PCollection<String> input2 =
      p.apply("CreateInput2", Create.of("Input2"))
          .apply("Window2", Window.into(FixedWindows.of(Duration.standardMinutes(2))));
  try {
    PCollectionList.of(input1).and(input2).apply(Flatten.pCollections());
    Assert.fail("Exception should have been thrown");
  } catch (IllegalStateException e) {
    // Only the message prefix is stable; assert on startsWith rather than the full text.
    Assert.assertTrue(
        e.getMessage().startsWith("Inputs to Flatten had incompatible window windowFns"));
  }
}
/** The DoFn lifecycle methods must be invoked in order even over a flattened multi-source input. */
@Test
@Category({ValidatesRunner.class, UsesParDoLifecycle.class})
public void testFnCallSequence() {
  PCollection<Integer> rude = p.apply("Impolite", Create.of(1, 2, 4));
  PCollection<Integer> courteous = p.apply("Polite", Create.of(3, 5, 6, 7));
  PCollectionList.of(rude)
      .and(courteous)
      .apply(Flatten.pCollections())
      .apply(ParDo.of(new CallSequenceEnforcingFn<>()));
  p.run();
}
/** Same lifecycle-ordering check as testFnCallSequence, but via the multi-output ParDo form. */
@Test
@Category({ValidatesRunner.class, UsesParDoLifecycle.class})
public void testFnCallSequenceMulti() {
  PCollection<Integer> rude = p.apply("Impolite", Create.of(1, 2, 4));
  PCollection<Integer> courteous = p.apply("Polite", Create.of(3, 5, 6, 7));
  PCollectionList.of(rude)
      .and(courteous)
      .apply(Flatten.pCollections())
      .apply(
          ParDo.of(new CallSequenceEnforcingFn<Integer>())
              .withOutputTags(new TupleTag<Integer>() {}, TupleTagList.empty()));
  p.run();
}
/**
 * A transform instance may be applied multiple times; elements flowing through one, then two
 * applications pick up one and two suffixes respectively.
 */
@Test
@Category(ValidatesRunner.class)
public void testMultipleApply() {
  PTransform<PCollection<? extends String>, PCollection<String>> suffixer = addSuffix("+");
  PCollection<String> source = pipeline.apply(Create.of(ImmutableList.of("a", "b")));
  PCollection<String> doubled = source.apply("Left1", suffixer).apply("Left2", suffixer);
  PCollection<String> single = source.apply("Right", suffixer);
  PCollection<String> combined =
      PCollectionList.of(doubled).and(single).apply(Flatten.pCollections());
  PAssert.that(combined).containsInAnyOrder("a++", "b++", "a+", "b+");
  pipeline.run();
}
/**
 * A lambda passed to {@link PTransform#compose} builds a composite transform; here it unions the
 * input with its negation, so the output holds every element and its negative.
 */
@Test
@Category(NeedsRunner.class)
public void testComposeBasicSerializableFunction() throws Exception {
  PCollection<Integer> output =
      pipeline
          .apply(Create.of(1, 2, 3))
          .apply(
              PTransform.compose(
                  (PCollection<Integer> numbers) -> {
                    // Negate each element, then flatten originals and negations together.
                    PCollection<Integer> inverted =
                        numbers.apply(MapElements.into(integers()).via(input -> -input));
                    return PCollectionList.of(numbers)
                        .and(inverted)
                        .apply(Flatten.pCollections());
                  }));
  PAssert.that(output).containsInAnyOrder(-2, -1, -3, 2, 1, 3);
  pipeline.run();
}
}
/** Lifecycle-ordering check for a stateful DoFn consuming a flattened keyed input. */
@Test
@Category({ValidatesRunner.class, UsesStatefulParDo.class, UsesParDoLifecycle.class})
public void testFnCallSequenceStateful() {
  PCollection<KV<String, Integer>> rude =
      p.apply("Impolite", Create.of(KV.of("a", 1), KV.of("b", 2), KV.of("a", 4)));
  PCollection<KV<String, Integer>> courteous =
      p.apply("Polite", Create.of(KV.of("b", 3), KV.of("a", 5), KV.of("c", 6), KV.of("c", 7)));
  PCollectionList.of(rude)
      .and(courteous)
      .apply(Flatten.pCollections())
      .apply(
          ParDo.of(new CallSequenceEnforcingStatefulFn<String, Integer>())
              .withOutputTags(new TupleTag<KV<String, Integer>>() {}, TupleTagList.empty()));
  p.run();
}
/** Flattening two inputs with identical fixed windows propagates that window fn to the output. */
@Test
@Category(NeedsRunner.class)
public void testEqualWindowFnPropagation() {
  PCollection<String> windowedOne =
      p.apply("CreateInput1", Create.of("Input1"))
          .apply("Window1", Window.into(FixedWindows.of(Duration.standardMinutes(1))));
  PCollection<String> windowedTwo =
      p.apply("CreateInput2", Create.of("Input2"))
          .apply("Window2", Window.into(FixedWindows.of(Duration.standardMinutes(1))));
  PCollection<String> merged =
      PCollectionList.of(windowedOne).and(windowedTwo).apply(Flatten.pCollections());
  p.run();
  Assert.assertTrue(
      merged
          .getWindowingStrategy()
          .getWindowFn()
          .isCompatible(FixedWindows.of(Duration.standardMinutes(1))));
}
/**
 * Flattening session-windowed inputs with different gap durations still yields a compatible
 * session window fn on the output.
 */
@Test
@Category(NeedsRunner.class)
public void testCompatibleWindowFnPropagation() {
  PCollection<String> shortSessions =
      p.apply("CreateInput1", Create.of("Input1"))
          .apply("Window1", Window.into(Sessions.withGapDuration(Duration.standardMinutes(1))));
  PCollection<String> longSessions =
      p.apply("CreateInput2", Create.of("Input2"))
          .apply("Window2", Window.into(Sessions.withGapDuration(Duration.standardMinutes(2))));
  PCollection<String> merged =
      PCollectionList.of(shortSessions).and(longSessions).apply(Flatten.pCollections());
  p.run();
  Assert.assertTrue(
      merged
          .getWindowingStrategy()
          .getWindowFn()
          .isCompatible(Sessions.withGapDuration(Duration.standardMinutes(2))));
}