/**
 * Traverses the pipeline and returns the first {@link Combine.GroupedValues} found.
 *
 * <p>Only the first matching composite encountered during the traversal is recorded; later
 * matches are skipped (not entered) but never overwrite the recorded one.
 *
 * @param pipeline the pipeline to search
 * @return the first applied {@link Combine.GroupedValues}, or {@code null} if the traversal
 *     encounters none
 */
private static AppliedPTransform<?, ?, ?> getCombineGroupedValuesFrom(TestPipeline pipeline) {
  // Single-element array so the anonymous visitor can write to it.
  final AppliedPTransform<?, ?, ?>[] transform = new AppliedPTransform<?, ?, ?>[1];
  pipeline.traverseTopologically(
      new Pipeline.PipelineVisitor.Defaults() {
        @Override
        public CompositeBehavior enterCompositeTransform(TransformHierarchy.Node node) {
          if (node.isRootNode()) {
            return CompositeBehavior.ENTER_TRANSFORM;
          }
          // Convert once and reuse the result for both the type check and the assignment.
          AppliedPTransform<?, ?, ?> applied = node.toAppliedPTransform(getPipeline());
          if (!applied.getTransform().getClass().equals(Combine.GroupedValues.class)) {
            return CompositeBehavior.ENTER_TRANSFORM;
          }
          // Keep the first match; the traversal continues with sibling nodes after this one.
          if (transform[0] == null) {
            transform[0] = applied;
          }
          return CompositeBehavior.DO_NOT_ENTER_TRANSFORM;
        }
      });
  return transform[0];
}
/**
 * Asserts that every value produced by a {@link Read.Bounded} or {@link Read.Unbounded}
 * transform appears among the inputs of some primitive transform in the pipeline.
 */
private void validateConsumed() {
  final Set<PValue> consumed = new HashSet<>();
  final Set<PValue> readOutputs = new HashSet<>();

  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(Node node) {
          // Every input of a primitive transform counts as consumed.
          consumed.addAll(node.getInputs().values());
        }

        @Override
        public void visitValue(PValue value, Node producer) {
          // Only values produced directly by a Read are of interest.
          if (!(producer.getTransform() instanceof Read.Bounded)
              && !(producer.getTransform() instanceof Read.Unbounded)) {
            return;
          }
          readOutputs.add(value);
        }
      });

  PValue[] expectedConsumed = readOutputs.toArray(new PValue[0]);
  assertThat(consumed, Matchers.hasItems(expectedConsumed));
}
}
/**
 * Traverses the pipeline twice with the same visitor and expects the second traversal to fail
 * with an {@link IllegalStateException} whose message names {@link DirectGraphVisitor}.
 */
@Test
public void traverseMultipleTimesThrows() {
  p.apply(Create.of(1, 2, 3));

  // First traversal is expected to complete normally.
  p.traverseTopologically(visitor);

  final String visitorName = DirectGraphVisitor.class.getSimpleName();
  thrown.expect(IllegalStateException.class);
  thrown.expectMessage(visitorName);
  thrown.expectMessage("is finalized");

  // Second traversal with the same visitor should throw.
  p.traverseTopologically(visitor);
}
/**
 * Applies an {@code InvalidCompositeTransform} and traverses the pipeline with a no-op visitor;
 * the test passes only if an {@link IllegalArgumentException} is thrown along the way.
 */
@Test(expected = IllegalArgumentException.class)
public void testOutputChecking() throws Exception {
  p.enableAbandonedNodeEnforcement(false);
  p.apply(new InvalidCompositeTransform());

  // A visitor with default behavior for every callback.
  Pipeline.PipelineVisitor noOpVisitor = new Pipeline.PipelineVisitor.Defaults() {};
  p.traverseTopologically(noOpVisitor);
}
/**
 * Applies two unrelated {@link Create} transforms to the same pipeline and checks that a
 * traversal with the visitor completes without throwing.
 */
@Test
public void traverseIndependentPathsSucceeds() {
  // Two roots that share no inputs or outputs.
  p.apply("left", Create.of(1, 2, 3));
  p.apply("right", Create.of("foo", "bar", "baz"));

  // No assertion: the traversal finishing normally is the success condition.
  p.traverseTopologically(visitor);
}
/**
 * Creates a {@code KV<Integer, Iterable<Void>>} collection directly from {@link Create} and
 * asserts the visitor does not report it among its keyed values.
 */
@Test
public void noInputUnkeyedOutput() {
  KvCoder<Integer, Iterable<Void>> coder =
      KvCoder.of(VarIntCoder.of(), IterableCoder.of(VoidCoder.of()));

  PCollection<KV<Integer, Iterable<Void>>> unkeyed =
      p.apply(
          Create.of(KV.<Integer, Iterable<Void>>of(-1, Collections.emptyList()))
              .withCoder(coder));

  p.traverseTopologically(visitor);

  assertThat(visitor.getKeyedPValues(), not(hasItem(unkeyed)));
}
/**
 * Runs a {@link GroupByKey} output through a {@link ParDo} of {@code IdentityFn} and asserts
 * that the visitor does not report the {@link ParDo} output among its keyed values.
 */
@Test
public void keyedInputWithoutKeyPreserving() {
  PCollection<KV<String, Integer>> source = p.apply(Create.of(KV.of("hello", 42)));
  PCollection<KV<String, Iterable<Integer>>> grouped = source.apply(GroupByKey.create());
  PCollection<KV<String, Iterable<Integer>>> onceKeyed =
      grouped.apply(ParDo.of(new IdentityFn<>()));

  p.traverseTopologically(visitor);

  assertThat(visitor.getKeyedPValues(), not(hasItem(onceKeyed)));
}
/**
 * Together with {@link #testWindowIntoNullWindowFnNoAssign()}, demonstrates that the expansion
 * of the {@link Window} transform depends on whether it actually assigns elements to windows.
 *
 * <p>Here a {@link FixedWindows} function is supplied, and the test asserts that a {@link
 * Window.Assign} primitive is visited during traversal.
 */
@Test
public void testWindowIntoWindowFnAssign() {
  Duration windowSize = Duration.standardMinutes(11L).plus(Duration.millis(1L));
  pipeline.apply(Create.of(1, 2, 3)).apply(Window.into(FixedWindows.of(windowSize)));

  final AtomicBoolean foundAssign = new AtomicBoolean(false);
  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(TransformHierarchy.Node node) {
          if (!(node.getTransform() instanceof Window.Assign)) {
            return;
          }
          foundAssign.set(true);
        }
      });

  assertThat(foundAssign.get(), is(true));
}
/**
 * Traverses a small keyed pipeline twice with the same visitor and expects the second traversal
 * to fail with an {@link IllegalStateException} whose message names {@link
 * KeyedPValueTrackingVisitor}.
 */
@Test
public void traverseMultipleTimesThrows() {
  p.apply(
          Create.of(KV.of(1, (Void) null), KV.of(2, (Void) null), KV.of(3, (Void) null))
              .withCoder(KvCoder.of(VarIntCoder.of(), VoidCoder.of())))
      .apply(GroupByKey.create())
      .apply(Keys.create());

  // First traversal is expected to complete normally.
  p.traverseTopologically(visitor);

  final String visitorName = KeyedPValueTrackingVisitor.class.getSimpleName();
  thrown.expect(IllegalStateException.class);
  thrown.expectMessage("already been finalized");
  thrown.expectMessage(visitorName);

  // Second traversal with the same visitor should throw.
  p.traverseTopologically(visitor);
}
/**
 * Converts the {@link Window.Assign} primitive found in the pipeline to a {@code
 * WindowIntoPayload} and checks that the window function read back from the payload equals the
 * {@code windowFn} under test.
 *
 * @throws InvalidProtocolBufferException declared by the translation calls used below
 */
@Test
public void testToFromProto() throws InvalidProtocolBufferException {
  pipeline.apply(GenerateSequence.from(0)).apply(Window.<Long>into((WindowFn) windowFn));

  final AtomicReference<AppliedPTransform<?, ?, Assign<?>>> assign = new AtomicReference<>(null);
  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(Node node) {
          if (!(node.getTransform() instanceof Window.Assign)) {
            return;
          }
          // Exactly one Assign is expected; fail fast if a second one shows up.
          checkState(assign.get() == null);
          assign.set((AppliedPTransform<?, ?, Assign<?>>) node.toAppliedPTransform(getPipeline()));
        }
      });
  checkState(assign.get() != null);

  SdkComponents components = SdkComponents.create();
  components.registerEnvironment(Environments.createDockerEnvironment("java"));

  WindowIntoPayload payload =
      WindowIntoTranslation.toProto(assign.get().getTransform(), components);

  assertEquals(windowFn, WindowingStrategyTranslation.windowFnFromProto(payload.getWindowFn()));
}
/**
 * Applies {@code FlattenWithoutDuplicateInputs} to a list that repeats the same collection and
 * asserts that no {@link Flatten.PCollections} primitive in the pipeline has inputs equal to
 * the expansion of that original list.
 */
@Test
public void duplicatesInsertsMultipliers() {
  PTransform<PCollectionList<String>, PCollection<String>> replacement =
      new DeduplicatedFlattenFactory.FlattenWithoutDuplicateInputs<>();

  // "first" appears three times in the input list.
  final PCollectionList<String> inputList =
      PCollectionList.of(first).and(second).and(first).and(first);
  inputList.apply(replacement);

  pipeline.traverseTopologically(
      new Defaults() {
        @Override
        public void visitPrimitiveTransform(TransformHierarchy.Node node) {
          if (!(node.getTransform() instanceof Flatten.PCollections)) {
            return;
          }
          assertThat(node.getInputs(), not(equalTo(inputList.expand())));
        }
      });
}
/**
 * Applies a {@link Flatten} to an empty {@link PCollectionList} and asserts that it is the
 * graph's only root transform, with no inputs and with the flattened collection as its output.
 */
@Test
public void getRootTransformsContainsEmptyFlatten() {
  PCollections<String> flatten = Flatten.pCollections();
  PCollection<String> empty = PCollectionList.<String>empty(p).apply(flatten);
  empty.setCoder(StringUtf8Coder.of());

  p.traverseTopologically(visitor);
  DirectGraph graph = visitor.getGraph();

  assertThat(
      graph.getRootTransforms(),
      Matchers.containsInAnyOrder(new Object[] {graph.getProducer(empty)}));

  AppliedPTransform<?, ?, ?> onlyRoot = Iterables.getOnlyElement(graph.getRootTransforms());
  assertThat((Object) onlyRoot.getTransform(), equalTo(flatten));
  assertThat(onlyRoot.getInputs().entrySet(), emptyIterable());
  assertThat(onlyRoot.getOutputs(), equalTo(empty.expand()));
}
/**
 * Applies {@code DirectGroupByKeyOnly} followed by {@code DirectGroupAlsoByWindow} and asserts
 * that the visitor reports the resulting collection among its keyed values.
 */
@Test
public void groupByKeyProducesKeyedOutput() {
  PCollection<KV<String, Integer>> source = p.apply(Create.of(KV.of("foo", 3)));

  PCollection<KV<String, Iterable<Integer>>> keyed =
      source
          .apply(new DirectGroupByKeyOnly<>())
          .apply(
              new DirectGroupAlsoByWindow<>(
                  WindowingStrategy.globalDefault(), WindowingStrategy.globalDefault()));

  p.traverseTopologically(visitor);

  assertThat(visitor.getKeyedPValues(), hasItem(keyed));
}
/**
 * Builds the fixture before each test: an empty byte-array collection, a {@link ParDo} over it,
 * and an {@code ImmutabilityCheckingBundleFactory} backed by the graph of the resulting
 * pipeline.
 */
@Before
public void setup() {
  created = p.apply(Create.empty(ByteArrayCoder.of()));
  transformed = created.apply(ParDo.of(new IdentityDoFn<>()));

  // Walk the pipeline once so the visitor can supply its graph to the factory.
  DirectGraphVisitor graphVisitor = new DirectGraphVisitor();
  p.traverseTopologically(graphVisitor);

  factory =
      ImmutabilityCheckingBundleFactory.create(
          ImmutableListBundleFactory.create(), graphVisitor.getGraph());
}
/**
 * Flattens a list containing the same collection twice and asserts that the flatten's producer
 * is listed twice among that collection's per-element consumers, while the flattened output has
 * none.
 */
@Test
public void getValueToConsumersWithDuplicateInputSucceeds() {
  PCollection<String> created = p.apply(Create.of("1", "2", "3"));

  // The same collection is supplied as both inputs of the flatten.
  PCollectionList<String> duplicated = PCollectionList.of(created).and(created);
  PCollection<String> flattened = duplicated.apply(Flatten.pCollections());

  p.traverseTopologically(visitor);
  DirectGraph graph = visitor.getGraph();

  AppliedPTransform<?, ?, ?> flattenedProducer = graph.getProducer(flattened);
  Object[] expectedConsumers = {flattenedProducer, flattenedProducer};

  assertThat(
      graph.getPerElementConsumers(created), Matchers.containsInAnyOrder(expectedConsumers));
  assertThat(graph.getPerElementConsumers(flattened), emptyIterable());
}
@Test public void doesNotConsumeAlreadyConsumedRead() { Unbounded<Long> transform = Read.from(CountingSource.unbounded()); final PCollection<Long> output = pipeline.apply(transform); final Flatten.PCollections<Long> consumer = Flatten.pCollections(); PCollectionList.of(output).apply(consumer); UnconsumedReads.ensureAllReadsConsumed(pipeline); pipeline.traverseTopologically( new PipelineVisitor.Defaults() { @Override public void visitPrimitiveTransform(Node node) { // The output should only be consumed by a single consumer if (node.getInputs().values().contains(output)) { assertThat(node.getTransform(), Matchers.is(consumer)); } } }); }
@Test public void getRootTransformsContainsRootTransforms() { PCollection<String> created = p.apply(Create.of("foo", "bar")); PCollection<Long> counted = p.apply(Read.from(CountingSource.upTo(1234L))); PCollection<Long> unCounted = p.apply(GenerateSequence.from(0)); p.traverseTopologically(visitor); DirectGraph graph = visitor.getGraph(); assertThat(graph.getRootTransforms(), hasSize(3)); assertThat( graph.getRootTransforms(), Matchers.containsInAnyOrder( new Object[] { graph.getProducer(created), graph.getProducer(counted), graph.getProducer(unCounted) })); for (AppliedPTransform<?, ?, ?> root : graph.getRootTransforms()) { // Root transforms will have no inputs assertThat(root.getInputs().entrySet(), emptyIterable()); assertThat( Iterables.getOnlyElement(root.getOutputs().values()), Matchers.<POutput>isOneOf(created, counted, unCounted)); } }
/**
 * Together with {@link #testWindowIntoWindowFnAssign()}, demonstrates that the expansion of the
 * {@link Window} transform depends on whether it actually assigns elements to windows.
 *
 * <p>Here the {@link Window} is only configured (trigger, allowed lateness, accumulation mode),
 * and the test asserts that no visited primitive is a {@link Window.Assign}.
 */
@Test
public void testWindowIntoNullWindowFnNoAssign() {
  Window<Integer> configuredOnly =
      Window.<Integer>configure()
          .triggering(AfterWatermark.pastEndOfWindow())
          .withAllowedLateness(Duration.ZERO)
          .accumulatingFiredPanes();

  pipeline.apply(Create.of(1, 2, 3)).apply(configuredOnly);

  pipeline.traverseTopologically(
      new PipelineVisitor.Defaults() {
        @Override
        public void visitPrimitiveTransform(TransformHierarchy.Node node) {
          // Checked for every primitive in the pipeline.
          assertThat(node.getTransform(), not(instanceOf(Window.Assign.class)));
        }
      });
}
/**
 * Feeds a collection created directly from {@link Create} into a {@link ParDo} of {@code
 * ToKeyedWorkItem} and asserts that the visitor does not report the {@link ParDo} output among
 * its keyed values.
 */
@Test
public void unkeyedInputWithKeyPreserving() {
  // Explicitly typed as Iterable so the created KV has an Iterable value type.
  Iterable<WindowedValue<KV<String, Integer>>> noValues =
      Collections.<WindowedValue<KV<String, Integer>>>emptyList();

  PCollection<KV<String, Iterable<WindowedValue<KV<String, Integer>>>>> input =
      p.apply(
          Create.of(KV.of("hello", noValues))
              .withCoder(
                  KvCoder.of(
                      StringUtf8Coder.of(),
                      IterableCoder.of(
                          WindowedValue.getValueOnlyCoder(
                              KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()))))));

  PCollection<KeyedWorkItem<String, KV<String, Integer>>> unkeyed =
      input
          .apply(ParDo.of(new ParDoMultiOverrideFactory.ToKeyedWorkItem<>()))
          .setCoder(
              KeyedWorkItemCoder.of(
                  StringUtf8Coder.of(),
                  KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of()),
                  GlobalWindow.Coder.INSTANCE));

  p.traverseTopologically(visitor);

  assertThat(visitor.getKeyedPValues(), not(hasItem(unkeyed)));
}
/**
 * Creates a list view and a singleton view, applies the {@code DirectRunner}'s default
 * transform overrides, and asserts that the graph's views are exactly those two.
 */
@Test
public void getViewsReturnsViews() {
  PCollection<String> words = p.apply("listCreate", Create.of("foo", "bar"));

  // Emits the length of each input element, rendered as a string.
  DoFn<String, String> lengthAsString =
      new DoFn<String, String>() {
        @ProcessElement
        public void processElement(DoFn<String, String>.ProcessContext c) throws Exception {
          c.output(Integer.toString(c.element().length()));
        }
      };
  PCollectionView<List<String>> listView =
      words.apply(ParDo.of(lengthAsString)).apply(View.asList());

  PCollectionView<Object> singletonView =
      p.apply("singletonCreate", Create.<Object>of(1, 2, 3)).apply(View.asSingleton());

  p.replaceAll(
      DirectRunner.fromOptions(TestPipeline.testingPipelineOptions())
          .defaultTransformOverrides());
  p.traverseTopologically(visitor);

  assertThat(visitor.getGraph().getViews(), Matchers.containsInAnyOrder(listView, singletonView));
}