/**
 * Hands five keyed integers (three under {@code "a"}, two under {@code "b"}) to
 * {@code runTestSimpleCombine}, together with an expected integer and the expected per-key
 * strings.
 */
@Test
@Category({ValidatesRunner.class, DataflowPortabilityApiUnsupported.class})
@SuppressWarnings({"rawtypes", "unchecked"})
public void testSimpleCombine() {
  runTestSimpleCombine(
      // Input table.
      Arrays.asList(
          KV.of("a", 1),
          KV.of("a", 1),
          KV.of("a", 4),
          KV.of("b", 1),
          KV.of("b", 13)),
      // Equals 1 + 1 + 4 + 1 + 13; presumably the expected global sum -- confirm in the helper.
      20,
      // Expected per-key output; each string reads like that key's inputs written side by side.
      Arrays.asList(KV.of("a", "114"), KV.of("b", "113")));
}
/** Verifies that a per-key {@link Combine} can be built from a Java 8 instance method reference. */
@Test
@Category(ValidatesRunner.class)
public void testCombinePerKeyInstanceMethodReference() {
  PCollection<KV<String, Integer>> keyedInts =
      pipeline.apply(Create.of(KV.of("a", 1), KV.of("b", 2), KV.of("a", 3), KV.of("c", 4)));

  // The combiner is the bound method reference Summer#sum on a fresh Summer instance.
  PCollection<KV<String, Integer>> sums = keyedInts.apply(Combine.perKey(new Summer()::sum));

  // "a" occurs twice in the input (1 and 3); "b" and "c" occur once each.
  PAssert.that(sums).containsInAnyOrder(KV.of("a", 4), KV.of("b", 2), KV.of("c", 4));
  pipeline.run();
}
/**
 * Registers three {@link PAssert} assertions on the pipeline and checks that
 * {@code PAssert.countAsserts} reports exactly three.
 */
@Test
public void countAssertsSucceeds() {
  PCollection<Integer> numbers = pipeline.apply("FirstCreate", Create.of(1, 2, 3));

  // Assertion 1: contents of the created collection.
  PAssert.that(numbers).containsInAnyOrder(1, 2, 3);

  // Assertion 2: the global sum of that collection.
  PCollection<Integer> total = numbers.apply(Sum.integersGlobally());
  PAssert.thatSingleton(total).isEqualTo(6);

  // Assertion 3: a single-entry collection viewed as a map.
  PCollection<KV<Integer, Integer>> entries =
      pipeline.apply("CreateMap", Create.of(KV.of(1, 2)));
  PAssert.thatMap(entries).isEqualTo(Collections.singletonMap(1, 2));

  assertThat(PAssert.countAsserts(pipeline), equalTo(3));
}
/**
 * Applies a single-output {@link ParDo} of a {@code DummyStatefulDoFn}, asks a
 * {@link DataflowRunner} to replace transforms, and checks that
 * {@code findBatchStatefulDoFn} still returns a {@link DoFn} equal to the one applied.
 */
@Test
public void testSingleOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions dataflowOptions = buildPipelineOptions();
  dataflowOptions.setRunner(DataflowRunner.class);
  Pipeline dataflowPipeline = Pipeline.create(dataflowOptions);

  DummyStatefulDoFn statefulFn = new DummyStatefulDoFn();
  dataflowPipeline.apply(Create.of(KV.of(1, 2))).apply(ParDo.of(statefulFn));

  // The runner is only needed for this one call, so it is not kept in a local.
  DataflowRunner.fromOptions(dataflowOptions).replaceTransforms(dataflowPipeline);

  assertThat(findBatchStatefulDoFn(dataflowPipeline), equalTo((DoFn) statefulFn));
}
/**
 * Applies a {@link Create} of one {@code KV<Integer, Iterable<Void>>} element to {@code p},
 * traverses the pipeline with {@code visitor}, and checks that the resulting collection is not
 * among the visitor's keyed values.
 */
@Test
public void noInputUnkeyedOutput() {
  KvCoder<Integer, Iterable<Void>> kvCoder =
      KvCoder.of(VarIntCoder.of(), IterableCoder.of(VoidCoder.of()));

  PCollection<KV<Integer, Iterable<Void>>> unkeyed =
      p.apply(
          Create.of(KV.<Integer, Iterable<Void>>of(-1, Collections.emptyList()))
              .withCoder(kvCoder));

  p.traverseTopologically(visitor);

  assertThat(visitor.getKeyedPValues(), not(hasItem(unkeyed)));
}
/**
 * Builds a small pipeline that applies a {@link GroupByKey} followed by a
 * {@link Combine.GroupedValues}.
 *
 * @return the constructed pipeline; it is not run here
 */
private static TestPipeline createCombineGroupedValuesPipeline() {
  TestPipeline testPipeline = TestPipeline.create().enableAbandonedNodeEnforcement(false);

  KvCoder<String, Integer> kvCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
  PCollection<KV<String, Integer>> keyedInput =
      testPipeline.apply(Create.of(KV.of("key", 1))).setCoder(kvCoder);

  PCollection<KV<String, Iterable<Integer>>> grouped = keyedInput.apply(GroupByKey.create());
  grouped.apply(Combine.groupedValues(new SumCombineFn()));

  return testPipeline;
}
/**
 * Applies {@code Count.perElement()} to a collection created from {@code WORDS} and asserts the
 * expected count for each distinct element.
 */
@Test
@Category(NeedsRunner.class)
@SuppressWarnings("unchecked")
public void testCountPerElementBasic() {
  PCollection<String> words = p.apply(Create.of(WORDS));
  PCollection<KV<String, Long>> counts = words.apply(Count.perElement());

  // The empty string is expected as an element in its own right, with a count of 3.
  PAssert.that(counts)
      .containsInAnyOrder(
          KV.of("hi", 4L),
          KV.of("there", 1L),
          KV.of("sue", 2L),
          KV.of("bob", 2L),
          KV.of("", 3L),
          KV.of("ZOW", 1L));

  p.run();
}
/**
 * Hands five keyed integers (three under {@code "a"}, two under {@code "b"}) to
 * {@code runTestSimpleCombineWithContext}, together with an expected integer, the expected
 * per-key strings, and one further expected string.
 */
@Test
@Category(ValidatesRunner.class)
@SuppressWarnings({"rawtypes", "unchecked"})
public void testSimpleCombineWithContext() {
  runTestSimpleCombineWithContext(
      // Input table.
      Arrays.asList(
          KV.of("a", 1),
          KV.of("a", 1),
          KV.of("a", 4),
          KV.of("b", 1),
          KV.of("b", 13)),
      // Equals 1 + 1 + 4 + 1 + 13; presumably the expected global sum -- confirm in the helper.
      20,
      // Every expected string carries a "20:" prefix, matching the integer argument above.
      Arrays.asList(KV.of("a", "20:114"), KV.of("b", "20:113")),
      new String[] {"20:111134"});
}
/**
 * Builds options with {@code --experiments=beam_fn_api}, applies a single-output {@link ParDo}
 * of a {@code DummyStatefulDoFn}, asks a {@link DataflowRunner} to replace transforms, and
 * checks that {@code findBatchStatefulDoFn} still returns a {@link DoFn} equal to the one
 * applied.
 */
@Test
public void testFnApiSingleOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions fnApiOptions = buildPipelineOptions("--experiments=beam_fn_api");
  fnApiOptions.setRunner(DataflowRunner.class);
  Pipeline dataflowPipeline = Pipeline.create(fnApiOptions);

  DummyStatefulDoFn statefulFn = new DummyStatefulDoFn();
  dataflowPipeline.apply(Create.of(KV.of(1, 2))).apply(ParDo.of(statefulFn));

  // The runner is only needed for this one call, so it is not kept in a local.
  DataflowRunner.fromOptions(fnApiOptions).replaceTransforms(dataflowPipeline);

  assertThat(findBatchStatefulDoFn(dataflowPipeline), equalTo((DoFn) statefulFn));
}
/**
 * Calls {@code PAssert.countAsserts} twice on the same pipeline: once after two assertions have
 * been registered (expecting 2) and again after a third has been added (expecting 3).
 */
@Test
public void countAssertsMultipleCallsIndependent() {
  PCollection<Integer> numbers = pipeline.apply("FirstCreate", Create.of(1, 2, 3));

  PAssert.that(numbers).containsInAnyOrder(1, 2, 3);
  PCollection<Integer> total = numbers.apply(Sum.integersGlobally());
  PAssert.thatSingleton(total).isEqualTo(6);

  // First count: only the two assertions registered so far.
  assertThat(PAssert.countAsserts(pipeline), equalTo(2));

  PCollection<KV<Integer, Integer>> entries =
      pipeline.apply("CreateMap", Create.of(KV.of(1, 2)));
  PAssert.thatMap(entries).isEqualTo(Collections.singletonMap(1, 2));

  // Second count: includes the map assertion added after the first count.
  assertThat(PAssert.countAsserts(pipeline), equalTo(3));
}
}
/**
 * Builds a small pipeline containing a {@link Combine.PerKey} that is given a singleton side
 * input.
 *
 * @return the constructed pipeline; it is not run here
 */
private static TestPipeline createCombinePerKeyWithSideInputsPipeline() {
  TestPipeline testPipeline = TestPipeline.create().enableAbandonedNodeEnforcement(false);

  KvCoder<String, Integer> kvCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
  PCollection<KV<String, Integer>> keyedInput =
      testPipeline.apply(Create.of(KV.of("key", 1))).setCoder(kvCoder);

  // One-element collection turned into a singleton view for use as the side input.
  PCollectionView<String> singletonView =
      testPipeline.apply(Create.of("side input")).apply(View.asSingleton());

  keyedInput.apply(
      Combine.<String, Integer, Integer>perKey(new SumCombineFnWithContext())
          .withSideInputs(singletonView));

  return testPipeline;
}
/** Verifies that a per-key {@link Combine} can be built from a Java 8 lambda. */
@Test
@Category(ValidatesRunner.class)
public void testCombinePerKeyLambda() {
  PCollection<KV<String, Integer>> keyedInts =
      pipeline.apply(Create.of(KV.of("a", 1), KV.of("b", 2), KV.of("a", 3), KV.of("c", 4)));

  PCollection<KV<String, Integer>> sums =
      keyedInts.apply(
          Combine.perKey(
              values -> {
                // Plain summation of the values handed to the combiner.
                int total = 0;
                for (int value : values) {
                  total += value;
                }
                return total;
              }));

  // "a" occurs twice in the input (1 and 3); "b" and "c" occur once each.
  PAssert.that(sums).containsInAnyOrder(KV.of("a", 4), KV.of("b", 2), KV.of("c", 4));
  pipeline.run();
}
/**
 * Checks that matching a {@code KV} whose value holds three elements against a {@code kv}
 * matcher expecting four elements fails, and that the failure message mentions
 * "value did not match".
 */
@Test
public void testKvMatcherGBKLikeFailure() throws Exception {
  AssertionError failure =
      assertionShouldFail(
          () ->
              assertThat(
                  KV.of("key", ImmutableList.of(1, 2, 3)),
                  SerializableMatchers.<String, Iterable<Integer>>kv(
                      // Any key is accepted; only the value matcher can reject.
                      anything(), containsInAnyOrder(1, 2, 3, 4))));

  String message = failure.getMessage();
  assertThat(message, Matchers.containsString("value did not match"));
}
/**
 * Applies a multi-output {@link ParDo} of a {@code DummyStatefulDoFn} (one main tag plus one
 * additional tag), asks a {@link DataflowRunner} to replace transforms, and checks that
 * {@code findBatchStatefulDoFn} still returns a {@link DoFn} equal to the one applied.
 */
@Test
public void testMultiOutputOverrideNonCrashing() throws Exception {
  DataflowPipelineOptions dataflowOptions = buildPipelineOptions();
  dataflowOptions.setRunner(DataflowRunner.class);
  Pipeline dataflowPipeline = Pipeline.create(dataflowOptions);

  TupleTag<Integer> mainTag = new TupleTag<Integer>() {};
  TupleTag<Integer> additionalTag = new TupleTag<Integer>() {};
  DummyStatefulDoFn statefulFn = new DummyStatefulDoFn();

  TupleTagList additionalTags = TupleTagList.of(additionalTag);
  dataflowPipeline
      .apply(Create.of(KV.of(1, 2)))
      .apply(ParDo.of(statefulFn).withOutputTags(mainTag, additionalTags));

  // The runner is only needed for this one call, so it is not kept in a local.
  DataflowRunner.fromOptions(dataflowOptions).replaceTransforms(dataflowPipeline);

  assertThat(findBatchStatefulDoFn(dataflowPipeline), equalTo((DoFn) statefulFn));
}
/**
 * Builds a small pipeline containing a {@link Combine.PerKey}.
 *
 * @return the constructed pipeline; it is not run here
 */
private static TestPipeline createCombinePerKeyPipeline() {
  TestPipeline testPipeline = TestPipeline.create().enableAbandonedNodeEnforcement(false);

  KvCoder<String, Integer> kvCoder = KvCoder.of(StringUtf8Coder.of(), VarIntCoder.of());
  PCollection<KV<String, Integer>> keyedInput =
      testPipeline.apply(Create.of(KV.of("key", 1))).setCoder(kvCoder);

  keyedInput.apply(Combine.perKey(new SumCombineFn()));

  return testPipeline;
}
/**
 * Creates a collection from two {@code KV} elements whose key and value are both {@code null}
 * typed as {@link Void}, and asserts that the collection contains two such elements.
 */
@Test
@Category(ValidatesRunner.class)
public void testCreateWithKVVoidType() throws Exception {
  PCollection<KV<Void, Void>> voidPairs =
      p.apply(
          Create.of(
              KV.of((Void) null, (Void) null),
              KV.of((Void) null, (Void) null)));

  PAssert.that(voidPairs)
      .containsInAnyOrder(
          KV.of((Void) null, (Void) null),
          KV.of((Void) null, (Void) null));

  p.run();
}
/**
 * Verifies that the view matches {@code byte[]} keys by content rather than by array identity:
 * every {@code get} is given a freshly allocated array, never the instance stored in the view.
 */
@Test
public void testStructuralKeyEquality() {
  MultimapView<byte[], Integer> view =
      InMemoryMultimapSideInputView.fromIterable(
          ByteArrayCoder.of(),
          ImmutableList.of(KV.of(new byte[] {0x00}, 0), KV.of(new byte[] {0x01}, 1)));

  // Expected value goes first so that a failure message labels the two sides correctly.
  assertEquals(ImmutableList.of(0), view.get(new byte[] {0x00}));
  assertEquals(ImmutableList.of(1), view.get(new byte[] {0x01}));
  // A key that was never inserted is expected to yield an empty result.
  assertEquals(ImmutableList.of(), view.get(new byte[] {0x02}));
}