/**
 * Builds a {@code PTransform} that consumes a {@code PCollection<KV<K, Integer>>} and produces a
 * {@code PCollection<KV<K, Integer>>} holding one element per distinct input key, whose value is
 * the sum of all values paired with that key in the input {@code PCollection}.
 *
 * @param <K> the key type shared by the input and output elements
 * @return a per-key combine built from {@code Sum.ofIntegers()}
 */
public static <K> Combine.PerKey<K, Integer, Integer> integersPerKey() {
  final Combine.BinaryCombineIntegerFn integerSum = Sum.ofIntegers();
  return Combine.perKey(integerSum);
}
/**
 * Builds a {@code PTransform} that consumes a {@code PCollection<Integer>} and produces a
 * {@code PCollection<Integer>} containing the sum of the input elements, or {@code 0} when the
 * input {@code PCollection} has no elements.
 *
 * @return a global combine built from {@code Sum.ofIntegers()}
 */
public static Combine.Globally<Integer, Integer> integersGlobally() {
  final Combine.BinaryCombineIntegerFn integerSum = Sum.ofIntegers();
  return Combine.globally(integerSum);
}
/**
 * Picks the summing {@link CombineFn} that corresponds to a schema field type.
 *
 * <p>{@code INT32}, {@code INT64} and {@code DOUBLE} are served by the factories on {@link Sum};
 * {@code INT16}, {@code BYTE}, {@code FLOAT} and {@code DECIMAL} get dedicated sum classes.
 *
 * @param fieldType the schema type of the field being summed
 * @return the combine function for {@code fieldType}
 * @throws UnsupportedOperationException if {@code fieldType} is none of the types listed above
 */
static CombineFn createSum(Schema.TypeName fieldType) {
  final CombineFn sumFn;
  switch (fieldType) {
    case BYTE:
      sumFn = new ByteSum();
      break;
    case INT16:
      sumFn = new ShortSum();
      break;
    case INT32:
      sumFn = Sum.ofIntegers();
      break;
    case INT64:
      sumFn = Sum.ofLongs();
      break;
    case FLOAT:
      sumFn = new FloatSum();
      break;
    case DOUBLE:
      sumFn = Sum.ofDoubles();
      break;
    case DECIMAL:
      sumFn = new BigDecimalSum();
      break;
    default:
      throw new UnsupportedOperationException(
          String.format("[%s] is not support in SUM", fieldType));
  }
  return sumFn;
}
/**
 * Requests the signature of a {@code DoFn} whose state is declared as a {@code CombiningState}
 * but whose process method asks for it as a {@code GroupingState}. The test passes when
 * {@code DoFnSignatures.getSignature} completes without throwing.
 */
@Test
public void testGoodStateParameterSuperclassStateType() throws Exception {
  // A local constant keeps the declaration and the parameter annotation on the same id.
  final String stateId = "my-id";

  DoFnSignatures.getSignature(
      new DoFn<KV<String, Integer>, Long>() {
        @StateId(stateId)
        private final StateSpec<CombiningState<Integer, int[], Integer>> state =
            StateSpecs.combining(Sum.ofIntegers());

        @ProcessElement
        public void myProcessElement(
            ProcessContext c, @StateId(stateId) GroupingState<Integer, Integer> sums) {}
      }.getClass());
}
@Test public void usesMatcher() { final AtomicBoolean matcherUsed = new AtomicBoolean(); Matcher<Integer> matcher = new TypeSafeMatcher<Integer>() { @Override public void describeTo(Description description) {} @Override protected boolean matchesSafely(Integer item) { matcherUsed.set(true); return item == 30; } }; CombineFnTester.testCombineFn( Sum.ofIntegers(), Arrays.asList(1, 1, 2, 2, 3, 3, 4, 4, 5, 5), matcher); assertThat(matcherUsed.get(), is(true)); try { CombineFnTester.testCombineFn( Sum.ofIntegers(), Arrays.asList(1, 2, 3, 4, 5), Matchers.not(Matchers.equalTo(15))); } catch (AssertionError ignored) { // Success! Return to avoid the call to fail(); return; } fail("The matcher should have failed, throwing an error"); } }
/** Hands {@code Sum.ofIntegers()} the inputs 1, 2, 3 and 4 with an expected result of 10. */
@Test
public void testSumIntegerFn() {
  final int expectedTotal = 10;
  testCombineFn(Sum.ofIntegers(), Lists.newArrayList(1, 2, 3, 4), expectedTotal);
}
/**
 * Wrapping an already context-aware combine fn a second time must hand back the very same
 * instance rather than a new wrapper.
 */
@Test
public void testToFnWithContextIdempotent() throws Exception {
  CombineFnWithContext<Integer, int[], Integer> wrappedOnce =
      CombineFnUtil.toFnWithContext(Sum.ofIntegers());
  CombineFnWithContext<Integer, int[], Integer> wrappedTwice =
      CombineFnUtil.toFnWithContext(wrappedOnce);

  // Identity comparison on purpose: an equal-but-distinct wrapper would not count.
  assertTrue(wrappedOnce == wrappedTwice);
}
/**
 * Drives the context-aware wrapper around {@code Sum.ofIntegers()} by hand with a null context:
 * create an accumulator, add 1 through 4, and expect the extracted output to be 10.
 */
@Test
public void testToFnWithContext() throws Exception {
  CombineFnWithContext<Integer, int[], Integer> contextualSum =
      CombineFnUtil.toFnWithContext(Sum.ofIntegers());
  Context noContext = CombineContextFactory.nullContext();
  List<Integer> values = ImmutableList.of(1, 2, 3, 4);

  int[] running = contextualSum.createAccumulator(noContext);
  for (int idx = 0; idx < values.size(); idx++) {
    running = contextualSum.addInput(running, values.get(idx), noContext);
  }

  int total = contextualSum.extractOutput(running, noContext).intValue();
  assertEquals(10, total);
}
}
/**
 * Supplies the combine functions the parameterized test runs against: the integer sum, the
 * count combine fn, and a {@code TestCombineFn}.
 *
 * @return the three combine functions, in that order
 */
@Parameters(name = "{index}: {0}")
public static Iterable<Combine.CombineFn<Integer, ?, ?>> params() {
  BinaryCombineIntegerFn summing = Sum.ofIntegers();
  CombineFn<Integer, ?, Long> counting = Count.combineFn();
  TestCombineFn custom = new TestCombineFn();
  return ImmutableList.<CombineFn<Integer, ?, ?>>of(summing, counting, custom);
}
@Test public void testOnElementCombiningDiscarding() throws Exception { // Test basic execution of a trigger using a non-combining window set and discarding mode. WindowingStrategy<?, IntervalWindow> strategy = WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(10))) .withTimestampCombiner(TimestampCombiner.EARLIEST) .withMode(AccumulationMode.DISCARDING_FIRED_PANES) .withAllowedLateness(Duration.millis(100)); ReduceFnTester<Integer, Integer, IntervalWindow> tester = ReduceFnTester.combining( strategy, mockTriggerStateMachine, Sum.ofIntegers(), VarIntCoder.of()); injectElement(tester, 2); when(mockTriggerStateMachine.shouldFire(anyTriggerContext())).thenReturn(true); injectElement(tester, 3); when(mockTriggerStateMachine.shouldFire(anyTriggerContext())).thenReturn(true); triggerShouldFinish(mockTriggerStateMachine); injectElement(tester, 4); // This element shouldn't be seen, because the trigger has finished injectElement(tester, 6); assertThat( tester.extractOutput(), contains( isSingleWindowedValue(equalTo(5), 2, 0, 10), isSingleWindowedValue(equalTo(4), 4, 0, 10))); assertTrue(tester.isMarkedFinished(firstWindow)); tester.assertHasOnlyGlobalAndFinishedSetsFor(firstWindow); }
@Test public void testOnElementCombiningAccumulating() throws Exception { // Test basic execution of a trigger using a non-combining window set and accumulating mode. WindowingStrategy<?, IntervalWindow> strategy = WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(10))) .withTimestampCombiner(TimestampCombiner.EARLIEST) .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES) .withAllowedLateness(Duration.millis(100)); ReduceFnTester<Integer, Integer, IntervalWindow> tester = ReduceFnTester.combining( strategy, mockTriggerStateMachine, Sum.ofIntegers(), VarIntCoder.of()); injectElement(tester, 1); when(mockTriggerStateMachine.shouldFire(anyTriggerContext())).thenReturn(true); injectElement(tester, 2); when(mockTriggerStateMachine.shouldFire(anyTriggerContext())).thenReturn(true); triggerShouldFinish(mockTriggerStateMachine); injectElement(tester, 3); // This element shouldn't be seen, because the trigger has finished injectElement(tester, 4); assertThat( tester.extractOutput(), contains( isSingleWindowedValue(equalTo(3), 1, 0, 10), isSingleWindowedValue(equalTo(6), 3, 0, 10))); assertTrue(tester.isMarkedFinished(firstWindow)); tester.assertHasOnlyGlobalAndFinishedSetsFor(firstWindow); }
/**
 * For every entry in {@code INTEGER_CASES}, applies the integer sum, min, max and mean functions
 * to the entry's data and compares each result with the value recorded on the entry.
 */
@Test
public void testIntegerStats() {
  for (TestCase<Integer> testCase : INTEGER_CASES) {
    assertEquals(testCase.sum, Sum.ofIntegers().apply(testCase.data));
    assertEquals(testCase.min, Min.ofIntegers().apply(testCase.data));
    assertEquals(testCase.max, Max.ofIntegers().apply(testCase.data));
    assertEquals(testCase.mean, Mean.<Integer>of().apply(testCase.data));
  }
}
/** * Tests that if end-of-window and GC timers come in together, that the pane is correctly marked * as final. */ @Test public void testCombiningAccumulatingEventTime() throws Exception { WindowingStrategy<?, IntervalWindow> strategy = WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100))) .withTimestampCombiner(TimestampCombiner.EARLIEST) .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES) .withAllowedLateness(Duration.millis(1)) .withTrigger(Repeatedly.forever(AfterWatermark.pastEndOfWindow())); ReduceFnTester<Integer, Integer, IntervalWindow> tester = ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of()); injectElement(tester, 2); // processing timer @ 5000 + 10; EOW timer @ 100 injectElement(tester, 5); tester.advanceInputWatermark(new Instant(1000)); assertThat( tester.extractOutput(), contains( isSingleWindowedValue( equalTo(7), 2, 0, 100, PaneInfo.createPane(true, true, Timing.ON_TIME, 0, 0)))); }
@Test @Category({ValidatesRunner.class, UsesStatefulParDo.class}) public void testCombiningStateParameterSuperclass() { final String stateId = "foo"; DoFn<KV<Integer, Integer>, String> fn = new DoFn<KV<Integer, Integer>, String>() { private static final int EXPECTED_SUM = 8; @StateId(stateId) private final StateSpec<CombiningState<Integer, int[], Integer>> state = StateSpecs.combining(Sum.ofIntegers()); @ProcessElement public void processElement( @Element KV<Integer, Integer> element, @StateId(stateId) GroupingState<Integer, Integer> state, OutputReceiver<String> r) { state.add(element.getValue()); Integer currentValue = state.read(); if (currentValue == EXPECTED_SUM) { r.output("right on"); } } }; PCollection<String> output = pipeline .apply(Create.of(KV.of(123, 4), KV.of(123, 7), KV.of(123, -3))) .apply(ParDo.of(fn)); // There should only be one moment at which the sum is exactly 8 PAssert.that(output).containsInAnyOrder("right on"); pipeline.run(); }
/** * Tests that when a processing time timers comes in after a window is expired and GC'd it does * not cause a spurious output. */ @Test public void testCombiningAccumulatingProcessingTimeSeparateBundles() throws Exception { WindowingStrategy<?, IntervalWindow> strategy = WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100))) .withTimestampCombiner(TimestampCombiner.EARLIEST) .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES) .withAllowedLateness(Duration.ZERO) .withTrigger( Repeatedly.forever( AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(10)))); ReduceFnTester<Integer, Integer, IntervalWindow> tester = ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of()); tester.advanceProcessingTime(new Instant(5000)); injectElement(tester, 2); // processing timer @ 5000 + 10; EOW timer @ 100 injectElement(tester, 5); tester.advanceInputWatermark(new Instant(100)); tester.advanceProcessingTime(new Instant(5011)); assertThat( tester.extractOutput(), contains( isSingleWindowedValue( equalTo(7), 2, 0, 100, PaneInfo.createPane(true, true, Timing.ON_TIME, 0, 0)))); }
/** Tests that a processing time timer does not cause window GC. */ @Test public void testProcessingTimeTimerDoesNotGc() throws Exception { WindowingStrategy<?, IntervalWindow> strategy = WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100))) .withTimestampCombiner(TimestampCombiner.EARLIEST) .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES) .withAllowedLateness(Duration.ZERO) .withTrigger( Repeatedly.forever( AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(10)))); ReduceFnTester<Integer, Integer, IntervalWindow> tester = ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of()); tester.advanceProcessingTime(new Instant(5000)); injectElement(tester, 2); // processing timer @ 5000 + 10; EOW timer @ 100 injectElement(tester, 5); tester.advanceProcessingTime(new Instant(10000)); tester.assertHasOnlyGlobalAndStateFor(new IntervalWindow(new Instant(0), new Instant(100))); assertThat( tester.extractOutput(), contains( isSingleWindowedValue( equalTo(7), 2, 0, 100, PaneInfo.createPane(true, false, Timing.EARLY, 0, 0)))); }
/**
 * Builds a map side input from per-key integer sums and looks it up from the main input by the
 * first letter of each word, expecting "a" to map to 1 + 20 = 21 and "b" to map to 3.
 */
@Test
@Category(ValidatesRunner.class)
public void testCombinedMapSideInput() {
  final PCollectionView<Map<String, Integer>> view =
      pipeline
          .apply("CreateSideInput", Create.of(KV.of("a", 1), KV.of("a", 20), KV.of("b", 3)))
          .apply("SumIntegers", Combine.perKey(Sum.ofIntegers()))
          .apply(View.asMap());

  PCollection<KV<String, Integer>> output =
      pipeline
          .apply("CreateMainInput", Create.of("apple", "banana", "blackberry"))
          .apply(
              "Output",
              ParDo.of(
                      new DoFn<String, KV<String, Integer>>() {
                        @ProcessElement
                        public void processElement(ProcessContext c) {
                          // Pair each word with the summed value stored under its first letter.
                          String word = c.element();
                          String firstLetter = c.element().substring(0, 1);
                          Integer summed = c.sideInput(view).get(firstLetter);
                          c.output(KV.of(word, summed));
                        }
                      })
                  .withSideInputs(view));

  PAssert.that(output)
      .containsInAnyOrder(KV.of("apple", 21), KV.of("banana", 3), KV.of("blackberry", 3));

  pipeline.run();
}
/** * Tests that when a processing time timer comes in after a window is expired it is just ignored. */ @Test public void testLateProcessingTimeTimer() throws Exception { WindowingStrategy<?, IntervalWindow> strategy = WindowingStrategy.of((WindowFn<?, IntervalWindow>) FixedWindows.of(Duration.millis(100))) .withTimestampCombiner(TimestampCombiner.EARLIEST) .withMode(AccumulationMode.ACCUMULATING_FIRED_PANES) .withAllowedLateness(Duration.ZERO) .withTrigger( Repeatedly.forever( AfterProcessingTime.pastFirstElementInPane().plusDelayOf(Duration.millis(10)))); ReduceFnTester<Integer, Integer, IntervalWindow> tester = ReduceFnTester.combining(strategy, Sum.ofIntegers(), VarIntCoder.of()); tester.advanceProcessingTime(new Instant(5000)); injectElement(tester, 2); // processing timer @ 5000 + 10; EOW timer @ 100 injectElement(tester, 5); // After this advancement, the window is expired and only the GC process // should be allowed to touch it tester.advanceInputWatermarkNoTimers(new Instant(100)); // This should not output tester.advanceProcessingTime(new Instant(6000)); assertThat(tester.extractOutput(), emptyIterable()); }
/**
 * Checks equality of accumulator coders produced by the sum combine fns: the same input coder
 * must give equal accumulator coders, and (for integers and longs) a different input coder must
 * give an unequal one.
 */
@Test
public void testGetAccumulatorCoderEquals() {
  // Integer sum: equal for the same input coder, unequal for a different one.
  Combine.BinaryCombineIntegerFn intSum = Sum.ofIntegers();
  assertEquals(
      intSum.getAccumulatorCoder(STANDARD_REGISTRY, VarIntCoder.of()),
      intSum.getAccumulatorCoder(STANDARD_REGISTRY, VarIntCoder.of()));
  assertNotEquals(
      intSum.getAccumulatorCoder(STANDARD_REGISTRY, VarIntCoder.of()),
      intSum.getAccumulatorCoder(STANDARD_REGISTRY, BigEndianIntegerCoder.of()));

  // Long sum: same pair of checks.
  Combine.BinaryCombineLongFn longSum = Sum.ofLongs();
  assertEquals(
      longSum.getAccumulatorCoder(STANDARD_REGISTRY, VarLongCoder.of()),
      longSum.getAccumulatorCoder(STANDARD_REGISTRY, VarLongCoder.of()));
  assertNotEquals(
      longSum.getAccumulatorCoder(STANDARD_REGISTRY, VarLongCoder.of()),
      longSum.getAccumulatorCoder(STANDARD_REGISTRY, BigEndianLongCoder.of()));

  // Double sum: only the same-coder equality is checked.
  Combine.BinaryCombineDoubleFn doubleSum = Sum.ofDoubles();
  assertEquals(
      doubleSum.getAccumulatorCoder(STANDARD_REGISTRY, DoubleCoder.of()),
      doubleSum.getAccumulatorCoder(STANDARD_REGISTRY, DoubleCoder.of()));
}
}
/**
 * Aggregates four {@code AggregatePojos} globally with three field aggregations (a long sum and
 * a top-1 over {@code field1}, an integer sum over {@code field3}) and checks the single
 * resulting row against the expected schema and values.
 */
@Test
@Category(NeedsRunner.class)
public void testGloballyWithSchemaAggregateFn() {
  // Expected output: field1 sums to 1 + 2 + 3 + 4 = 10, field3 sums to 2 + 3 + 4 + 5 = 14,
  // and the largest field1 value is 4.
  Schema aggregateSchema =
      Schema.builder()
          .addInt64Field("field1_sum")
          .addInt32Field("field3_sum")
          .addArrayField("field1_top", FieldType.INT64)
          .build();
  Row expectedRow = Row.withSchema(aggregateSchema).addValues(10L, 14).addArray(4L).build();

  Collection<AggregatePojos> elements =
      ImmutableList.of(
          new AggregatePojos(1, 1, 2),
          new AggregatePojos(2, 1, 3),
          new AggregatePojos(3, 2, 4),
          new AggregatePojos(4, 2, 5));

  PCollection<AggregatePojos> input = pipeline.apply(Create.of(elements));
  PCollection<Row> aggregate =
      input.apply(
          Group.<AggregatePojos>globally()
              .aggregateField("field1", Sum.ofLongs(), "field1_sum")
              .aggregateField("field3", Sum.ofIntegers(), "field3_sum")
              .aggregateField("field1", Top.largestLongsFn(1), "field1_top"));

  PAssert.that(aggregate).containsInAnyOrder(expectedRow);
  pipeline.run();
}