kv("k3", 0))); PAssert.that(output) .inWindow(new IntervalWindow(new Instant(0L), Duration.millis(5L))) .satisfies( containsKvs( kv("k1", 3), kv("k5", Integer.MIN_VALUE, Integer.MAX_VALUE), kv("k2", 66))); PAssert.that(output) .inWindow(new IntervalWindow(new Instant(5L), Duration.millis(5L))) .satisfies(containsKvs(kv("k1", 4), kv("k2", -33), kv("k3", 0)));
/**
 * Applies {@code Sample.any(2)} to six timestamped integers that have been split into
 * three-second fixed windows, then checks the output of each of the first two windows with
 * {@code VerifyCorrectSample}.
 */
@Test
@Category(NeedsRunner.class)
public void testSampleAny() {
  Duration windowSize = Duration.standardSeconds(3);

  PCollection<Integer> windowedInts =
      pipeline
          .apply(
              Create.timestamped(ImmutableList.of(tv(0), tv(1), tv(2), tv(3), tv(4), tv(5)))
                  .withCoder(BigEndianIntegerCoder.of()))
          .apply(Window.into(FixedWindows.of(windowSize)));

  PCollection<Integer> sampled = windowedInts.apply(Sample.any(2));

  // Window [0s, 3s): the sample is verified against the candidates 0, 1 and 2.
  IntervalWindow firstWindow = new IntervalWindow(new Instant(0), windowSize);
  PAssert.that(sampled)
      .inWindow(firstWindow)
      .satisfies(new VerifyCorrectSample<>(2, Arrays.asList(0, 1, 2)));

  // Window [3s, 6s): the sample is verified against the candidates 3, 4 and 5.
  IntervalWindow secondWindow = new IntervalWindow(new Instant(3000), windowSize);
  PAssert.that(sampled)
      .inWindow(secondWindow)
      .satisfies(new VerifyCorrectSample<>(2, Arrays.asList(3, 4, 5)));

  pipeline.run();
}
.inWindow(window) .containsInAnyOrder(KV.of(redTeam, 7), KV.of(blueTeam, 18));
.apply(Distinct.create()); PAssert.that(distinctValues) .inWindow(new IntervalWindow(base, base.plus(Duration.standardSeconds(30)))) .containsInAnyOrder("k1", "k2", "k3"); PAssert.that(distinctValues) .inWindow( new IntervalWindow( base.plus(Duration.standardSeconds(30)), base.plus(Duration.standardSeconds(60)))) .containsInAnyOrder("k1", "k2", "k3"); PAssert.that(distinctValues) .inWindow( new IntervalWindow( base.plus(Duration.standardSeconds(60)), base.plus(Duration.standardSeconds(90))))
.inWindow(WindowOddEvenBuckets.EVEN_WINDOW) .containsInAnyOrder(0L, 2L, 4L, 6L, 8L); PAssert.that(initialWindows) .inWindow(WindowOddEvenBuckets.ODD_WINDOW) .containsInAnyOrder(1L, 3L, 5L, 7L, 9L); .inWindow(WindowOddEvenBuckets.EVEN_WINDOW) .containsInAnyOrder(true, true, true, true, true); PAssert.that(upOne) .inWindow(WindowOddEvenBuckets.ODD_WINDOW) .containsInAnyOrder(false, false, false, false, false);
.inWindow(new IntervalWindow(new Instant(0L), Duration.millis(6L))) .containsInAnyOrder(KV.of("foo", 1L), KV.of("bar", 6L), KV.of("bizzle", 3L)); PAssert.that("Elements should appear in all the windows they are assigned to", combined) .inWindow(new IntervalWindow(new Instant(-3L), Duration.millis(6L))) .containsInAnyOrder(KV.of("foo", 1L), KV.of("bar", 2L)); PAssert.that(combined) .inWindow(new IntervalWindow(new Instant(6L), Duration.millis(6L))) .containsInAnyOrder(KV.of("bizzle", 11L)); PAssert.that(combined)
/**
 * Tests that {@link UpdateTeamScoreFn} {@link org.apache.beam.sdk.transforms.DoFn} outputs
 * correctly per window and per key.
 *
 * <p>Five events for {@code RED_ONE} and {@code RED_TWO} are pushed through five-minute fixed
 * windows. The assertions expect exactly one {@code (redTeam, 100)} output in the window that
 * starts at {@code baseTime} and exactly one {@code (redTeam, 120)} output in the window that
 * immediately follows it.
 */
@Test
public void testScoreUpdatesPerWindow() {
  TestStream<KV<String, GameActionInfo>> createEvents =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class)))
          .advanceWatermarkTo(baseTime)
          .addElements(
              event(TestUser.RED_ONE, 50, Duration.standardMinutes(1)),
              event(TestUser.RED_TWO, 50, Duration.standardMinutes(2)),
              event(TestUser.RED_ONE, 50, Duration.standardMinutes(3)),
              event(TestUser.RED_ONE, 60, Duration.standardMinutes(6)),
              event(TestUser.RED_TWO, 60, Duration.standardMinutes(7)))
          .advanceWatermarkToInfinity();

  Duration teamWindowDuration = Duration.standardMinutes(5);

  PCollection<KV<String, Integer>> teamScores =
      p.apply(createEvents)
          .apply(Window.<KV<String, GameActionInfo>>into(FixedWindows.of(teamWindowDuration)))
          .apply(ParDo.of(new UpdateTeamScoreFn(100)));

  // Only RED_* users appear in the stream, so the red team name is the only key asserted on.
  String redTeam = TestUser.RED_ONE.getTeam();

  // Two consecutive windows of teamWindowDuration, the first starting at baseTime.
  IntervalWindow window1 = new IntervalWindow(baseTime, teamWindowDuration);
  IntervalWindow window2 = new IntervalWindow(window1.end(), teamWindowDuration);

  PAssert.that(teamScores).inWindow(window1).containsInAnyOrder(KV.of(redTeam, 100));
  PAssert.that(teamScores).inWindow(window2).containsInAnyOrder(KV.of(redTeam, 120));

  p.run().waitUntilFinish();
}
.inWindow(new IntervalWindow(new Instant(-50L), new Instant(150L))) .containsInAnyOrder(1); PAssert.that(pcollection) .inWindow(new IntervalWindow(new Instant(50L), new Instant(250L))) .containsInAnyOrder(2, 1); PAssert.that(pcollection) .inWindow(new IntervalWindow(new Instant(150L), new Instant(350L))) .containsInAnyOrder(2, 3); PAssert.that(pcollection) .inWindow(new IntervalWindow(new Instant(250L), new Instant(450L))) .containsInAnyOrder(4, 3); PAssert.that(pcollection) .inWindow(new IntervalWindow(new Instant(350L), new Instant(550L))) .containsInAnyOrder(4); pipeline.run();
/**
 * Groups seven key/value pairs (no windowing applied) and checks the grouped result twice: once
 * without a window restriction and once restricted to {@code GlobalWindow.INSTANCE}.
 */
@Test
@Category(ValidatesRunner.class)
public void testGroupByKey() {
  KvCoder<String, Integer> pairCoder =
      KvCoder.of(StringUtf8Coder.of(), BigEndianIntegerCoder.of());

  List<KV<String, Integer>> pairs =
      Arrays.asList(
          KV.of("k1", 3),
          KV.of("k5", Integer.MAX_VALUE),
          KV.of("k5", Integer.MIN_VALUE),
          KV.of("k2", 66),
          KV.of("k1", 4),
          KV.of("k2", -33),
          KV.of("k3", 0));

  PCollection<KV<String, Iterable<Integer>>> grouped =
      p.apply(Create.of(pairs).withCoder(pairCoder)).apply(GroupByKey.create());

  // The same expectation is reused for both assertions below.
  SerializableFunction<Iterable<KV<String, Iterable<Integer>>>, Void> expectedGroups =
      containsKvs(
          kv("k1", 3, 4),
          kv("k5", Integer.MIN_VALUE, Integer.MAX_VALUE),
          kv("k2", 66, -33),
          kv("k3", 0));

  PAssert.that(grouped).satisfies(expectedGroups);
  PAssert.that(grouped).inWindow(GlobalWindow.INSTANCE).satisfies(expectedGroups);

  p.run();
}
/**
 * Groups three timestamped pairs after assigning them to 5 ms sliding windows that start every
 * 3 ms, then checks the grouped output across all windows and in each of the three windows
 * starting at -3 ms, 0 ms and 3 ms.
 */
@Test
@Category(ValidatesRunner.class)
public void testGroupByKeyMultipleWindows() {
  Duration windowSize = Duration.millis(5L);
  SlidingWindows slidingFn = SlidingWindows.of(windowSize).every(Duration.millis(3L));

  PCollection<KV<String, Integer>> slidingInput =
      p.apply(
              Create.timestamped(
                  TimestampedValue.of(KV.of("foo", 1), new Instant(1)),
                  TimestampedValue.of(KV.of("foo", 4), new Instant(4)),
                  TimestampedValue.of(KV.of("bar", 3), new Instant(3))))
          .apply(Window.into(slidingFn));

  PCollection<KV<String, Iterable<Integer>>> grouped = slidingInput.apply(GroupByKey.create());

  // Across all windows: one group per (key, window) combination.
  PAssert.that(grouped)
      .satisfies(
          containsKvs(kv("foo", 1, 4), kv("foo", 1), kv("foo", 4), kv("bar", 3), kv("bar", 3)));

  IntervalWindow earliest = new IntervalWindow(new Instant(-3L), windowSize);
  PAssert.that(grouped).inWindow(earliest).satisfies(containsKvs(kv("foo", 1)));

  IntervalWindow middle = new IntervalWindow(new Instant(0L), windowSize);
  PAssert.that(grouped)
      .inWindow(middle)
      .satisfies(containsKvs(kv("foo", 1, 4), kv("bar", 3)));

  IntervalWindow latest = new IntervalWindow(new Instant(3L), windowSize);
  PAssert.that(grouped).inWindow(latest).satisfies(containsKvs(kv("foo", 4), kv("bar", 3)));

  p.run();
}
/**
 * Tests that {@link UpdateTeamScoreFn} {@link org.apache.beam.sdk.transforms.DoFn} outputs
 * correctly for multiple teams.
 *
 * <p>Two {@code RED_*} events and three {@code BLUE_*} events are fed through the function in the
 * global window; the expected output is one red-team score and two blue-team scores.
 */
@Test
public void testScoreUpdatesPerTeam() {
  String redTeam = TestUser.RED_ONE.getTeam();
  String blueTeam = TestUser.BLUE_ONE.getTeam();

  TestStream<KV<String, GameActionInfo>> events =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class)))
          .advanceWatermarkTo(baseTime)
          .addElements(
              event(TestUser.RED_ONE, 50, Duration.standardSeconds(10)),
              event(TestUser.RED_TWO, 50, Duration.standardSeconds(20)),
              event(TestUser.BLUE_ONE, 70, Duration.standardSeconds(30)),
              event(TestUser.BLUE_TWO, 80, Duration.standardSeconds(40)),
              event(TestUser.BLUE_TWO, 50, Duration.standardSeconds(50)))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, GameActionInfo>> actions = p.apply(events);
  PCollection<KV<String, Integer>> teamScores =
      actions.apply(ParDo.of(new UpdateTeamScoreFn(100)));

  PAssert.that(teamScores)
      .inWindow(GlobalWindow.INSTANCE)
      .containsInAnyOrder(KV.of(redTeam, 100), KV.of(blueTeam, 150), KV.of(blueTeam, 200));

  p.run().waitUntilFinish();
}
/**
 * Groups four timestamped pairs under session windows with a 4 ms gap and checks the grouped
 * output across all windows as well as in the three expected session windows.
 */
@Test
@Category(ValidatesRunner.class)
public void testGroupByKeyMergingWindows() {
  Sessions sessionFn = Sessions.withGapDuration(Duration.millis(4L));

  PCollection<KV<String, Integer>> sessionInput =
      p.apply(
              Create.timestamped(
                  TimestampedValue.of(KV.of("foo", 1), new Instant(1)),
                  TimestampedValue.of(KV.of("foo", 4), new Instant(4)),
                  TimestampedValue.of(KV.of("bar", 3), new Instant(3)),
                  TimestampedValue.of(KV.of("foo", 9), new Instant(9))))
          .apply(Window.into(sessionFn));

  PCollection<KV<String, Iterable<Integer>>> grouped = sessionInput.apply(GroupByKey.create());

  PAssert.that(grouped).satisfies(containsKvs(kv("foo", 1, 4), kv("foo", 9), kv("bar", 3)));

  // "foo" at 1 ms and 4 ms are expected together in a single window [1, 8).
  IntervalWindow mergedFooSession = new IntervalWindow(new Instant(1L), new Instant(8L));
  PAssert.that(grouped).inWindow(mergedFooSession).satisfies(containsKvs(kv("foo", 1, 4)));

  // "bar" at 3 ms is expected alone in [3, 7).
  IntervalWindow barSession = new IntervalWindow(new Instant(3L), new Instant(7L));
  PAssert.that(grouped).inWindow(barSession).satisfies(containsKvs(kv("bar", 3)));

  // "foo" at 9 ms is expected alone in [9, 13).
  IntervalWindow lateFooSession = new IntervalWindow(new Instant(9L), new Instant(13L));
  PAssert.that(grouped).inWindow(lateFooSession).satisfies(containsKvs(kv("foo", 9)));

  p.run();
}
@Test @Category(ValidatesRunner.class) public void testSideInputsWithMultipleWindows() { // Tests that the runner can safely run a DoFn that uses side inputs // on an input where the element is in multiple windows. The complication is // that side inputs are per-window, so the runner has to make sure // to process each window individually. MutableDateTime mutableNow = Instant.now().toMutableDateTime(); mutableNow.setMillisOfSecond(0); Instant now = mutableNow.toInstant(); SlidingWindows windowFn = SlidingWindows.of(Duration.standardSeconds(5)).every(Duration.standardSeconds(1)); PCollectionView<Integer> view = pipeline.apply(Create.of(1)).apply(View.asSingleton()); PCollection<String> res = pipeline .apply(Create.timestamped(TimestampedValue.of("a", now))) .apply(Window.into(windowFn)) .apply(ParDo.of(new FnWithSideInputs(view)).withSideInputs(view)); for (int i = 0; i < 4; ++i) { Instant base = now.minus(Duration.standardSeconds(i)); IntervalWindow window = new IntervalWindow(base, base.plus(Duration.standardSeconds(5))); PAssert.that(res).inWindow(window).containsInAnyOrder("a:1"); } pipeline.run(); }
/**
 * Tests that {@link UpdateTeamScoreFn} {@link org.apache.beam.sdk.transforms.DoFn} outputs
 * correctly for one team.
 *
 * <p>Five events for {@code RED_ONE} and {@code RED_TWO} are fed through the function in the
 * global window; the expected outputs for the red team are 100, 200 and 401.
 */
@Test
public void testScoreUpdatesOneTeam() {
  String redTeam = TestUser.RED_ONE.getTeam();

  TestStream<KV<String, GameActionInfo>> events =
      TestStream.create(KvCoder.of(StringUtf8Coder.of(), AvroCoder.of(GameActionInfo.class)))
          .advanceWatermarkTo(baseTime)
          .addElements(
              event(TestUser.RED_TWO, 99, Duration.standardSeconds(10)),
              event(TestUser.RED_ONE, 1, Duration.standardSeconds(20)),
              event(TestUser.RED_ONE, 0, Duration.standardSeconds(30)),
              event(TestUser.RED_TWO, 100, Duration.standardSeconds(40)),
              event(TestUser.RED_TWO, 201, Duration.standardSeconds(50)))
          .advanceWatermarkToInfinity();

  PCollection<KV<String, GameActionInfo>> actions = p.apply(events);
  PCollection<KV<String, Integer>> teamScores =
      actions.apply(ParDo.of(new UpdateTeamScoreFn(100)));

  PAssert.that(teamScores)
      .inWindow(GlobalWindow.INSTANCE)
      .containsInAnyOrder(KV.of(redTeam, 100), KV.of(redTeam, 200), KV.of(redTeam, 401));

  p.run().waitUntilFinish();
}
/**
 * Sends two elements stamped with the global window's maximum timestamp through six-hour fixed
 * windows and a group-by-key, and checks that both come out in the fixed window assigned to that
 * timestamp.
 */
@Test
@Category({NeedsRunner.class, UsesTestStream.class})
public void testElementsAtAlmostPositiveInfinity() {
  Instant endOfGlobalWindow = GlobalWindow.INSTANCE.maxTimestamp();
  FixedWindows windows = FixedWindows.of(Duration.standardHours(6));

  TestStream<String> lateElements =
      TestStream.create(StringUtf8Coder.of())
          .addElements(
              TimestampedValue.of("foo", endOfGlobalWindow),
              TimestampedValue.of("bar", endOfGlobalWindow))
          .advanceWatermarkToInfinity();

  PCollection<String> windowed = p.apply(lateElements).apply(Window.into(windows));

  // Round-trip through a single-key GroupByKey, then flatten back to the individual values.
  PCollection<String> windowedValues =
      windowed
          .apply(WithKeys.of(1))
          .apply(GroupByKey.create())
          .apply(Values.create())
          .apply(Flatten.iterables());

  PAssert.that(windowedValues)
      .inWindow(windows.assignWindow(endOfGlobalWindow))
      .containsInAnyOrder("foo", "bar");

  p.run();
}
/**
 * Applies {@code Sample.any(0)} to six timestamped integers in three-second fixed windows and
 * checks the first two windows with {@code VerifyCorrectSample} using an expected size of zero.
 */
@Test
@Category(NeedsRunner.class)
public void testSampleAnyZero() {
  Duration windowSize = Duration.standardSeconds(3);

  PCollection<Integer> sampled =
      pipeline
          .apply(
              Create.timestamped(ImmutableList.of(tv(0), tv(1), tv(2), tv(3), tv(4), tv(5)))
                  .withCoder(BigEndianIntegerCoder.of()))
          .apply(Window.into(FixedWindows.of(windowSize)))
          .apply(Sample.any(0));

  IntervalWindow firstWindow = new IntervalWindow(new Instant(0), windowSize);
  PAssert.that(sampled).inWindow(firstWindow).satisfies(new VerifyCorrectSample<>(0, EMPTY));

  IntervalWindow secondWindow = new IntervalWindow(new Instant(3000), windowSize);
  PAssert.that(sampled).inWindow(secondWindow).satisfies(new VerifyCorrectSample<>(0, EMPTY));

  pipeline.run();
}
/**
 * Applies {@code Sample.any(10)} to an empty input in three-second fixed windows and checks the
 * window starting at zero with {@code VerifyCorrectSample} using an expected size of zero.
 */
@Test
@Category(NeedsRunner.class)
public void testSampleAnyInsufficientElements() {
  Duration windowSize = Duration.standardSeconds(3);

  PCollection<Integer> sampled =
      pipeline
          .apply(Create.empty(BigEndianIntegerCoder.of()))
          .apply(Window.into(FixedWindows.of(windowSize)))
          .apply(Sample.any(10));

  IntervalWindow firstWindow = new IntervalWindow(new Instant(0), windowSize);
  PAssert.that(sampled).inWindow(firstWindow).satisfies(new VerifyCorrectSample<>(0, EMPTY));

  pipeline.run();
}
/**
 * Tests that {@code containsInAnyOrder} is actually order-independent: the elements are created
 * as 1, 2, 3, 4 and asserted, within the global window, in the order 2, 1, 4, 3.
 */
@Test
@Category(ValidatesRunner.class)
public void testGlobalWindowContainsInAnyOrder() throws Exception {
  PCollection<Integer> numbers = pipeline.apply(Create.of(1, 2, 3, 4));

  PAssert.that(numbers)
      .inWindow(GlobalWindow.INSTANCE)
      .containsInAnyOrder(2, 1, 4, 3);

  pipeline.run();
}