/**
 * Keys the input by parity (e % 2), collects the values of each
 * count-of-3 window into a {@code Set}, and strips the keys from the
 * result via {@code outputValues()}.
 */
@Override
protected Dataset<Set<Integer>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(element -> element % 2, Integer.class)
      .valueBy(element -> element)
      .reduceBy(values -> values.collect(Collectors.toSet()))
      .windowBy(Count.of(3))
      .outputValues();
}
/**
 * Parity-keyed reduction: each count-of-3 window is folded into a
 * {@code Set} of its elements; {@code outputValues()} drops the keys so
 * only the sets are emitted.
 */
@Override
protected Dataset<Set<Integer>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(item -> item % 2, Integer.class)
      .valueBy(item -> item)
      .reduceBy(stream -> stream.collect(Collectors.toSet()))
      .windowBy(Count.of(3))
      .outputValues();
}
/**
 * Keys the input by parity (e % 2) and reduces each count-of-3 window
 * into a {@code Set} of its values, emitting (key, set) pairs.
 */
@Override
protected Dataset<Pair<Integer, Set<Integer>>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(element -> element % 2, Integer.class)
      .valueBy(element -> element)
      .reduceBy(values -> values.collect(Collectors.toSet()))
      .windowBy(Count.of(3))
      .output();
}
.valueBy(e -> 1L) .reduceBy(Sums.ofLongs()) .windowBy(TimeSliding.of(Duration.ofMillis(10), Duration.ofMillis(5))) .output();
.valueBy(e -> e) .reduceBy(s -> s.collect(Collectors.toSet())) .windowBy(Count.of(3)) .output();
.keyBy(e -> e) .reduceBy(values -> 1L) .windowBy(Time.of(Duration.ofSeconds(1))) .output(new Util.TestHint(), new Util.TestHint2());
/**
 * End-to-end smoke test of a Fluent flow: window the stream into
 * count-of-3 windows, flatten the windowed sets back into elements, then
 * re-window and collect again.
 *
 * NOTE(review): the test only verifies the pipeline executes without
 * error — nothing is asserted about the contents of {@code out}.
 */
@Test
public void testBasics() throws Exception {
  // Delay between reads so the unbounded source emits elements over time.
  final Duration READ_DELAY = Duration.ofMillis(100L);
  ListDataSink<Set<String>> out = ListDataSink.get();
  Fluent.flow("Test")
      .read(ListDataSource.unbounded(
          asList("0-one 1-two 0-three 1-four 0-five 1-six 0-seven".split(" ")))
          .withReadDelay(READ_DELAY))
      // ~ create windows of size three (single constant key: all elements
      // fall into the same group, so each window's set holds 3 elements)
      .apply(input -> ReduceByKey.of(input)
          .keyBy(e -> "")
          .valueBy(e -> e)
          .reduceBy(s -> s.collect(Collectors.toSet()))
          .windowBy(Count.of(3)))
      // ~ strip the needless key and flatten out the elements thereby
      // creating multiple elements in the output belonging to the same window
      .flatMap((Pair<String, Set<String>> e, Collector<String> c) ->
          e.getSecond().stream().forEachOrdered(c::collect))
      // ~ we now expect to reconstruct the same windowing
      // as the very initial step (note: no explicit windowBy here —
      // presumably the upstream windowing is inherited; confirm with the
      // operator's attached-windowing semantics)
      .apply(input -> ReduceByKey.of(input)
          .keyBy(e -> "")
          .valueBy(e -> e)
          .reduceBy(s -> s.collect(Collectors.toSet())))
      // ~ strip the needless key
      .mapElements(Pair::getSecond)
      .persist(out)
      .execute(new LocalExecutor());
}
/**
 * Persisting two different datasets into the very same sink instance is
 * illegal; unfolding the flow must throw {@link IllegalArgumentException}.
 */
@Test(expected = IllegalArgumentException.class)
public void testMultipleOutputsToSameSink() throws Exception {
  flow = Flow.create(getClass().getSimpleName());
  input = flow.createInput(new MockStreamDataSource<>());
  Dataset<Object> identity = MapElements.of(input).using(e -> e).output();
  Dataset<Pair<Object, Long>> counted = ReduceByKey
      .of(identity)
      .keyBy(e -> e)
      .reduceBy(values -> 1L)
      .windowBy(Time.of(Duration.ofSeconds(1)))
      .output();
  Dataset<Pair<Object, Long>> joined = Join.of(identity, counted)
      .by(e -> e, Pair::getFirst)
      .using((Object left, Pair<Object, Long> right, Collector<Long> context) ->
          context.collect(right.getSecond()))
      .windowBy(Time.of(Duration.ofSeconds(1)))
      .output();
  ListDataSink<Pair<Object, Long>> sharedSink = ListDataSink.get();
  // Both persist calls target the same sink — the illegal condition under test.
  joined.persist(sharedSink);
  counted.persist(sharedSink);
  FlowUnfolder.unfold(flow, Executor.getBasicOps());
}
/**
 * Session-windowed (5 ms gap) reduction over event-timed pairs: the key
 * is the leading digit of the string element, each session collects the
 * strings into a {@code Set}, and the result is flat-mapped into triples
 * that additionally carry the {@link TimeInterval} of their window.
 */
@Override
protected Dataset<Triple<TimeInterval, Integer, Set<String>>> getOutput(
    Dataset<Pair<String, Integer>> input) {
  input = AssignEventTime.of(input).using(Pair::getSecond).output();
  Dataset<Pair<Integer, Set<String>>> reduced = ReduceByKey.of(input)
      .keyBy(pair -> pair.getFirst().charAt(0) - '0')
      .valueBy(Pair::getFirst)
      .reduceBy(values -> values.collect(Collectors.toSet()))
      .windowBy(Session.of(Duration.ofMillis(5)))
      .output();
  return FlatMap.of(reduced)
      .using((UnaryFunctor<Pair<Integer, Set<String>>,
              Triple<TimeInterval, Integer, Set<String>>>)
          (element, context) -> context.collect(Triple.of(
              (TimeInterval) context.getWindow(),
              element.getFirst(),
              element.getSecond())))
      .output();
}
/**
 * Reduces event-timed (string, timestamp) pairs in 5 ms session windows,
 * keyed by the string's first digit, gathering the strings of each
 * session into a {@code Set}; then tags every reduced pair with the
 * {@link TimeInterval} of the window it belongs to.
 */
@Override
protected Dataset<Triple<TimeInterval, Integer, Set<String>>> getOutput(
    Dataset<Pair<String, Integer>> input) {
  input = AssignEventTime.of(input).using(Pair::getSecond).output();
  Dataset<Pair<Integer, Set<String>>> sessions = ReduceByKey.of(input)
      .keyBy(item -> item.getFirst().charAt(0) - '0')
      .valueBy(Pair::getFirst)
      .reduceBy(stream -> stream.collect(Collectors.toSet()))
      .windowBy(Session.of(Duration.ofMillis(5)))
      .output();
  return FlatMap.of(sessions)
      .using((UnaryFunctor<Pair<Integer, Set<String>>,
              Triple<TimeInterval, Integer, Set<String>>>)
          (pair, ctx) -> ctx.collect(Triple.of(
              (TimeInterval) ctx.getWindow(),
              pair.getFirst(),
              pair.getSecond())))
      .output();
}
/**
 * Per-test setup: builds a small flow (identity map → windowed count
 * reduction → join of the two) and persists the joined output to stdout.
 */
@Before
public void before() throws Exception {
  flow = Flow.create(getClass().getSimpleName());
  input = flow.createInput(new MockStreamDataSource<>());
  Dataset<Object> identity = MapElements.of(input).using(e -> e).output();
  Dataset<Pair<Object, Long>> counted = ReduceByKey
      .of(identity)
      .keyBy(e -> e)
      .reduceBy(values -> 1L)
      .windowBy(Time.of(Duration.ofSeconds(1)))
      .output();
  Dataset<Pair<Object, Long>> joined = Join.of(identity, counted)
      .by(e -> e, Pair::getFirst)
      .using((Object left, Pair<Object, Long> right, Collector<Long> out) -> {
        out.collect(right.getSecond());
      })
      .windowBy(Time.of(Duration.ofSeconds(1)))
      .output();
  joined.persist(new StdoutSink<>());
}
/**
 * Sums all values sharing a parity key (e % 2) within a single global
 * window; as a side effect the fold increments the "evens"/"odds"
 * counters for every value it sees.
 */
@Override
protected Dataset<Pair<Integer, Integer>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(element -> element % 2)
      .valueBy(element -> element)
      .reduceBy(Fold.of(0, (Integer acc, Integer value, Collector<Integer> context) -> {
        // Count how many even resp. odd values flow through the fold.
        if (value % 2 == 0) {
          context.getCounter("evens").increment();
        } else {
          context.getCounter("odds").increment();
        }
        context.collect(acc + value);
      }))
      .windowBy(GlobalWindowing.get())
      .output();
}
/**
 * Global-window parity sum: values keyed by e % 2 are accumulated by a
 * fold that also bumps the "evens" or "odds" counter per input value.
 */
@Override
protected Dataset<Pair<Integer, Integer>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(item -> item % 2)
      .valueBy(item -> item)
      .reduceBy(Fold.of(0, (Integer sum, Integer next, Collector<Integer> out) -> {
        // Side-effect metric: classify each incoming value by parity.
        if (next % 2 == 0) {
          out.getCounter("evens").increment();
        } else {
          out.getCounter("odds").increment();
        }
        out.collect(sum + next);
      }))
      .windowBy(GlobalWindowing.get())
      .output();
}
/**
 * Parity-keyed sum over count-of-3 windows; {@code whileEmittingEach}
 * makes the fold emit every intermediate running sum, not only the final
 * one.
 */
@Override
protected Dataset<Pair<Integer, Integer>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(element -> element % 2, Integer.class)
      .reduceBy(Fold.whileEmittingEach(0, (sum, value) -> sum + value))
      .windowBy(Count.of(3))
      .output();
}
/**
 * Groups the input by parity (e % 2) and reduces every count-of-3 window
 * into the {@code Set} of its values, emitting (parity, set) pairs.
 */
@Override
protected Dataset<Pair<Integer, Set<Integer>>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(item -> item % 2, Integer.class)
      .valueBy(item -> item)
      .reduceBy(stream -> stream.collect(Collectors.toSet()))
      .windowBy(Count.of(3))
      .output();
}
/**
 * Count-of-3 windows keyed by parity (e % 2); the fold emits each
 * running partial sum as it accumulates (whileEmittingEach).
 */
@Override
protected Dataset<Pair<Integer, Integer>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(item -> item % 2, Integer.class)
      .reduceBy(Fold.whileEmittingEach(0, (partial, next) -> partial + next))
      .windowBy(Count.of(3))
      .output();
}