/**
 * Builds the operator under test: keys the input pairs by their first
 * element, uses the second element as the value, and emits per key a list
 * holding at most two of the values seen by the reducer. A
 * {@code Long.compareTo} based comparison is handed to
 * {@code withSortedValues}.
 *
 * @param input dataset of (String, Long) pairs
 * @return dataset pairing each key with the collected list of values
 */
@Override
protected Dataset<Pair<String, List<Long>>> getOutput(Dataset<Pair<String, Long>> input) {
  return ReduceByKey.of(input)
      .keyBy(pair -> pair.getFirst(), String.class)
      .valueBy(pair -> pair.getSecond())
      .reduceBy((Stream<Long> sorted, Collector<List<Long>> out) -> {
        // Keep only the first two values of the stream handed to the reducer.
        List<Long> firstTwo = sorted.limit(2).collect(Collectors.toList());
        out.collect(firstTwo);
      })
      .withSortedValues((left, right) -> left.compareTo(right))
      .output();
}
});
/**
 * Builds the operator under test: keys the input pairs by their first
 * element, uses the second element as the value, and emits per key a list
 * holding at most two of the values seen by the reducer. A
 * {@code Long.compareTo} based comparison is handed to
 * {@code withSortedValues}.
 *
 * @param input dataset of (String, Long) pairs
 * @return dataset pairing each key with the collected list of values
 */
@Override
protected Dataset<Pair<String, List<Long>>> getOutput(Dataset<Pair<String, Long>> input) {
  return ReduceByKey.of(input)
      .keyBy(pair -> pair.getFirst(), String.class)
      .valueBy(pair -> pair.getSecond())
      .reduceBy((Stream<Long> sorted, Collector<List<Long>> out) -> {
        // Keep only the first two values of the stream handed to the reducer.
        List<Long> firstTwo = sorted.limit(2).collect(Collectors.toList());
        out.collect(firstTwo);
      })
      .withSortedValues((left, right) -> left.compareTo(right))
      .output();
}
});
/**
 * Builds the operator under test: keys each integer by {@code number % 2},
 * keeps the integer itself as the value, and reduces the values of a key
 * into a {@link Set}. Windowing is configured with {@code Count.of(3)}.
 *
 * @param input dataset of integers
 * @return dataset of (key, set of values) pairs
 */
@Override
protected Dataset<Pair<Integer, Set<Integer>>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(number -> number % 2, Integer.class)
      .valueBy(number -> number)
      .reduceBy(values -> values.collect(Collectors.toSet()))
      .windowBy(Count.of(3))
      .output();
}
/**
 * Builds a named {@code ReduceByKey} ending in {@code outputValues()} and
 * checks the resulting flow: the output dataset belongs to the flow, the
 * flow reports a size of 2, and the first operator is a {@code ReduceByKey}
 * carrying the given name, non-null key extractor, value extractor and
 * reducer, and no windowing.
 */
@Test
public void testBuild_OutputValues() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  Dataset<Long> reduced = ReduceByKey.named("ReduceByKeyValues")
      .of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Long::longValue unboxes directly; Long::new would route through the
      // deprecated Long(long) constructor and allocate a throwaway box.
      .reduceBy(n -> StreamSupport.stream(n.spliterator(), false)
          .mapToLong(Long::longValue)
          .sum())
      .outputValues();

  assertEquals(flow, reduced.getFlow());
  assertEquals(2, flow.size());

  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertEquals(flow, reduce.getFlow());
  assertEquals("ReduceByKeyValues", reduce.getName());
  assertNotNull(reduce.getKeyExtractor());
  assertNotNull(reduce.getValueExtractor());
  assertNotNull(reduce.getReducer());
  assertNull(reduce.getWindowing());
}
/**
 * Builds the pipeline under test in three steps:
 * <ol>
 *   <li>assigns event time from the second element of each input pair,</li>
 *   <li>reduces by key ({@code first.charAt(0) - '0'}) collecting the first
 *       elements into a {@link Set}, windowed by
 *       {@code Session.of(Duration.ofMillis(5))},</li>
 *   <li>maps every reduced pair to a triple of (window, key, set), where the
 *       window is read from the flat-map context and cast to
 *       {@code TimeInterval}.</li>
 * </ol>
 *
 * @param input dataset of (String, Integer) pairs
 * @return dataset of (window, key, set of strings) triples
 */
@Override
protected Dataset<Triple<TimeInterval, Integer, Set<String>>> getOutput(
    Dataset<Pair<String, Integer>> input) {
  Dataset<Pair<String, Integer>> timed =
      AssignEventTime.of(input).using(pair -> pair.getSecond()).output();

  Dataset<Pair<Integer, Set<String>>> reduced = ReduceByKey.of(timed)
      .keyBy(pair -> pair.getFirst().charAt(0) - '0')
      .valueBy(pair -> pair.getFirst())
      .reduceBy(values -> values.collect(Collectors.toSet()))
      .windowBy(Session.of(Duration.ofMillis(5)))
      .output();

  return FlatMap.of(reduced)
      .using((UnaryFunctor<Pair<Integer, Set<String>>, Triple<TimeInterval, Integer, Set<String>>>)
          (elem, context) -> {
            TimeInterval window = (TimeInterval) context.getWindow();
            context.collect(Triple.of(window, elem.getFirst(), elem.getSecond()));
          })
      .output();
}
/**
 * Builds the pipeline under test in three steps:
 * <ol>
 *   <li>assigns event time from the second element of each input pair,</li>
 *   <li>reduces by key ({@code first.charAt(0) - '0'}) collecting the first
 *       elements into a {@link Set}, windowed by
 *       {@code Session.of(Duration.ofMillis(5))},</li>
 *   <li>maps every reduced pair to a triple of (window, key, set), where the
 *       window is read from the flat-map context and cast to
 *       {@code TimeInterval}.</li>
 * </ol>
 *
 * @param input dataset of (String, Integer) pairs
 * @return dataset of (window, key, set of strings) triples
 */
@Override
protected Dataset<Triple<TimeInterval, Integer, Set<String>>> getOutput(
    Dataset<Pair<String, Integer>> input) {
  Dataset<Pair<String, Integer>> timed =
      AssignEventTime.of(input).using(pair -> pair.getSecond()).output();

  Dataset<Pair<Integer, Set<String>>> reduced = ReduceByKey.of(timed)
      .keyBy(pair -> pair.getFirst().charAt(0) - '0')
      .valueBy(pair -> pair.getFirst())
      .reduceBy(values -> values.collect(Collectors.toSet()))
      .windowBy(Session.of(Duration.ofMillis(5)))
      .output();

  return FlatMap.of(reduced)
      .using((UnaryFunctor<Pair<Integer, Set<String>>, Triple<TimeInterval, Integer, Set<String>>>)
          (elem, context) -> {
            TimeInterval window = (TimeInterval) context.getWindow();
            context.collect(Triple.of(window, elem.getFirst(), elem.getSecond()));
          })
      .output();
}
/**
 * Builds the operator under test: keys each integer by {@code number % 2}
 * and folds the values of a key starting from {@code 0}. For every folded
 * element the "evens" or "odds" counter is incremented depending on the
 * element's parity, and the running sum is emitted to the collector.
 * Windowing is {@code GlobalWindowing.get()}.
 *
 * @param input dataset of integers
 * @return dataset of (key, folded value) pairs
 */
@Override
protected Dataset<Pair<Integer, Integer>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(number -> number % 2)
      .valueBy(number -> number)
      .reduceBy(Fold.of(0, (Integer acc, Integer next, Collector<Integer> ctx) -> {
        // Pick the counter by parity of the incoming element, not of the accumulator.
        String counterName = (next % 2 == 0) ? "evens" : "odds";
        ctx.getCounter(counterName).increment();
        ctx.collect(acc + next);
      }))
      .windowBy(GlobalWindowing.get())
      .output();
}
/**
 * Builds the operator under test: keys each integer by {@code number % 2}
 * and folds the values of a key starting from {@code 0}. For every folded
 * element the "evens" or "odds" counter is incremented depending on the
 * element's parity, and the running sum is emitted to the collector.
 * Windowing is {@code GlobalWindowing.get()}.
 *
 * @param input dataset of integers
 * @return dataset of (key, folded value) pairs
 */
@Override
protected Dataset<Pair<Integer, Integer>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(number -> number % 2)
      .valueBy(number -> number)
      .reduceBy(Fold.of(0, (Integer acc, Integer next, Collector<Integer> ctx) -> {
        // Pick the counter by parity of the incoming element, not of the accumulator.
        String counterName = (next % 2 == 0) ? "evens" : "odds";
        ctx.getCounter(counterName).increment();
        ctx.collect(acc + next);
      }))
      .windowBy(GlobalWindowing.get())
      .output();
}
/**
 * Calls the three-argument {@code applyIf} with a {@code false} condition,
 * passing an identity function first and a function that adds
 * {@code Time} windowing second. The assertion expects the built operator's
 * windowing to be a {@code Time} instance, i.e. the second function to have
 * been applied.
 */
@Test
public void testWindow_applyIfNot() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  ReduceByKey.of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Long::longValue unboxes directly; Long::new would route through the
      // deprecated Long(long) constructor and allocate a throwaway box.
      .reduceBy(n -> StreamSupport.stream(n.spliterator(), false)
          .mapToLong(Long::longValue)
          .sum())
      .withSortedValues(Long::compare)
      .applyIf(false, b -> b, b -> b.windowBy(Time.of(Duration.ofHours(1))))
      .output();

  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertTrue(reduce.getWindowing() instanceof Time);
}
/**
 * Calls the two-argument {@code applyIf} with a {@code true} condition and
 * a function that adds {@code Time} windowing, then asserts that the built
 * operator's windowing is a {@code Time} instance.
 */
@Test
public void testWindow_applyIf() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  ReduceByKey.of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Long::longValue unboxes directly; Long::new would route through the
      // deprecated Long(long) constructor and allocate a throwaway box.
      .reduceBy(n -> StreamSupport.stream(n.spliterator(), false)
          .mapToLong(Long::longValue)
          .sum())
      .withSortedValues(Long::compare)
      .applyIf(true, b -> b.windowBy(Time.of(Duration.ofHours(1))))
      .output();

  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertTrue(reduce.getWindowing() instanceof Time);
}
/**
 * Builds a {@code ReduceByKey} with {@code withSortedValues} followed by
 * {@code Time} windowing and asserts that the resulting operator has a
 * non-null {@code valueComparator}.
 */
@Test
public void testBuild_sortedValues() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  ReduceByKey.of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Long::longValue unboxes directly; Long::new would route through the
      // deprecated Long(long) constructor and allocate a throwaway box.
      .reduceBy(n -> StreamSupport.stream(n.spliterator(), false)
          .mapToLong(Long::longValue)
          .sum())
      .withSortedValues(Long::compare)
      .windowBy(Time.of(Duration.ofHours(1)))
      .output();

  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertNotNull(reduce.valueComparator);
}
/**
 * Returns the result of {@code sink.withPrepareDataset(...)}, registering a
 * callback that runs a {@code ReduceByKey} over the dataset it receives:
 * pairs are keyed by {@code first % 2}, the second element is the value,
 * every value is forwarded unchanged to the collector, a
 * {@code Long.compare} based comparison is handed to
 * {@code withSortedValues}, and the output is persisted to {@code sink}.
 *
 * @param sink the sink to decorate; also the persist target inside the callback
 * @return whatever {@code withPrepareDataset} returns for this sink
 */
@Override
public ListDataSink<Pair<Integer, Long>> modifySink(
    ListDataSink<Pair<Integer, Long>> sink) {
  return sink.withPrepareDataset(dataset -> {
    ReduceByKey.of(dataset)
        .keyBy(pair -> pair.getFirst() % 2)
        .valueBy(pair -> pair.getSecond())
        .reduceBy((Stream<Long> sorted, Collector<Long> out) ->
            sorted.forEach(value -> out.collect(value)))
        .withSortedValues((left, right) -> Long.compare(left, right))
        .output()
        .persist(sink);
  });
}
/**
 * Builds a {@code ReduceByKey} with {@code withSortedValues} and no
 * {@code windowBy} call, then asserts that the resulting operator has a
 * non-null {@code valueComparator}.
 */
@Test
public void testBuild_sortedValuesWithNoWindowing() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  ReduceByKey.of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Long::longValue unboxes directly; Long::new would route through the
      // deprecated Long(long) constructor and allocate a throwaway box.
      .reduceBy(n -> StreamSupport.stream(n.spliterator(), false)
          .mapToLong(Long::longValue)
          .sum())
      .withSortedValues(Long::compare)
      .output();

  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertNotNull(reduce.valueComparator);
}
/**
 * Returns the result of {@code sink.withPrepareDataset(...)}, registering a
 * callback that runs a {@code ReduceByKey} over the dataset it receives:
 * pairs are keyed by {@code first % 2}, the second element is the value,
 * every value is forwarded unchanged to the collector, a
 * {@code Long.compare} based comparison is handed to
 * {@code withSortedValues}, and the output is persisted to {@code sink}.
 *
 * @param sink the sink to decorate; also the persist target inside the callback
 * @return whatever {@code withPrepareDataset} returns for this sink
 */
@Override
public ListDataSink<Pair<Integer, Long>> modifySink(
    ListDataSink<Pair<Integer, Long>> sink) {
  return sink.withPrepareDataset(dataset -> {
    ReduceByKey.of(dataset)
        .keyBy(pair -> pair.getFirst() % 2)
        .valueBy(pair -> pair.getSecond())
        .reduceBy((Stream<Long> sorted, Collector<Long> out) ->
            sorted.forEach(value -> out.collect(value)))
        .withSortedValues((left, right) -> Long.compare(left, right))
        .output()
        .persist(sink);
  });
}
/**
 * Builds the operator under test: keys the input pairs by their first
 * element, uses the second element as the value, and emits per key a single
 * list containing all values handed to the reducer. A
 * {@code Long.compareTo} based comparison is handed to
 * {@code withSortedValues}.
 *
 * @param input dataset of (String, Long) pairs
 * @return dataset pairing each key with the collected list of values
 */
@Override
protected Dataset<Pair<String, List<Long>>> getOutput(Dataset<Pair<String, Long>> input) {
  return ReduceByKey.of(input)
      .keyBy(pair -> pair.getFirst(), String.class)
      .valueBy(pair -> pair.getSecond())
      .reduceBy((Stream<Long> sorted, Collector<List<Long>> out) -> {
        List<Long> all = sorted.collect(Collectors.toList());
        out.collect(all);
      })
      .withSortedValues((left, right) -> left.compareTo(right))
      .output();
}
});
/**
 * Builds the operator under test: keys each integer by {@code number % 2},
 * keeps the integer itself as the value, and reduces the values of a key
 * into a {@link Set}. Windowing is configured with {@code Count.of(3)} and
 * the pipeline is finished with {@code outputValues()}, so the returned
 * dataset carries the sets without their keys.
 *
 * @param input dataset of integers
 * @return dataset of reduced value sets
 */
@Override
protected Dataset<Set<Integer>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(number -> number % 2, Integer.class)
      .valueBy(number -> number)
      .reduceBy(values -> values.collect(Collectors.toSet()))
      .windowBy(Count.of(3))
      .outputValues();
}
/**
 * Builds the operator under test: keys each integer by {@code number % 2},
 * keeps the integer itself as the value, and reduces the values of a key
 * into a {@link Set}. Windowing is configured with {@code Count.of(3)} and
 * the pipeline is finished with {@code outputValues()}, so the returned
 * dataset carries the sets without their keys.
 *
 * @param input dataset of integers
 * @return dataset of reduced value sets
 */
@Override
protected Dataset<Set<Integer>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(number -> number % 2, Integer.class)
      .valueBy(number -> number)
      .reduceBy(values -> values.collect(Collectors.toSet()))
      .windowBy(Count.of(3))
      .outputValues();
}
/**
 * Builds the operator under test: keys each integer by {@code number % 2},
 * keeps the integer itself as the value, and reduces the values of a key
 * into a {@link Set}. Windowing is configured with {@code Count.of(3)}.
 *
 * @param input dataset of integers
 * @return dataset of (key, set of values) pairs
 */
@Override
protected Dataset<Pair<Integer, Set<Integer>>> getOutput(Dataset<Integer> input) {
  return ReduceByKey.of(input)
      .keyBy(number -> number % 2, Integer.class)
      .valueBy(number -> number)
      .reduceBy(values -> values.collect(Collectors.toSet()))
      .windowBy(Count.of(3))
      .output();
}
/**
 * Builds a minimal {@code ReduceByKey} (key, value and reducer only) and
 * asserts that the resulting operator has a non-null {@code reducer}.
 */
@Test
public void testBuild_ReduceBy() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  ReduceByKey.of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Long::longValue unboxes directly; Long::new would route through the
      // deprecated Long(long) constructor and allocate a throwaway box.
      .reduceBy(n -> StreamSupport.stream(n.spliterator(), false)
          .mapToLong(Long::longValue)
          .sum())
      .output();

  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertNotNull(reduce.reducer);
}
/**
 * Builds the operator under test: keys the input pairs by their first
 * element, uses the second element as the value, and emits per key a single
 * list containing all values handed to the reducer. A
 * {@code Long.compareTo} based comparison is handed to
 * {@code withSortedValues}.
 *
 * @param input dataset of (String, Long) pairs
 * @return dataset pairing each key with the collected list of values
 */
@Override
protected Dataset<Pair<String, List<Long>>> getOutput(Dataset<Pair<String, Long>> input) {
  return ReduceByKey.of(input)
      .keyBy(pair -> pair.getFirst(), String.class)
      .valueBy(pair -> pair.getSecond())
      .reduceBy((Stream<Long> sorted, Collector<List<Long>> out) -> {
        List<Long> all = sorted.collect(Collectors.toList());
        out.collect(all);
      })
      .withSortedValues((left, right) -> left.compareTo(right))
      .output();
}
});