/**
 * Builds the tested flow: pairs are keyed by their first element, the second
 * elements are handed to the reducer through {@code withSortedValues(Long::compareTo)},
 * and at most the first two values of each key are emitted as one list.
 *
 * @param input dataset of (key, value) pairs
 * @return dataset of (key, list of at most two values) pairs
 */
@Override
protected Dataset<Pair<String, List<Long>>> getOutput(Dataset<Pair<String, Long>> input) {
  return ReduceByKey.of(input)
      .keyBy(Pair::getFirst, String.class)
      .valueBy(Pair::getSecond)
      .reduceBy((Stream<Long> sorted, Collector<List<Long>> out) -> {
        // keep only the leading two elements of the value stream
        List<Long> firstTwo = sorted.limit(2).collect(Collectors.toList());
        out.collect(firstTwo);
      })
      .withSortedValues(Long::compareTo)
      .output();
}
});
/**
 * Assembles the operator chain under test. Input pairs are grouped by
 * {@code Pair::getFirst}; the values ({@code Pair::getSecond}) are passed
 * through {@code withSortedValues(Long::compareTo)} and the reducer emits a
 * single list holding no more than two of them per key.
 *
 * @param input dataset of (key, value) pairs
 * @return dataset of (key, list of at most two values) pairs
 */
@Override
protected Dataset<Pair<String, List<Long>>> getOutput(Dataset<Pair<String, Long>> input) {
  return ReduceByKey.of(input)
      .keyBy(Pair::getFirst, String.class)
      .valueBy(Pair::getSecond)
      .reduceBy((Stream<Long> stream, Collector<List<Long>> collector) -> {
        // truncate the value stream to two elements before materializing it
        List<Long> head = stream.limit(2).collect(Collectors.toList());
        collector.collect(head);
      })
      .withSortedValues(Long::compareTo)
      .output();
}
});
/**
 * Builds the tested flow: pairs are keyed by their first element and all
 * second elements of a key are gathered into one list. Values reach the
 * reducer through {@code withSortedValues(Long::compareTo)}.
 *
 * @param input dataset of (key, value) pairs
 * @return dataset of (key, list of all values) pairs
 */
@Override
protected Dataset<Pair<String, List<Long>>> getOutput(Dataset<Pair<String, Long>> input) {
  return ReduceByKey.of(input)
      .keyBy(Pair::getFirst, String.class)
      .valueBy(Pair::getSecond)
      .reduceBy((Stream<Long> sorted, Collector<List<Long>> out) -> {
        // materialize the whole value stream and emit it as a single element
        List<Long> all = sorted.collect(Collectors.toList());
        out.collect(all);
      })
      .withSortedValues(Long::compareTo)
      .output();
}
});
.withSortedValues(comparator::compare) .applyIf(windowing != null, b -> b.windowBy(windowing)) .outputValues();
/**
 * Builds a two-stage flow.
 *
 * <p>Stage one keys integers by {@code e % 2}, windows them with
 * {@code Count.of(3)} and collects each group's values into a list (values are
 * passed through {@code withSortedValues(Integer::compare)}).
 *
 * <p>Stage two reduces all (key, list) pairs into a single list under
 * {@code GlobalWindowing}, ordering the pairs by key and, for equal keys, by
 * the first element of their value list.
 *
 * @param input dataset of integers
 * @return dataset whose elements are lists of (key, values) pairs
 */
@Override
protected Dataset<List<Pair<Integer, List<Integer>>>> getOutput(Dataset<Integer> input) {
  Dataset<Pair<Integer, List<Integer>>> perKeyWindows =
      ReduceByKey.of(input)
          .keyBy(e -> e % 2, Integer.class)
          .valueBy(e -> e)
          .reduceBy(values -> values.collect(Collectors.toList()))
          .withSortedValues(Integer::compare)
          .windowBy(Count.of(3))
          .output();

  return ReduceWindow.of(perKeyWindows)
      .reduceBy(pairs -> pairs.collect(Collectors.toList()))
      .withSortedValues((left, right) -> {
        int byKey = left.getFirst().compareTo(right.getFirst());
        if (byKey != 0) {
          return byKey;
        }
        // same key: fall back to the leading element of each value list
        return Integer.compare(left.getSecond().get(0), right.getSecond().get(0));
      })
      .windowBy(GlobalWindowing.get())
      .output();
}
/**
 * Assembles the operator chain under test in two steps: a
 * {@code ReduceByKey} keyed by {@code e % 2} over {@code Count.of(3)} windows
 * that lists each group's values, followed by a {@code ReduceWindow} under
 * {@code GlobalWindowing} that lists all resulting (key, values) pairs. The
 * second step orders pairs by key, breaking ties on the first element of the
 * value list.
 *
 * @param input dataset of integers
 * @return dataset whose elements are lists of (key, values) pairs
 */
@Override
protected Dataset<List<Pair<Integer, List<Integer>>>> getOutput(Dataset<Integer> input) {
  Dataset<Pair<Integer, List<Integer>>> grouped =
      ReduceByKey.of(input)
          .keyBy(e -> e % 2, Integer.class)
          .valueBy(e -> e)
          .reduceBy(stream -> stream.collect(Collectors.toList()))
          .withSortedValues(Integer::compare)
          .windowBy(Count.of(3))
          .output();

  return ReduceWindow.of(grouped)
      .reduceBy(stream -> stream.collect(Collectors.toList()))
      .withSortedValues((a, b) -> {
        int keyOrder = a.getFirst().compareTo(b.getFirst());
        if (keyOrder != 0) {
          return keyOrder;
        }
        // keys tie: compare the first value of each list instead
        return Integer.compare(a.getSecond().get(0), b.getSecond().get(0));
      })
      .windowBy(GlobalWindowing.get())
      .output();
}
/**
 * Checks the three-argument {@code applyIf}: with a {@code false} condition the
 * second transformation (which sets a one-hour {@code Time} windowing) is the
 * one expected to take effect, so the built operator must report a
 * {@code Time} windowing.
 */
@Test
public void testWindow_applyIfNot() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  ReduceByKey.of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Long::longValue unboxes directly; the deprecated Long(long) constructor is avoided
      .reduceBy(n -> StreamSupport.stream(n.spliterator(), false).mapToLong(Long::longValue).sum())
      .withSortedValues(Long::compare)
      .applyIf(false, b -> b, b -> b.windowBy(Time.of(Duration.ofHours(1))))
      .output();

  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertTrue(reduce.getWindowing() instanceof Time);
}
/**
 * Checks the two-argument {@code applyIf}: with a {@code true} condition the
 * supplied transformation sets a one-hour {@code Time} windowing, so the built
 * operator must report a {@code Time} windowing.
 */
@Test
public void testWindow_applyIf() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  ReduceByKey.of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Long::longValue unboxes directly; the deprecated Long(long) constructor is avoided
      .reduceBy(n -> StreamSupport.stream(n.spliterator(), false).mapToLong(Long::longValue).sum())
      .withSortedValues(Long::compare)
      .applyIf(true, b -> b.windowBy(Time.of(Duration.ofHours(1))))
      .output();

  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertTrue(reduce.getWindowing() instanceof Time);
}
/**
 * Builds a {@code ReduceByKey} with {@code withSortedValues} and a one-hour
 * {@code Time} windowing, then checks that the operator registered in the flow
 * carries a non-null {@code valueComparator}.
 */
@Test
public void testBuild_sortedValues() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  ReduceByKey.of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Long::longValue unboxes directly; the deprecated Long(long) constructor is avoided
      .reduceBy(n -> StreamSupport.stream(n.spliterator(), false).mapToLong(Long::longValue).sum())
      .withSortedValues(Long::compare)
      .windowBy(Time.of(Duration.ofHours(1)))
      .output();

  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertNotNull(reduce.valueComparator);
}
/**
 * Wraps the sink so that, before data is persisted, it is run through a
 * {@code ReduceByKey} keyed by {@code first % 2} that forwards every value of
 * a key unchanged (values pass through {@code withSortedValues(Long::compare)}).
 *
 * @param sink the sink to decorate
 * @return the result of {@code sink.withPrepareDataset(...)}
 */
@Override
public ListDataSink<Pair<Integer, Long>> modifySink(
    ListDataSink<Pair<Integer, Long>> sink) {
  return sink.withPrepareDataset(dataset -> {
    ReduceByKey.of(dataset)
        .keyBy(pair -> pair.getFirst() % 2)
        .valueBy(Pair::getSecond)
        .reduceBy((Stream<Long> sorted, Collector<Long> out) -> {
          // emit each incoming value as its own output element
          sorted.forEach(out::collect);
        })
        .withSortedValues(Long::compare)
        .output()
        .persist(sink);
  });
}
/**
 * Assembles the operator chain under test. Input pairs are grouped by
 * {@code Pair::getFirst}; every value ({@code Pair::getSecond}) of a key is
 * collected into one list, with values passed through
 * {@code withSortedValues(Long::compareTo)}.
 *
 * @param input dataset of (key, value) pairs
 * @return dataset of (key, list of all values) pairs
 */
@Override
protected Dataset<Pair<String, List<Long>>> getOutput(Dataset<Pair<String, Long>> input) {
  return ReduceByKey.of(input)
      .keyBy(Pair::getFirst, String.class)
      .valueBy(Pair::getSecond)
      .reduceBy((Stream<Long> stream, Collector<List<Long>> collector) -> {
        // one output element per key: the full list of its values
        List<Long> gathered = stream.collect(Collectors.toList());
        collector.collect(gathered);
      })
      .withSortedValues(Long::compareTo)
      .output();
}
});
/**
 * Builds a {@code ReduceByKey} with {@code withSortedValues} but without any
 * {@code windowBy} call, then checks that the operator registered in the flow
 * carries a non-null {@code valueComparator}.
 */
@Test
public void testBuild_sortedValuesWithNoWindowing() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 2);

  ReduceByKey.of(dataset)
      .keyBy(s -> s)
      .valueBy(s -> 1L)
      // Long::longValue unboxes directly; the deprecated Long(long) constructor is avoided
      .reduceBy(n -> StreamSupport.stream(n.spliterator(), false).mapToLong(Long::longValue).sum())
      .withSortedValues(Long::compare)
      .output();

  ReduceByKey reduce = (ReduceByKey) flow.operators().iterator().next();
  assertNotNull(reduce.valueComparator);
}
/**
 * Decorates the sink with a dataset-preparation step: the dataset is reduced
 * by key ({@code first % 2}), each value is re-emitted as is (values pass
 * through {@code withSortedValues(Long::compare)}), and the result is
 * persisted to the same sink.
 *
 * @param sink the sink to decorate
 * @return the result of {@code sink.withPrepareDataset(...)}
 */
@Override
public ListDataSink<Pair<Integer, Long>> modifySink(
    ListDataSink<Pair<Integer, Long>> sink) {
  return sink.withPrepareDataset(prepared -> {
    ReduceByKey.of(prepared)
        .keyBy(entry -> entry.getFirst() % 2)
        .valueBy(Pair::getSecond)
        .reduceBy((Stream<Long> stream, Collector<Long> collector) -> {
          // pass every value through untouched
          stream.forEach(collector::collect);
        })
        .withSortedValues(Long::compare)
        .output()
        .persist(sink);
  });
}