@Test public void testBasics() throws Exception { final Duration READ_DELAY = Duration.ofMillis(100L); ListDataSink<Set<String>> out = ListDataSink.get(); Fluent.flow("Test") .read(ListDataSource.unbounded( asList("0-one 1-two 0-three 1-four 0-five 1-six 0-seven".split(" "))) .withReadDelay(READ_DELAY)) // ~ create windows of size three .apply(input -> ReduceByKey.of(input) .keyBy(e -> "") .valueBy(e -> e) .reduceBy(s -> s.collect(Collectors.toSet())) .windowBy(Count.of(3))) // ~ strip the needless key and flatten out the elements thereby // creating multiple elements in the output belonging to the same window .flatMap((Pair<String, Set<String>> e, Collector<String> c) -> e.getSecond().stream().forEachOrdered(c::collect)) // ~ we now expect to reconstruct the same windowing // as the very initial step .apply(input -> ReduceByKey.of(input) .keyBy(e -> "") .valueBy(e -> e) .reduceBy(s -> s.collect(Collectors.toSet()))) // ~ strip the needless key .mapElements(Pair::getSecond) .persist(out) .execute(new LocalExecutor()); }
.keyBy(LogLine::getIp) .valueBy(line -> 1L) .combineBy(Sums.ofLongs()) .windowBy(Time.of(Duration.ofDays(1))) .output();
.keyBy(e -> "") .valueBy(e -> 1) .combineBy(Sums.ofInts()) .windowBy(windowing) .output();
.keyBy(String::toLowerCase) .valueBy(e -> 1L) .combineBy(Sums.ofLongs()) .output();
.keyBy(e -> "") .valueBy(e -> 1) .combineBy(Sums.ofInts()) .windowBy(windowing) .output();
.keyBy(Pair::getFirst) .valueBy(Pair::getSecond) .combineBy(Sums.ofLongs()) .windowBy(Time.of(Duration.ofSeconds(1))) .output();
.keyBy(Pair::getFirst) .valueBy(e -> 1L) .combineBy(Sums.ofLongs()) .windowBy(TimeSliding.of(Duration.ofMillis(10), Duration.ofMillis(5))) .output();
.keyBy(Triple::getFirst) .valueBy(Triple::getSecond) .combineBy(xs -> { StringBuilder buf = new StringBuilder(); xs.forEach(buf::append);
.keyBy(e -> e.getFirst().word) .valueBy(Pair::getSecond) .combineBy(Sums.ofLongs()) .windowBy(Time.of(Duration.ofSeconds(1))) .output();
.keyBy(Pair::getFirst) .valueBy(e -> 1L) .reduceBy(Sums.ofLongs()) .windowBy(TimeSliding.of(Duration.ofMillis(10), Duration.ofMillis(5))) .output();
.keyBy(e -> "") .valueBy(e -> e) .reduceBy(s -> s.collect(Collectors.toSet())) .windowBy(Count.of(3)) .output();
.keyBy(Pair::getFirst) .valueBy(e -> 1L) .combineBy(Sums.ofLongs()) .windowBy(Time.of(Duration.ofMillis(5))) .output();
.keyBy(Pair::getFirst) .valueBy(Pair::getSecond) .combineBy(Sums.ofLongs()) .windowBy(Time.of(Duration.ofSeconds(1))) .output();
.keyBy(Pair::getFirst) .valueBy(e -> 1L) .combineBy(Sums.ofLongs()) .windowBy(Time.of(Duration.ofMillis(5))) .output();
@Test public void testWordCountBatch() throws Exception { Flow flow = Flow.create("Test"); Dataset<String> lines = flow.createInput(ListDataSource.bounded( asList("one two three four", "one two three", "one two", "one"))); // expand it to words Dataset<Pair<String, Long>> words = FlatMap.of(lines) .using(toWordCountPair()) .output(); // reduce it to counts, use windowing, so the output is batch or stream // depending on the type of input Dataset<Pair<String, Long>> streamOutput = ReduceByKey .of(words) .keyBy(Pair::getFirst) .valueBy(Pair::getSecond) .combineBy(Sums.ofLongs()) .output(); ListDataSink<Pair<String, Long>> out = ListDataSink.get(); streamOutput.persist(out); executor.submit(flow).get(); DatasetAssert.unorderedEquals( out.getOutputs(), Pair.of("one", 4L), Pair.of("two", 3L), Pair.of("three", 2L), Pair.of("four", 1L)); }