/**
 * Builds the dataset under test: a {@code FullJoin} of {@code left} and {@code right}.
 *
 * <p>Left elements are keyed by their own value, right elements by their value modulo 10
 * (narrowed to {@code int}). Every call of the join function emits one string of the form
 * {@code "<left>+<right>"}; an absent side is rendered as the text {@code "null"}. The join
 * is windowed by a new {@code EvenOddWindowing}.
 *
 * @param left dataset supplying the left side of the join
 * @param right dataset supplying the right side of the join
 * @return the dataset of (Integer, String) pairs produced by the join
 */
@Override
protected Dataset<Pair<Integer, String>> getOutput(
    Dataset<Integer> left, Dataset<Long> right) {
  return FullJoin.of(left, right)
      .by(leftElem -> leftElem, rightElem -> (int) (rightElem % 10), Integer.class)
      .using((Optional<Integer> maybeLeft, Optional<Long> maybeRight, Collector<String> out) -> {
        // A missing side becomes null, which string concatenation prints as "null".
        final Integer leftValue = maybeLeft.orElse(null);
        final Long rightValue = maybeRight.orElse(null);
        out.collect(leftValue + "+" + rightValue);
      })
      .windowBy(new EvenOddWindowing())
      .output();
}
/**
 * Builds the dataset under test: a {@code FullJoin} of {@code left} and {@code right}.
 *
 * <p>Left elements are keyed by their own value, right elements by their value modulo 10
 * (narrowed to {@code int}). Every call of the join function emits one string of the form
 * {@code "<left>+<right>"}; an absent side is rendered as the text {@code "null"}. The join
 * is windowed by a new {@code EvenOddWindowing}.
 *
 * @param left dataset supplying the left side of the join
 * @param right dataset supplying the right side of the join
 * @return the dataset of (Integer, String) pairs produced by the join
 */
@Override
protected Dataset<Pair<Integer, String>> getOutput(
    Dataset<Integer> left, Dataset<Long> right) {
  return FullJoin.of(left, right)
      .by(leftElem -> leftElem, rightElem -> (int) (rightElem % 10), Integer.class)
      .using((Optional<Integer> maybeLeft, Optional<Long> maybeRight, Collector<String> out) -> {
        // A missing side becomes null, which string concatenation prints as "null".
        final Integer leftValue = maybeLeft.orElse(null);
        final Long rightValue = maybeRight.orElse(null);
        out.collect(leftValue + "+" + rightValue);
      })
      .windowBy(new EvenOddWindowing())
      .output();
}
/**
 * Builds the dataset under test: a {@code RightJoin} of {@code left} and {@code right}.
 *
 * <p>Left elements are keyed by their own value, right elements by their value modulo 10
 * (narrowed to {@code int}). Every call of the join function emits one string of the form
 * {@code "<left>+<right>"}; an absent left side is rendered as the text {@code "null"}. The
 * join is windowed by a new {@code EvenOddWindowing}.
 *
 * @param left dataset supplying the optional left side of the join
 * @param right dataset supplying the right side of the join
 * @return the dataset of (Integer, String) pairs produced by the join
 */
@Override
protected Dataset<Pair<Integer, String>> getOutput(
    Dataset<Integer> left, Dataset<Long> right) {
  return RightJoin.of(left, right)
      .by(leftElem -> leftElem, rightElem -> (int) (rightElem % 10), Integer.class)
      .using((Optional<Integer> maybeLeft, Long rightValue, Collector<String> out) ->
          out.collect(maybeLeft.orElse(null) + "+" + rightValue))
      .windowBy(new EvenOddWindowing())
      .output();
}
.windowBy(Time.of(params.getShortStats())) .output();
.windowBy(Time.of(Duration.ofHours(1))) .output();
.by(e -> e, Pair::getFirst) .using((Object l, Pair<Object, Long> r, Collector<Long> c) -> c.collect(r.getSecond())) .windowBy(Time.of(Duration.ofSeconds(1))) .output(new Util.TestHint());
/**
 * Expects an {@link IllegalArgumentException} when two datasets of one flow are persisted to
 * the same sink instance and the flow is then unfolded with the executor's basic operators.
 *
 * <p>The flow maps the input through an identity function, reduces it by key to the constant
 * {@code 1L} in one-second {@code Time} windows, and joins the mapped dataset with the reduced
 * one. Both the join output and the reduced dataset are persisted to a single
 * {@code ListDataSink}.
 *
 * @throws Exception declared by the test signature
 */
@Test(expected = IllegalArgumentException.class)
public void testMultipleOutputsToSameSink() throws Exception {
  flow = Flow.create(getClass().getSimpleName());
  input = flow.createInput(new MockStreamDataSource<>());

  Dataset<Object> passthrough = MapElements.of(input).using(elem -> elem).output();

  Dataset<Pair<Object, Long>> counts = ReduceByKey
      .of(passthrough)
      .keyBy(elem -> elem)
      .reduceBy(group -> 1L)
      .windowBy(Time.of(Duration.ofSeconds(1)))
      .output();

  Dataset<Pair<Object, Long>> joined = Join.of(passthrough, counts)
      .by(elem -> elem, Pair::getFirst)
      .using((Object leftElem, Pair<Object, Long> countPair, Collector<Long> out) ->
          out.collect(countPair.getSecond()))
      .windowBy(Time.of(Duration.ofSeconds(1)))
      .output();

  // The same sink instance is deliberately registered for two different datasets.
  ListDataSink<Pair<Object, Long>> sink = ListDataSink.get();
  joined.persist(sink);
  counts.persist(sink);

  FlowUnfolder.unfold(flow, Executor.getBasicOps());
}
/**
 * Builds the dataset under test: a {@code Join} of two event-time-stamped inputs that also
 * updates a counter and a histogram from inside the join function.
 *
 * <p>Both inputs first pass through {@code AssignEventTime} using each pair's second
 * component. They are then joined under the constant key {@code ""} in {@code Time} windows
 * of 3 milliseconds. For every joined pair the join function
 * <ol>
 *   <li>reads the collector's window, cast to {@code TimeInterval},</li>
 *   <li>increments the counter {@code "cntr"} by 10,</li>
 *   <li>calls {@code add(2345, 8)} on the histogram named {@code "hist-"} followed by the
 *       character at index 1 of the left pair's first component, and</li>
 *   <li>emits a triple of the window and the first components of both pairs.</li>
 * </ol>
 * The join key is finally dropped by mapping each result to its second component.
 *
 * @param left left input of (String, Long) pairs
 * @param right right input of (String, Long) pairs
 * @return dataset of (window, left first component, right first component) triples
 */
@Override
protected Dataset<Triple<TimeInterval, String, String>> getOutput(
    Dataset<Pair<String, Long>> left, Dataset<Pair<String, Long>> right) {
  final Dataset<Pair<String, Long>> timedLeft =
      AssignEventTime.of(left).using(Pair::getSecond).output();
  final Dataset<Pair<String, Long>> timedRight =
      AssignEventTime.of(right).using(Pair::getSecond).output();

  final Dataset<Pair<String, Triple<TimeInterval, String, String>>> joined =
      Join.of(timedLeft, timedRight)
          .by(ignoredLeft -> "", ignoredRight -> "", String.class)
          .using((Pair<String, Long> leftPair, Pair<String, Long> rightPair,
              Collector<Triple<TimeInterval, String, String>> out) -> {
            final TimeInterval window = (TimeInterval) out.getWindow();
            out.getCounter("cntr").increment(10);
            final String leftName = leftPair.getFirst();
            out.getHistogram("hist-" + leftName.charAt(1)).add(2345, 8);
            out.collect(Triple.of(window, leftName, rightPair.getFirst()));
          })
          .windowBy(Time.of(Duration.ofMillis(3)))
          .output();

  return MapElements.of(joined).using(Pair::getSecond).output();
}
/**
 * Builds the dataset under test: a {@code Join} of two event-time-stamped inputs that also
 * updates a counter and a histogram from inside the join function.
 *
 * <p>Both inputs first pass through {@code AssignEventTime} using each pair's second
 * component. They are then joined under the constant key {@code ""} in {@code Time} windows
 * of 3 milliseconds. For every joined pair the join function
 * <ol>
 *   <li>reads the collector's window, cast to {@code TimeInterval},</li>
 *   <li>increments the counter {@code "cntr"} by 10,</li>
 *   <li>calls {@code add(2345, 8)} on the histogram named {@code "hist-"} followed by the
 *       character at index 1 of the left pair's first component, and</li>
 *   <li>emits a triple of the window and the first components of both pairs.</li>
 * </ol>
 * The join key is finally dropped by mapping each result to its second component.
 *
 * @param left left input of (String, Long) pairs
 * @param right right input of (String, Long) pairs
 * @return dataset of (window, left first component, right first component) triples
 */
@Override
protected Dataset<Triple<TimeInterval, String, String>> getOutput(
    Dataset<Pair<String, Long>> left, Dataset<Pair<String, Long>> right) {
  final Dataset<Pair<String, Long>> timedLeft =
      AssignEventTime.of(left).using(Pair::getSecond).output();
  final Dataset<Pair<String, Long>> timedRight =
      AssignEventTime.of(right).using(Pair::getSecond).output();

  final Dataset<Pair<String, Triple<TimeInterval, String, String>>> joined =
      Join.of(timedLeft, timedRight)
          .by(ignoredLeft -> "", ignoredRight -> "", String.class)
          .using((Pair<String, Long> leftPair, Pair<String, Long> rightPair,
              Collector<Triple<TimeInterval, String, String>> out) -> {
            final TimeInterval window = (TimeInterval) out.getWindow();
            out.getCounter("cntr").increment(10);
            final String leftName = leftPair.getFirst();
            out.getHistogram("hist-" + leftName.charAt(1)).add(2345, 8);
            out.collect(Triple.of(window, leftName, rightPair.getFirst()));
          })
          .windowBy(Time.of(Duration.ofMillis(3)))
          .output();

  return MapElements.of(joined).using(Pair::getSecond).output();
}
/**
 * Test fixture: creates the flow named after this class and wires a small pipeline into it.
 *
 * <p>The pipeline maps the input through an identity function, reduces it by key to the
 * constant {@code 1L} in one-second {@code Time} windows, joins the mapped dataset with the
 * reduced one (emitting the reduced pair's second component), and persists the join output
 * to a {@code StdoutSink}.
 *
 * @throws Exception declared by the fixture signature
 */
@Before
public void before() throws Exception {
  flow = Flow.create(getClass().getSimpleName());
  input = flow.createInput(new MockStreamDataSource<>());

  Dataset<Object> passthrough = MapElements.of(input).using(elem -> elem).output();

  Dataset<Pair<Object, Long>> counts = ReduceByKey
      .of(passthrough)
      .keyBy(elem -> elem)
      .reduceBy(group -> 1L)
      .windowBy(Time.of(Duration.ofSeconds(1)))
      .output();

  Dataset<Pair<Object, Long>> joined = Join.of(passthrough, counts)
      .by(elem -> elem, Pair::getFirst)
      .using((Object leftElem, Pair<Object, Long> countPair, Collector<Long> out) -> {
        out.collect(countPair.getSecond());
      })
      .windowBy(Time.of(Duration.ofSeconds(1)))
      .output();

  joined.persist(new StdoutSink<>());
}
/**
 * Builds the dataset under test: a {@code Join} of two event-time-stamped inputs windowed by
 * {@code Session} with a 10 millisecond duration.
 *
 * <p>Both inputs first pass through {@code AssignEventTime} using each pair's second
 * component and are joined under the constant key {@code ""}. For every joined pair the join
 * function emits a triple of the collector's window (cast to {@code TimeInterval}) and the
 * first components of the left and right pairs. The join key is finally dropped by mapping
 * each result to its second component.
 *
 * @param left left input of (String, Long) pairs
 * @param right right input of (String, Long) pairs
 * @return dataset of (window, left first component, right first component) triples
 */
@Override
protected Dataset<Triple<TimeInterval, String, String>> getOutput(
    Dataset<Pair<String, Long>> left, Dataset<Pair<String, Long>> right) {
  final Dataset<Pair<String, Long>> timedLeft =
      AssignEventTime.of(left).using(Pair::getSecond).output();
  final Dataset<Pair<String, Long>> timedRight =
      AssignEventTime.of(right).using(Pair::getSecond).output();

  final Dataset<Pair<String, Triple<TimeInterval, String, String>>> joined =
      Join.of(timedLeft, timedRight)
          .by(ignoredLeft -> "", ignoredRight -> "", String.class)
          .using((Pair<String, Long> leftPair, Pair<String, Long> rightPair,
              Collector<Triple<TimeInterval, String, String>> out) -> {
            final TimeInterval window = (TimeInterval) out.getWindow();
            out.collect(Triple.of(window, leftPair.getFirst(), rightPair.getFirst()));
          })
          .windowBy(Session.of(Duration.ofMillis(10)))
          .output();

  return MapElements.of(joined).using(Pair::getSecond).output();
}
/**
 * Builds the dataset under test: a {@code Join} of two event-time-stamped inputs windowed by
 * {@code Session} with a 10 millisecond duration.
 *
 * <p>Both inputs first pass through {@code AssignEventTime} using each pair's second
 * component and are joined under the constant key {@code ""}. For every joined pair the join
 * function emits a triple of the collector's window (cast to {@code TimeInterval}) and the
 * first components of the left and right pairs. The join key is finally dropped by mapping
 * each result to its second component.
 *
 * @param left left input of (String, Long) pairs
 * @param right right input of (String, Long) pairs
 * @return dataset of (window, left first component, right first component) triples
 */
@Override
protected Dataset<Triple<TimeInterval, String, String>> getOutput(
    Dataset<Pair<String, Long>> left, Dataset<Pair<String, Long>> right) {
  final Dataset<Pair<String, Long>> timedLeft =
      AssignEventTime.of(left).using(Pair::getSecond).output();
  final Dataset<Pair<String, Long>> timedRight =
      AssignEventTime.of(right).using(Pair::getSecond).output();

  final Dataset<Pair<String, Triple<TimeInterval, String, String>>> joined =
      Join.of(timedLeft, timedRight)
          .by(ignoredLeft -> "", ignoredRight -> "", String.class)
          .using((Pair<String, Long> leftPair, Pair<String, Long> rightPair,
              Collector<Triple<TimeInterval, String, String>> out) -> {
            final TimeInterval window = (TimeInterval) out.getWindow();
            out.collect(Triple.of(window, leftPair.getFirst(), rightPair.getFirst()));
          })
          .windowBy(Session.of(Duration.ofMillis(10)))
          .output();

  return MapElements.of(joined).using(Pair::getSecond).output();
}
/**
 * Finishes the builder without an explicitly chosen windowing.
 *
 * <p>Delegates to {@code windowBy(null)} and forwards the given hints unchanged to the
 * resulting builder's {@code output}.
 *
 * @param outputHints hints passed through to the delegated {@code output} call
 * @return the dataset returned by the delegated {@code output} call
 */
@Override
public Dataset<Pair<KEY, OUT>> output(OutputHint... outputHints) {
  // Pass the varargs array on as-is so the delegate sees exactly the caller's hints.
  final OutputHint[] forwardedHints = outputHints;
  return windowBy(null).output(forwardedHints);
}
/**
 * Finishes the builder without an explicitly chosen windowing.
 *
 * <p>Delegates to {@code windowBy(null)} and forwards the given hints unchanged to the
 * resulting builder's {@code output}.
 *
 * @param outputHints hints passed through to the delegated {@code output} call
 * @return the dataset returned by the delegated {@code output} call
 */
@Override
public Dataset<Pair<KEY, OUT>> output(OutputHint... outputHints) {
  // Pass the varargs array on as-is so the delegate sees exactly the caller's hints.
  final OutputHint[] forwardedHints = outputHints;
  return windowBy(null).output(forwardedHints);
}
/**
 * Builds the dataset under test: a {@code RightJoin} of {@code left} and {@code right}.
 *
 * <p>Left elements are keyed by their own value, right elements by their value modulo 10
 * (narrowed to {@code int}). Every call of the join function emits one string of the form
 * {@code "<left>+<right>"}; an absent left side is rendered as the text {@code "null"}. The
 * join is windowed by a new {@code EvenOddWindowing}.
 *
 * @param left dataset supplying the optional left side of the join
 * @param right dataset supplying the right side of the join
 * @return the dataset of (Integer, String) pairs produced by the join
 */
@Override
protected Dataset<Pair<Integer, String>> getOutput(
    Dataset<Integer> left, Dataset<Long> right) {
  return RightJoin.of(left, right)
      .by(leftElem -> leftElem, rightElem -> (int) (rightElem % 10), Integer.class)
      .using((Optional<Integer> maybeLeft, Long rightValue, Collector<String> out) ->
          out.collect(maybeLeft.orElse(null) + "+" + rightValue))
      .windowBy(new EvenOddWindowing())
      .output();
}
/**
 * Builds the dataset under test: a {@code LeftJoin} of {@code left} and {@code right}.
 *
 * <p>Left elements are keyed by their own value, right elements by their value modulo 10
 * (narrowed to {@code int}). Every call of the join function emits one string of the form
 * {@code "<left>+<right>"}; an absent right side is rendered as the text {@code "null"}. The
 * join is windowed by a new {@code EvenOddWindowing}.
 *
 * @param left dataset supplying the left side of the join
 * @param right dataset supplying the optional right side of the join
 * @return the dataset of (Integer, String) pairs produced by the join
 */
@Override
protected Dataset<Pair<Integer, String>> getOutput(
    Dataset<Integer> left, Dataset<Long> right) {
  return LeftJoin.of(left, right)
      .by(leftElem -> leftElem, rightElem -> (int) (rightElem % 10), Integer.class)
      .using((Integer leftValue, Optional<Long> maybeRight, Collector<String> out) ->
          out.collect(leftValue + "+" + maybeRight.orElse(null)))
      .windowBy(new EvenOddWindowing())
      .output();
}
/**
 * Checks that a windowing passed to the {@code Join} builder is the one the built operator
 * reports.
 *
 * <p>Builds a join named {@code "Join1"} over two mock string datasets, keyed by string
 * length on both sides and windowed by {@code Time} of one hour, then asserts that the first
 * operator registered in the flow reports a {@code Time} windowing.
 */
@Test
public void testBuild_Windowing() {
  Flow testFlow = Flow.create("TEST");
  Dataset<String> leftInput = Util.createMockDataset(testFlow, 1);
  Dataset<String> rightInput = Util.createMockDataset(testFlow, 1);

  Join.named("Join1")
      .of(leftInput, rightInput)
      .by(String::length, String::length)
      .using((String leftText, String rightText, Collector<String> out) -> {
        out.collect(leftText + rightText);
      })
      .windowBy(Time.of(Duration.ofHours(1)))
      .output();

  // The join is taken as the first operator the flow's iterator yields.
  Join joinOp = (Join) testFlow.operators().iterator().next();
  assertTrue(joinOp.getWindowing() instanceof Time);
}
/**
 * Builds the dataset under test: a {@code LeftJoin} of {@code left} and {@code right}.
 *
 * <p>Left elements are keyed by their own value, right elements by their value modulo 10
 * (narrowed to {@code int}). Every call of the join function emits one string of the form
 * {@code "<left>+<right>"}; an absent right side is rendered as the text {@code "null"}. The
 * join is windowed by a new {@code EvenOddWindowing}.
 *
 * @param left dataset supplying the left side of the join
 * @param right dataset supplying the optional right side of the join
 * @return the dataset of (Integer, String) pairs produced by the join
 */
@Override
protected Dataset<Pair<Integer, String>> getOutput(
    Dataset<Integer> left, Dataset<Long> right) {
  return LeftJoin.of(left, right)
      .by(leftElem -> leftElem, rightElem -> (int) (rightElem % 10), Integer.class)
      .using((Integer leftValue, Optional<Long> maybeRight, Collector<String> out) ->
          out.collect(leftValue + "+" + maybeRight.orElse(null)))
      .windowBy(new EvenOddWindowing())
      .output();
}