/**
 * Builds the pipeline under test: assigns event time from each pair's second
 * component, then emits the distinct first components using one-second
 * {@code Time} windowing.
 *
 * @param input dataset of pairs; the second component is passed to
 *     {@code AssignEventTime} as the time extractor
 * @return dataset of the distinct first components
 */
@Override
protected Dataset<Integer> getOutput(Dataset<Pair<Integer, Long>> input) {
  // Stamp the elements first so that the windowing below sees event time.
  final Dataset<Pair<Integer, Long>> timestamped =
      AssignEventTime.of(input)
          .using(Pair::getSecond)
          .output();

  // Only the first component takes part in the distinct comparison.
  return Distinct.of(timestamped)
      .mapped(Pair::getFirst)
      .windowBy(Time.of(Duration.ofSeconds(1)))
      .output();
}
/**
 * Assembles the flow being tested. Event time is taken from the second
 * component of every pair; afterwards the first components are de-duplicated
 * with {@code Distinct} under one-second {@code Time} windowing.
 *
 * @param input pairs whose second component serves as the event-time source
 * @return the distinct first components
 */
@Override
protected Dataset<Integer> getOutput(Dataset<Pair<Integer, Long>> input) {
  final Dataset<Pair<Integer, Long>> withEventTime =
      AssignEventTime.of(input).using(Pair::getSecond).output();

  return Distinct.of(withEventTime)
      // project each pair onto its first component before de-duplicating
      .mapped(Pair::getFirst)
      .windowBy(Time.of(Duration.ofSeconds(1)))
      .output();
}
/**
 * Produces the dataset to verify: the input is given event time via
 * {@code AssignEventTime} (second pair component), and {@code Distinct} then
 * yields the unique first components with one-second {@code Time} windowing.
 *
 * @param input dataset of {@code Pair<Integer, Long>} elements
 * @return dataset containing the distinct first components
 */
@Override
protected Dataset<Integer> getOutput(Dataset<Pair<Integer, Long>> input) {
  // Step 1: attach event time taken from the pair's second component.
  final Dataset<Pair<Integer, Long>> stamped =
      AssignEventTime.of(input)
          .using(Pair::getSecond)
          .output();

  // Step 2: de-duplicate on the first component with one-second windowing.
  final Dataset<Integer> unique =
      Distinct.of(stamped)
          .mapped(Pair::getFirst)
          .windowBy(Time.of(Duration.ofSeconds(1)))
          .output();
  return unique;
}
.mapped(e -> e) .windowBy(Time.of(Duration.ofMillis(5))) .output();
@Test public void testDistinctOnStreamUsingWindowingLabels() throws Exception { Flow flow = Flow.create("Test"); Dataset<String> lines = flow.createInput( ListDataSource.unbounded(asList( "one two three four one one two", "one two three three three")) .withReadDelay(Duration.ofSeconds(2))); // expand it to words Dataset<String> words = FlatMap.of(lines) .using(toWords(w -> w)) .output(); Dataset<Pair<TimeInterval, String>> output = FlatMap.of(Distinct.of(words).windowBy(Time.of(Duration.ofSeconds(1))).output()) .using((UnaryFunctor<String, Pair<TimeInterval, String>>) (elem, context) -> context.collect(Pair.of((TimeInterval) context.getWindow(), elem))) .output(); ListDataSink<String> out = ListDataSink.get(); // strip the labels again because we cannot test them MapElements.of(output) .using(Pair::getSecond) .output() .persist(out); executor.submit(flow).get(); DatasetAssert.unorderedEquals( out.getOutputs(), "four", "one", "three", "two", "one", "three", "two"); }
@Override protected Dataset<Triple<Instant, Type, Long>> getOutput( Dataset<Triple<Instant, Type, String>> input) { input = AssignEventTime.of(input).using(t -> t.getFirst().toEpochMilli()).output(); Dataset<ComparablePair<Type, String>> distinct = Distinct.of(input) .mapped(t -> new ComparablePair<>(t.getSecond(), t.getThird())) .windowBy(Time.of(Duration.ofHours(1))) .output(); Dataset<Pair<Type, Long>> reduced = ReduceByKey.of(distinct) .keyBy(ComparablePair::getFirst) .valueBy(p -> 1L) .combineBy(Sums.ofLongs()) .windowBy(Time.of(Duration.ofHours(1))) .output(); // extract window end timestamp return FlatMap .of(reduced) .using((Pair<Type, Long> p, Collector<Triple<Instant, Type, Long>> ctx) -> { long windowEnd = ((TimeInterval) ctx.getWindow()).getEndMillis(); ctx.collect(Triple.of(Instant.ofEpochMilli(windowEnd), p.getFirst(), p.getSecond())); }) .output(); }
@Test public void testDistinctOnStreamWithoutWindowingLabels() throws Exception { Flow flow = Flow.create("Test"); Dataset<String> lines = flow.createInput( ListDataSource.unbounded(asList( "one two three four one one two", "one two three three three")) .withReadDelay(Duration.ofSeconds(2))); // expand it to words Dataset<String> words = FlatMap.of(lines) .using(toWords(w -> w)) .output(); Dataset<String> output = Distinct.of(words) .windowBy(Time.of(Duration.ofSeconds(1))) .output(); ListDataSink<String> out = ListDataSink.get(); output.persist(out); executor.submit(flow).get(); DatasetAssert.unorderedEquals( out.getOutputs(), "four", "one", "three", "two", "one", "three", "two"); }
@Test public void test() throws Exception { Flow f = Flow.create("Test"); ListDataSink<Pair<String, String>> output = ListDataSink.get(); Dataset<Pair<String, String>> input = f.createInput(ListDataSource.unbounded( Arrays.asList( Pair.of("foo", "bar"), Pair.of("quux", "ibis"), Pair.of("foo", "bar"))), // ~ force event time e -> 1L); Distinct.of(input) .windowBy(Time.of(Duration.ofSeconds(1))) .output() .persist(output); new TestFlinkExecutor().submit(f).get(); DatasetAssert.unorderedEquals( output.getOutputs(), Pair.of("foo", "bar"), Pair.of("quux", "ibis")); } }
@Override protected Dataset<Triple<Instant, Type, Long>> getOutput( Dataset<Triple<Instant, Type, String>> input) { input = AssignEventTime.of(input).using(t -> t.getFirst().toEpochMilli()).output(); Dataset<ComparablePair<Type, String>> distinct = Distinct.of(input) .mapped(t -> new ComparablePair<>(t.getSecond(), t.getThird())) .windowBy(Time.of(Duration.ofHours(1))) .output(); Dataset<Pair<Type, Long>> reduced = ReduceByKey.of(distinct) .keyBy(ComparablePair::getFirst) .valueBy(p -> 1L) .combineBy(Sums.ofLongs()) .windowBy(Time.of(Duration.ofHours(1))) .output(); // extract window end timestamp return FlatMap .of(reduced) .using((Pair<Type, Long> p, Collector<Triple<Instant, Type, Long>> ctx) -> { long windowEnd = ((TimeInterval) ctx.getWindow()).getEndMillis(); ctx.collect(Triple.of(Instant.ofEpochMilli(windowEnd), p.getFirst(), p.getSecond())); }) .output(); }
/**
 * Verifies that the fluent {@code Distinct} builder registers exactly one
 * operator on the flow and that the operator carries the given name, output
 * dataset and the very windowing instance that was passed in.
 */
@Test
public void testBuild() {
  final Flow flow = Flow.create("TEST");
  final Dataset<String> source = Util.createMockDataset(flow, 3);
  final Time<String> hourly = Time.of(Duration.ofHours(1));

  final Dataset<String> deduplicated =
      Distinct.named("Distinct1")
          .of(source)
          .windowBy(hourly)
          .output();

  // the resulting dataset belongs to the same flow, which now has one operator
  assertEquals(flow, deduplicated.getFlow());
  assertEquals(1, flow.size());

  final Distinct operator = (Distinct) flow.operators().iterator().next();
  assertEquals(flow, operator.getFlow());
  assertEquals("Distinct1", operator.getName());
  assertEquals(deduplicated, operator.output());
  // identity, not equality: the builder must not copy the windowing
  assertSame(hourly, operator.getWindowing());
}
/**
 * Finishes the builder chain by delegating to a fresh {@code OutputBuilder}
 * created from this builder's {@code name}, {@code input} and {@code mapper}.
 * The fourth constructor argument is {@code null} (presumably "no windowing
 * specified" - confirm against {@code OutputBuilder}).
 *
 * @param outputHints hints forwarded unchanged to the delegate's
 *     {@code output}; previously they were accepted but silently dropped
 * @return the dataset produced by the delegate builder
 */
public Dataset<ELEM> output(OutputHint... outputHints) {
  // Pass the caller's hints through instead of invoking output() with none.
  return new OutputBuilder<>(name, input, mapper, null).output(outputHints);
}
}
/**
 * Terminal builder step: constructs an {@code OutputBuilder} from the
 * {@code name}, {@code input} and {@code mapper} held by this builder, with
 * {@code null} as the fourth argument (looks like the windowing slot - TODO
 * confirm), and returns what that builder's {@code output} produces.
 *
 * @param outputHints hints for the produced dataset; they are now handed on
 *     to the delegate rather than being ignored
 * @return the dataset produced by the delegate builder
 */
public Dataset<ELEM> output(OutputHint... outputHints) {
  // The original call was output() without arguments, which lost the hints.
  return new OutputBuilder<>(name, input, mapper, null).output(outputHints);
}
}
/**
 * Defines the operator chain exercised by the test: {@code AssignEventTime}
 * reads the event time from the pair's second component, then
 * {@code Distinct} keeps the unique first components using one-second
 * {@code Time} windowing.
 *
 * @param input dataset of (value, event time) pairs
 * @return dataset of the distinct values
 */
@Override
protected Dataset<Integer> getOutput(Dataset<Pair<Integer, Long>> input) {
  final Dataset<Pair<Integer, Long>> eventTimed =
      AssignEventTime.of(input)
          .using(Pair::getSecond)
          .output();

  return Distinct.of(eventTimed)
      .mapped(Pair::getFirst)
      // one-second time windowing
      .windowBy(Time.of(Duration.ofSeconds(1)))
      .output();
}
/**
 * Verifies that a {@code Distinct} built with {@code windowBy(Time...)}
 * exposes a {@code Time} instance as its windowing.
 */
@Test
public void testBuild_Windowing() {
  Flow flow = Flow.create("TEST");
  Dataset<String> dataset = Util.createMockDataset(flow, 3);

  // Built only for its side effect of registering the operator on the flow;
  // the returned dataset is not needed, so it is no longer kept in a local.
  Distinct.of(dataset)
      .windowBy(Time.of(Duration.ofHours(1)))
      .output();

  Distinct distinct = (Distinct) flow.operators().iterator().next();
  assertTrue(distinct.getWindowing() instanceof Time);
}