/**
 * A convenience method to create a data set from the given source and
 * assign event time to its elements using the user defined function.
 *
 * <p>The extraction function is validated before the source is turned into
 * a data set, so passing {@code null} fails before {@code createInput(source)}
 * is invoked.
 *
 * @param <T> the type of elements of the created input data set
 *
 * @param source the data source to represent as a data set
 * @param evtTimeFn the user defined event time extraction function;
 *          must not be {@code null}
 *
 * @return a data set representing the specified source of data with
 *          event time assigned
 *
 * @throws NullPointerException if {@code evtTimeFn} is {@code null}
 */
public <T> Dataset<T> createInput(DataSource<T> source, ExtractEventTime<T> evtTimeFn) {
  // Fail fast: reject a missing extractor before creating the input data set.
  Objects.requireNonNull(evtTimeFn, "evtTimeFn");
  Dataset<T> input = createInput(source);
  return AssignEventTime.of(input).using(evtTimeFn).output();
}
/**
 * Finalizes this builder: constructs the {@code AssignEventTime} operator
 * from the configured name, input and event time function, registers it
 * with the flow the input belongs to and hands back the operator's output.
 *
 * @param outputHints hints to attach to the created operator
 *
 * @return the output data set of the newly registered operator
 */
@Override
public Dataset<IN> output(OutputHint... outputHints) {
  // The operator lives in the same flow as the data set it consumes.
  final Flow owningFlow = input.getFlow();
  final AssignEventTime<IN> operator =
      new AssignEventTime<>(
          name, owningFlow, input, eventTimeFn, Sets.newHashSet(outputHints));
  owningFlow.add(operator);
  return operator.output();
}
}
/**
 * A convenience method to create a data set from the given source and
 * assign event time to its elements using the user defined function.
 *
 * <p>The extraction function is validated before the source is turned into
 * a data set, so passing {@code null} fails before {@code createInput(source)}
 * is invoked.
 *
 * @param <T> the type of elements of the created input data set
 *
 * @param source the data source to represent as a data set
 * @param evtTimeFn the user defined event time extraction function;
 *          must not be {@code null}
 *
 * @return a data set representing the specified source of data with
 *          event time assigned
 *
 * @throws NullPointerException if {@code evtTimeFn} is {@code null}
 */
public <T> Dataset<T> createInput(DataSource<T> source, ExtractEventTime<T> evtTimeFn) {
  // Fail fast: reject a missing extractor before creating the input data set.
  Objects.requireNonNull(evtTimeFn, "evtTimeFn");
  Dataset<T> input = createInput(source);
  return AssignEventTime.of(input).using(evtTimeFn).output();
}
/**
 * Finalizes this builder: constructs the {@code AssignEventTime} operator
 * from the configured name, input and event time function, registers it
 * with the flow the input belongs to and hands back the operator's output.
 *
 * @param outputHints hints to attach to the created operator
 *
 * @return the output data set of the newly registered operator
 */
@Override
public Dataset<IN> output(OutputHint... outputHints) {
  // The operator lives in the same flow as the data set it consumes.
  final Flow owningFlow = input.getFlow();
  final AssignEventTime<IN> operator =
      new AssignEventTime<>(
          name, owningFlow, input, eventTimeFn, Sets.newHashSet(outputHints));
  owningFlow.add(operator);
  return operator.output();
}
}
/**
 * Builds the pipeline under test: event time is taken from the second
 * component of each pair, and the stamped data set is then passed to
 * {@code CountByKey}, keyed by the first component and windowed by
 * one-second {@code Time} windows.
 *
 * @param input pairs whose second component is used as event time
 *
 * @return the output of the {@code CountByKey} operator
 */
@Override
protected Dataset<Pair<Integer, Long>> getOutput(
    Dataset<Pair<Integer, Long>> input) {
  final Dataset<Pair<Integer, Long>> timestamped =
      AssignEventTime.of(input)
          .using(Pair::getSecond)
          .output();
  final Duration windowLength = Duration.ofSeconds(1);
  return CountByKey.of(timestamped)
      .keyBy(Pair::getFirst)
      .windowBy(Time.of(windowLength))
      .output();
}
/**
 * Builds the pipeline under test: event time is taken from the second
 * component of each pair, and the stamped data set is then passed to
 * {@code Distinct}, mapped to the first component and windowed by
 * one-second {@code Time} windows.
 *
 * @param input pairs whose second component is used as event time
 *
 * @return the output of the {@code Distinct} operator
 */
@Override
protected Dataset<Integer> getOutput(Dataset<Pair<Integer, Long>> input) {
  final Dataset<Pair<Integer, Long>> timestamped =
      AssignEventTime.of(input)
          .using(Pair::getSecond)
          .output();
  final Duration windowLength = Duration.ofSeconds(1);
  return Distinct.of(timestamped)
      .mapped(Pair::getFirst)
      .windowBy(Time.of(windowLength))
      .output();
}
/**
 * Builds the pipeline under test: event time is taken from the second
 * component of each pair, and the stamped data set is then passed to
 * {@code Distinct}, mapped to the first component and windowed by
 * one-second {@code Time} windows.
 *
 * @param input pairs whose second component is used as event time
 *
 * @return the output of the {@code Distinct} operator
 */
@Override
protected Dataset<Integer> getOutput(Dataset<Pair<Integer, Long>> input) {
  final Dataset<Pair<Integer, Long>> timestamped =
      AssignEventTime.of(input)
          .using(Pair::getSecond)
          .output();
  final Duration windowLength = Duration.ofSeconds(1);
  return Distinct.of(timestamped)
      .mapped(Pair::getFirst)
      .windowBy(Time.of(windowLength))
      .output();
}
/**
 * Builds the pipeline under test: event time is taken from the second
 * component of each pair, and the stamped data set is then passed to
 * {@code CountByKey}, keyed by the first component and windowed by
 * one-second {@code Time} windows.
 *
 * @param input pairs whose second component is used as event time
 *
 * @return the output of the {@code CountByKey} operator
 */
@Override
protected Dataset<Pair<Integer, Long>> getOutput(
    Dataset<Pair<Integer, Long>> input) {
  final Dataset<Pair<Integer, Long>> timestamped =
      AssignEventTime.of(input)
          .using(Pair::getSecond)
          .output();
  final Duration windowLength = Duration.ofSeconds(1);
  return CountByKey.of(timestamped)
      .keyBy(Pair::getFirst)
      .windowBy(Time.of(windowLength))
      .output();
}
/**
 * Builds the pipeline under test: event time is taken from the second
 * component of each pair, and the stamped data set is then passed to
 * {@code Distinct}, mapped to the first component and windowed by
 * one-second {@code Time} windows.
 *
 * @param input pairs whose second component is used as event time
 *
 * @return the output of the {@code Distinct} operator
 */
@Override
protected Dataset<Integer> getOutput(Dataset<Pair<Integer, Long>> input) {
  final Dataset<Pair<Integer, Long>> timestamped =
      AssignEventTime.of(input)
          .using(Pair::getSecond)
          .output();
  final Duration windowLength = Duration.ofSeconds(1);
  return Distinct.of(timestamped)
      .mapped(Pair::getFirst)
      .windowBy(Time.of(windowLength))
      .output();
}
/**
 * Builds the pipeline under test: event time is taken from the second
 * component of each pair, and the stamped data set is then passed to
 * {@code Distinct}, mapped to the first component and windowed by
 * one-second {@code Time} windows.
 *
 * @param input pairs whose second component is used as event time
 *
 * @return the output of the {@code Distinct} operator
 */
@Override
protected Dataset<Integer> getOutput(Dataset<Pair<Integer, Long>> input) {
  final Dataset<Pair<Integer, Long>> timestamped =
      AssignEventTime.of(input)
          .using(Pair::getSecond)
          .output();
  final Duration windowLength = Duration.ofSeconds(1);
  return Distinct.of(timestamped)
      .mapped(Pair::getFirst)
      .windowBy(Time.of(windowLength))
      .output();
}
@Override protected Dataset<Pair<Integer, Long>> getOutput(Dataset<Integer> input) { // ~ use stable event-time watermark input = AssignEventTime.of(input).using(e -> 0).output(); return CountByKey.of(input) .keyBy(e -> e) .windowBy(Time.of(Duration.ofSeconds(1))) .output(); }
@Override protected Dataset<Pair<Integer, Long>> getOutput(Dataset<Integer> input) { // ~ use stable event-time watermark input = AssignEventTime.of(input).using(e -> 0).output(); return CountByKey.of(input) .keyBy(e -> e) .windowBy(Time.of(Duration.ofSeconds(1))) .output(); }
@Override protected Dataset<Pair<Integer, Long>> getOutput(Dataset<Integer> input) { // ~ use stable event-time watermark input = AssignEventTime.of(input).using(e -> 0).output(); return CountByKey.of(input) .keyBy(e -> e) .windowBy(Time.of(Duration.ofSeconds(1))) .output(); }
@Override protected Dataset<Pair<Integer, Long>> getOutput(Dataset<Integer> input) { // ~ use stable event-time watermark input = AssignEventTime.of(input).using(e -> 0).output(); return CountByKey.of(input) .keyBy(e -> e) .windowBy(Time.of(Duration.ofSeconds(1))) .output(); }
/**
 * Builds the pipeline under test: event time is taken from the second
 * component of each pair, and the stamped data set is then passed to
 * {@code ReduceStateByKey}. Both the key and the value are the pair's
 * first component, state is created via {@code CountState::new} and merged
 * via {@code CountState::combine}, and windowing is one-second
 * {@code Time} windows.
 *
 * @param input pairs whose second component is used as event time
 *
 * @return the output of the {@code ReduceStateByKey} operator
 */
@Override
protected Dataset<Pair<Word, Long>> getOutput(Dataset<Pair<Word, Long>> input) {
  final Dataset<Pair<Word, Long>> timestamped =
      AssignEventTime.of(input)
          .using(Pair::getSecond)
          .output();
  final Duration windowLength = Duration.ofSeconds(1);
  return ReduceStateByKey.of(timestamped)
      .keyBy(Pair::getFirst)
      .valueBy(Pair::getFirst)
      // the explicit cast pins the state factory's type arguments
      .stateFactory((StateFactory<Word, Long, CountState<Word>>) CountState::new)
      .mergeStatesBy(CountState::combine)
      .windowBy(Time.of(windowLength))
      .output();
}
/**
 * Builds the pipeline under test: event time is taken from the second
 * component of each pair, and the stamped data set is then passed to
 * {@code ReduceByKey}. The key is the pair's first component, every
 * element contributes the value {@code 1L}, values are combined with
 * {@code Sums.ofLongs()}, and windowing is one-second {@code Time} windows.
 *
 * @param input pairs whose second component is used as event time
 *
 * @return the output of the {@code ReduceByKey} operator
 */
@Override
protected Dataset<Pair<Word, Long>> getOutput(Dataset<Pair<Word, Long>> input) {
  final Dataset<Pair<Word, Long>> timestamped =
      AssignEventTime.of(input)
          .using(Pair::getSecond)
          .output();
  final Duration windowLength = Duration.ofSeconds(1);
  return ReduceByKey.of(timestamped)
      .keyBy(Pair::getFirst)
      .valueBy(element -> 1L)
      .combineBy(Sums.ofLongs())
      .windowBy(Time.of(windowLength))
      .output();
}
/**
 * Builds the pipeline under test: event time is taken from the second
 * component of each pair, and the stamped data set is then passed to
 * {@code ReduceByKey}. The key is the pair's first component (with
 * {@code Integer.class} given explicitly as the key class), every element
 * contributes the value {@code 1L}, values are combined with
 * {@code Sums.ofLongs()}, and windowing is one-second {@code Time} windows.
 *
 * @param input pairs whose second component is used as event time
 *
 * @return the output of the {@code ReduceByKey} operator
 */
@Override
protected Dataset<Pair<Integer, Long>> getOutput(Dataset<Pair<Integer, Long>> input) {
  final Dataset<Pair<Integer, Long>> timestamped =
      AssignEventTime.of(input)
          .using(Pair::getSecond)
          .output();
  final Duration windowLength = Duration.ofSeconds(1);
  return ReduceByKey.of(timestamped)
      .keyBy(Pair::getFirst, Integer.class)
      .valueBy(element -> 1L)
      .combineBy(Sums.ofLongs())
      .windowBy(Time.of(windowLength))
      .output();
}
/**
 * Builds the pipeline under test: event time is taken from the second
 * component of each pair, and the stamped data set is then passed to
 * {@code ReduceStateByKey}. Both the key and the value are the pair's
 * first component, state is created via {@code CountState::new} and merged
 * via {@code CountState::combine}, and windowing is one-second
 * {@code Time} windows.
 *
 * @param input pairs whose second component is used as event time
 *
 * @return the output of the {@code ReduceStateByKey} operator
 */
@Override
protected Dataset<Pair<Word, Long>> getOutput(Dataset<Pair<Word, Long>> input) {
  final Dataset<Pair<Word, Long>> timestamped =
      AssignEventTime.of(input)
          .using(Pair::getSecond)
          .output();
  final Duration windowLength = Duration.ofSeconds(1);
  return ReduceStateByKey.of(timestamped)
      .keyBy(Pair::getFirst)
      .valueBy(Pair::getFirst)
      // the explicit cast pins the state factory's type arguments
      .stateFactory((StateFactory<Word, Long, CountState<Word>>) CountState::new)
      .mergeStatesBy(CountState::combine)
      .windowBy(Time.of(windowLength))
      .output();
}