private SingleOutputStreamOperator<T> aggregate(AggregationFunction<T> aggregator) { return reduce(aggregator); }
/** * Applies the given window function to each window. The window function is called for each * evaluation of the window for each key individually. The output of the window function is * interpreted as a regular non-windowed stream. * * <p>Arriving data is incrementally aggregated using the given reducer. * * @param reduceFunction The reduce function that is used for incremental aggregation. * @param function The window function. * @return The data stream that is the result of applying the window function to the window. */ public <R> SingleOutputStreamOperator<R> reduce( ReduceFunction<T> reduceFunction, WindowFunction<T, R, K, W> function) { TypeInformation<T> inType = input.getType(); TypeInformation<R> resultType = getWindowFunctionReturnType(function, inType); return reduce(reduceFunction, function, resultType); }
/** * A thin wrapper layer over {@link WindowedStream#reduce(org.apache.flink.api.common.functions.ReduceFunction)}. * * @param fun The reduce function. * @return The data stream that is the result of applying the reduce function to the window. */ @SuppressWarnings("unchecked") public PythonSingleOutputStreamOperator reduce(ReduceFunction<PyObject> fun) throws IOException { return new PythonSingleOutputStreamOperator(stream.reduce(new PythonReduceFunction(fun))); }
/** * Applies the given window function to each window. The window function is called for each * evaluation of the window for each key individually. The output of the window function is * interpreted as a regular non-windowed stream. * * <p>Arriving data is incrementally aggregated using the given reducer. * * @param reduceFunction The reduce function that is used for incremental aggregation. * @param function The window function. * @return The data stream that is the result of applying the window function to the window. */ @PublicEvolving public <R> SingleOutputStreamOperator<R> reduce(ReduceFunction<T> reduceFunction, ProcessWindowFunction<T, R, K, W> function) { TypeInformation<R> resultType = getProcessWindowFunctionReturnType(function, input.getType(), null); return reduce(reduceFunction, function, resultType); }
/** * Applies a reduce function to the window. The window function is called for each evaluation * of the window for each key individually. The output of the reduce function is interpreted * as a regular non-windowed stream. * * <p>This window will try and incrementally aggregate data as much as the window policies * permit. For example, tumbling time windows can aggregate the data, meaning that only one * element per key is stored. Sliding time windows will aggregate on the granularity of the * slide interval, so a few elements are stored per key (one per slide interval). * Custom windows may not be able to incrementally aggregate, or may need to store extra values * in an aggregation tree. * * @param function The reduce function. * @return The data stream that is the result of applying the reduce function to the window. */ @SuppressWarnings("unchecked") public SingleOutputStreamOperator<T> reduce(ReduceFunction<T> function) { if (function instanceof RichFunction) { throw new UnsupportedOperationException("ReduceFunction of reduce can not be a RichFunction. " + "Please use reduce(ReduceFunction, WindowFunction) instead."); } //clean the closure function = input.getExecutionEnvironment().clean(function); return reduce(function, new PassThroughWindowFunction<K, W, T>()); }
public static void main(String[] args) throws Exception { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); DataStream<Tuple2<Long, Long>> stream = env.addSource(new DataSource()); stream .keyBy(0) .timeWindow(Time.of(2500, MILLISECONDS), Time.of(500, MILLISECONDS)) .reduce(new SummingReducer()) // alternative: use a apply function which does not pre-aggregate // .keyBy(new FirstFieldKeyExtractor<Tuple2<Long, Long>, Long>()) // .window(Time.of(2500, MILLISECONDS), Time.of(500, MILLISECONDS)) // .apply(new SummingWindowFunction()) .addSink(new SinkFunction<Tuple2<Long, Long>>() { @Override public void invoke(Tuple2<Long, Long> value) { } }); env.execute(); }
@Test public void testReduceWindowState() throws Exception { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); env.registerTypeWithKryoSerializer(File.class, JavaSerializer.class); DataStream<File> src = env.fromElements(new File("/")); SingleOutputStreamOperator<?> result = src .keyBy(new KeySelector<File, String>() { @Override public String getKey(File value) { return null; } }) .timeWindow(Time.milliseconds(1000)) .reduce(new ReduceFunction<File>() { @Override public File reduce(File value1, File value2) { return null; } }); validateStateDescriptorConfigured(result); }
/** * .reduce() does not support RichReduceFunction, since the reduce function is used internally * in a {@code ReducingState}. */ @Test(expected = UnsupportedOperationException.class) public void testReduceWithRichReducerFails() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); source .keyBy(0) .window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .reduce(new RichReduceFunction<Tuple2<String, Integer>>() { @Override public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) throws Exception { return null; } }); fail("exception was not thrown"); }
@Test public void testErrorOnEventTimeOverProcessingTime() { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(2); env.getConfig().disableSysoutLogging(); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); DataStream<Tuple2<String, Integer>> source1 = env.fromElements(new Tuple2<>("a", 1), new Tuple2<>("b", 2)); source1 .keyBy(0) .window(TumblingEventTimeWindows.of(Time.seconds(5))) .reduce(new ReduceFunction<Tuple2<String, Integer>>() { @Override public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) { return value1; } }) .print(); try { env.execute(); fail("this should fail with an exception"); } catch (Exception e) { // expected } }
@Test public void testErrorOnEventTimeWithoutTimestamps() { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(2); env.getConfig().disableSysoutLogging(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); DataStream<Tuple2<String, Integer>> source1 = env.fromElements(new Tuple2<>("a", 1), new Tuple2<>("b", 2)); source1 .keyBy(0) .window(TumblingEventTimeWindows.of(Time.seconds(5))) .reduce(new ReduceFunction<Tuple2<String, Integer>>() { @Override public Tuple2<String, Integer> reduce(Tuple2<String, Integer> value1, Tuple2<String, Integer> value2) { return value1; } }) .print(); try { env.execute(); fail("this should fail with an exception"); } catch (Exception e) { // expected } }
@Test @SuppressWarnings("rawtypes") public void testReduceEventTimeWindows() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); DataStream<Tuple2<String, Integer>> source = env.fromElements( Tuple2.of("hello", 1), Tuple2.of("hello", 2)); DataStream<Tuple2<String, Integer>> window1 = source .keyBy(0) .timeWindow(Time.of(1000, TimeUnit.MILLISECONDS), Time.of(100, TimeUnit.MILLISECONDS)) .reduce(new DummyReducer()); OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform1 = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation(); OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator1 = transform1.getOperator(); Assert.assertTrue(operator1 instanceof WindowOperator); WindowOperator winOperator1 = (WindowOperator) operator1; Assert.assertTrue(winOperator1.getTrigger() instanceof EventTimeTrigger); Assert.assertTrue(winOperator1.getWindowAssigner() instanceof SlidingEventTimeWindows); Assert.assertTrue(winOperator1.getStateDescriptor() instanceof ReducingStateDescriptor); }
@Test @SuppressWarnings("rawtypes") public void testReduceEventTime() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); DataStream<Tuple2<String, Integer>> window1 = source .keyBy(new TupleKeySelector()) .window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .reduce(new DummyReducer()); OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation(); OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator(); Assert.assertTrue(operator instanceof WindowOperator); WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof ReducingStateDescriptor); processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1)); }
.keyBy(0) .timeWindow(Time.of(1000, TimeUnit.MILLISECONDS), Time.of(100, TimeUnit.MILLISECONDS)) .reduce(reducer);
@Test @SuppressWarnings("rawtypes") public void testReduceProcessingTime() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); DataStream<Tuple2<String, Integer>> window1 = source .keyBy(new TupleKeySelector()) .window(SlidingProcessingTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .reduce(new DummyReducer()); OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation(); OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator(); Assert.assertTrue(operator instanceof WindowOperator); WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof ProcessingTimeTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingProcessingTimeWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof ReducingStateDescriptor); processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1)); }
@Test @SuppressWarnings("rawtypes") public void testReduceWithCustomTrigger() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); DummyReducer reducer = new DummyReducer(); DataStream<Tuple2<String, Integer>> window1 = source .keyBy(0) .window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .trigger(CountTrigger.of(1)) .reduce(reducer); OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation(); OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator(); Assert.assertTrue(operator instanceof WindowOperator); WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof CountTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof ReducingStateDescriptor); processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1)); }
.keyBy(new TupleKeySelector()) .window(TumblingProcessingTimeWindows.of(Time.of(1, TimeUnit.SECONDS))) .reduce(new DummyReducer(), new ProcessWindowFunction<Tuple2<String, Integer>, Tuple3<String, String, Integer>, String, TimeWindow>() { private static final long serialVersionUID = 1L;
.keyBy(new TupleKeySelector()) .window(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS))) .reduce(reducer, new WindowFunction<Tuple2<String, Integer>, Tuple3<String, String, Integer>, String, TimeWindow>() { private static final long serialVersionUID = 1L;
.keyBy(new TupleKeySelector()) .window(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS))) .reduce(reducer, new ProcessWindowFunction<Tuple2<String, Integer>, Tuple3<String, String, Integer>, String, TimeWindow>() { private static final long serialVersionUID = 1L;
@Test @SuppressWarnings("rawtypes") public void testReduceWithEvictor() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); DummyReducer reducer = new DummyReducer(); DataStream<Tuple2<String, Integer>> window1 = source .keyBy(0) .window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .evictor(CountEvictor.of(100)) .reduce(reducer); OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation(); OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator(); Assert.assertTrue(operator instanceof EvictingWindowOperator); EvictingWindowOperator<String, Tuple2<String, Integer>, ?, ?> winOperator = (EvictingWindowOperator<String, Tuple2<String, Integer>, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger); Assert.assertTrue(winOperator.getEvictor() instanceof CountEvictor); Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof ListStateDescriptor); processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1)); }
.window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .evictor(CountEvictor.of(100)) .reduce( reducer, new ProcessWindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple, TimeWindow>() {