/** * Applies the given window function to each window. The window function is called for each * evaluation of the window for each key individually. The output of the window function is * interpreted as a regular non-windowed stream. * * <p>Note that this function requires that all data in the windows is buffered until the window * is evaluated, as the function provides no means of incremental aggregation. * * @param function The window function. * @return The data stream that is the result of applying the window function to the window. */ @PublicEvolving public <R> SingleOutputStreamOperator<R> process(ProcessWindowFunction<T, R, K, W> function) { TypeInformation<R> resultType = getProcessWindowFunctionReturnType(function, getInputType(), null); return process(function, resultType); }
@Test public void testProcessWindowState() throws Exception { final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); env.registerTypeWithKryoSerializer(File.class, JavaSerializer.class); DataStream<File> src = env.fromElements(new File("/")); SingleOutputStreamOperator<?> result = src .keyBy(new KeySelector<File, String>() { @Override public String getKey(File value) { return null; } }) .timeWindow(Time.milliseconds(1000)) .process(new ProcessWindowFunction<File, String, String, TimeWindow>() { @Override public void process(String s, Context ctx, Iterable<File> input, Collector<String> out) {} }); validateListStateDescriptorConfigured(result); }
@Test public void testProcessdWindowFunctionSideOutput() throws Exception { TestListResultSink<Integer> resultSink = new TestListResultSink<>(); TestListResultSink<String> sideOutputResultSink = new TestListResultSink<>(); StreamExecutionEnvironment see = StreamExecutionEnvironment.getExecutionEnvironment(); see.setParallelism(3); see.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); DataStream<Integer> dataStream = see.fromCollection(elements); OutputTag<String> sideOutputTag = new OutputTag<String>("side"){}; SingleOutputStreamOperator<Integer> windowOperator = dataStream .assignTimestampsAndWatermarks(new TestWatermarkAssigner()) .keyBy(new TestKeySelector()) .timeWindow(Time.milliseconds(1), Time.milliseconds(1)) .process(new ProcessWindowFunction<Integer, Integer, Integer, TimeWindow>() { private static final long serialVersionUID = 1L; @Override public void process(Integer integer, Context context, Iterable<Integer> elements, Collector<Integer> out) throws Exception { out.collect(integer); context.output(sideOutputTag, "sideout-" + String.valueOf(integer)); } }); windowOperator.getSideOutput(sideOutputTag).addSink(sideOutputResultSink); windowOperator.addSink(resultSink); see.execute(); assertEquals(Arrays.asList("sideout-1", "sideout-2", "sideout-5"), sideOutputResultSink.getSortedResult()); assertEquals(Arrays.asList(1, 2, 5), resultSink.getSortedResult()); }
@Test @SuppressWarnings("rawtypes") public void testMergingWindowsWithEvictor() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); DataStream<Integer> source = env.fromElements(1, 2); DataStream<String> window1 = source .keyBy(new KeySelector<Integer, String>() { @Override public String getKey(Integer value) throws Exception { return value.toString(); } }) .window(EventTimeSessionWindows.withGap(Time.seconds(5))) .evictor(CountEvictor.of(5)) .process(new TestProcessWindowFunction()); final OneInputTransformation<Integer, String> transform = (OneInputTransformation<Integer, String>) window1.getTransformation(); final OneInputStreamOperator<Integer, String> operator = transform.getOperator(); Assert.assertTrue(operator instanceof WindowOperator); WindowOperator<String, Integer, ?, ?, ?> winOperator = (WindowOperator<String, Integer, ?, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof EventTimeSessionWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof ListStateDescriptor); processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, 1); }
@Test @SuppressWarnings("rawtypes") public void testProcessProcessingTime() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); DataStream<Tuple2<String, Integer>> window1 = source .keyBy(new TupleKeySelector()) .window(TumblingProcessingTimeWindows.of(Time.of(1, TimeUnit.SECONDS))) .process(new ProcessWindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String, TimeWindow>() { private static final long serialVersionUID = 1L; @Override public void process(String key, Context ctx, Iterable<Tuple2<String, Integer>> values, Collector<Tuple2<String, Integer>> out) throws Exception { for (Tuple2<String, Integer> in : values) { out.collect(in); } } }); OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation(); OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator(); Assert.assertTrue(operator instanceof WindowOperator); WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof ProcessingTimeTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingProcessingTimeWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof ListStateDescriptor); processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1)); }
@Test @SuppressWarnings("rawtypes") public void testProcessEventTime() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); DataStream<Tuple2<String, Integer>> window1 = source .keyBy(new TupleKeySelector()) .window(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS))) .process(new ProcessWindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String, TimeWindow>() { private static final long serialVersionUID = 1L; @Override public void process(String key, Context ctx, Iterable<Tuple2<String, Integer>> values, Collector<Tuple2<String, Integer>> out) throws Exception { for (Tuple2<String, Integer> in : values) { out.collect(in); } } }); OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation(); OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator(); Assert.assertTrue(operator instanceof WindowOperator); WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingEventTimeWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof ListStateDescriptor); processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1)); }
.window(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS))) .trigger(CountTrigger.of(1)) .process(new ProcessWindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String, TimeWindow>() { private static final long serialVersionUID = 1L;
.trigger(CountTrigger.of(1)) .evictor(TimeEvictor.of(Time.of(100, TimeUnit.MILLISECONDS))) .process(new ProcessWindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String, TimeWindow>() { private static final long serialVersionUID = 1L;
/** * Applies the given window function to each window. The window function is called for each * evaluation of the window for each key individually. The output of the window function is * interpreted as a regular non-windowed stream. * * <p>Not that this function requires that all data in the windows is buffered until the window * is evaluated, as the function provides no means of incremental aggregation. * * @param function The window function. * @return The data stream that is the result of applying the window function to the window. */ @PublicEvolving public <R> SingleOutputStreamOperator<R> process(ProcessWindowFunction<T, R, K, W> function) { TypeInformation<R> resultType = getProcessWindowFunctionReturnType(function, getInputType(), null); return process(function, resultType); }
/** * Applies the given window function to each window. The window function is called for each * evaluation of the window for each key individually. The output of the window function is * interpreted as a regular non-windowed stream. * * <p>Note that this function requires that all data in the windows is buffered until the window * is evaluated, as the function provides no means of incremental aggregation. * * @param function The window function. * @return The data stream that is the result of applying the window function to the window. */ @PublicEvolving public <R> SingleOutputStreamOperator<R> process(ProcessWindowFunction<T, R, K, W> function) { TypeInformation<R> resultType = getProcessWindowFunctionReturnType(function, getInputType(), null); return process(function, resultType); }
/** * Applies the given window function to each window. The window function is called for each * evaluation of the window for each key individually. The output of the window function is * interpreted as a regular non-windowed stream. * * <p>Note that this function requires that all data in the windows is buffered until the window * is evaluated, as the function provides no means of incremental aggregation. * * @param function The window function. * @return The data stream that is the result of applying the window function to the window. */ @PublicEvolving public <R> SingleOutputStreamOperator<R> process(ProcessWindowFunction<T, R, K, W> function) { TypeInformation<R> resultType = getProcessWindowFunctionReturnType(function, getInputType(), null); return process(function, resultType); }
.keyBy((TaxiFare fare) -> fare.driverId) .timeWindow(Time.hours(1)) .process(new AddTips());