/** * Applies an aggregation that that gives the minimum value of every window * of the data stream at the given position. * * @param positionToMin The position to minimize * @return The transformed DataStream. */ public SingleOutputStreamOperator<T> min(int positionToMin) { return aggregate(new ComparableAggregator<>(positionToMin, input.getType(), AggregationFunction.AggregationType.MIN, input.getExecutionConfig())); }
/** * Applies an aggregation that sums every window of the data stream at the * given position. * * @param positionToSum The position in the tuple/array to sum * @return The transformed DataStream. */ public SingleOutputStreamOperator<T> sum(int positionToSum) { return aggregate(new SumAggregator<>(positionToSum, input.getType(), input.getExecutionConfig())); }
/** * Applies an aggregation that gives the minimum element of every window of * the data stream by the given position. If more elements have the same * minimum value the operator returns either the first or last one depending * on the parameter setting. * * @param positionToMinBy The position to minimize * @param first If true, then the operator return the first element with the minimum value, otherwise returns the last * @return The transformed DataStream. */ public SingleOutputStreamOperator<T> minBy(int positionToMinBy, boolean first) { return aggregate(new ComparableAggregator<>(positionToMinBy, input.getType(), AggregationFunction.AggregationType.MINBY, first, input.getExecutionConfig())); }
/** * Applies an aggregation that gives the maximum element of every window of * the data stream by the given position. If more elements have the same * maximum value the operator returns either the first or last one depending * on the parameter setting. * * @param positionToMaxBy The position to maximize by * @param first If true, then the operator return the first element with the maximum value, otherwise returns the last * @return The transformed DataStream. */ public SingleOutputStreamOperator<T> maxBy(int positionToMaxBy, boolean first) { return aggregate(new ComparableAggregator<>(positionToMaxBy, input.getType(), AggregationFunction.AggregationType.MAXBY, first, input.getExecutionConfig())); }
/** * Applies an aggregation that gives the maximum value of every window of * the data stream at the given position. * * @param positionToMax The position to maximize * @return The transformed DataStream. */ public SingleOutputStreamOperator<T> max(int positionToMax) { return aggregate(new ComparableAggregator<>(positionToMax, input.getType(), AggregationFunction.AggregationType.MAX, input.getExecutionConfig())); }
/** * Applies an aggregation that that gives the maximum value of the pojo data * stream at the given field expression for every window. A field expression * is either the name of a public field or a getter method with parentheses * of the {@link DataStream DataStreams} underlying type. A dot can be used to drill * down into objects, as in {@code "field1.getInnerField2()" }. * * @param field The field expression based on which the aggregation will be applied. * @return The transformed DataStream. */ public SingleOutputStreamOperator<T> max(String field) { return aggregate(new ComparableAggregator<>(field, input.getType(), AggregationFunction.AggregationType.MAX, false, input.getExecutionConfig())); }
/** * Applies an aggregation that sums every window of the pojo data stream at the given field for * every window. * * <p>A field expression is either the name of a public field or a getter method with * parentheses of the stream's underlying type. A dot can be used to drill down into objects, * as in {@code "field1.getInnerField2()" }. * * @param field The field to sum * @return The transformed DataStream. */ public SingleOutputStreamOperator<T> sum(String field) { return aggregate(new SumAggregator<>(field, input.getType(), input.getExecutionConfig())); }
/** * Applies an aggregation that that gives the minimum value of the pojo data * stream at the given field expression for every window. * * <p>A field * expression is either the name of a public field or a getter method with * parentheses of the {@link DataStream}S underlying type. A dot can be used * to drill down into objects, as in {@code "field1.getInnerField2()" }. * * @param field The field expression based on which the aggregation will be applied. * @return The transformed DataStream. */ public SingleOutputStreamOperator<T> min(String field) { return aggregate(new ComparableAggregator<>(field, input.getType(), AggregationFunction.AggregationType.MIN, false, input.getExecutionConfig())); }
/** * Applies an aggregation that that gives the maximum element of the pojo * data stream by the given field expression for every window. A field * expression is either the name of a public field or a getter method with * parentheses of the {@link DataStream}S underlying type. A dot can be used * to drill down into objects, as in {@code "field1.getInnerField2()" }. * * @param field The field expression based on which the aggregation will be applied. * @param first If True then in case of field equality the first object will be returned * @return The transformed DataStream. */ public SingleOutputStreamOperator<T> maxBy(String field, boolean first) { return aggregate(new ComparableAggregator<>(field, input.getType(), AggregationFunction.AggregationType.MAXBY, first, input.getExecutionConfig())); }
/** * Applies an aggregation that that gives the minimum element of the pojo * data stream by the given field expression for every window. A field * expression is either the name of a public field or a getter method with * parentheses of the {@link DataStream DataStreams} underlying type. A dot can be used * to drill down into objects, as in {@code "field1.getInnerField2()" }. * * @param field The field expression based on which the aggregation will be applied. * @param first If True then in case of field equality the first object will be returned * @return The transformed DataStream. */ public SingleOutputStreamOperator<T> minBy(String field, boolean first) { return aggregate(new ComparableAggregator<>(field, input.getType(), AggregationFunction.AggregationType.MINBY, first, input.getExecutionConfig())); }
/** * Applies the given aggregation function to each window. The aggregation function is called for * each element, aggregating values incrementally and keeping the state to one accumulator * per key and window. * * @param function The aggregation function. * @return The data stream that is the result of applying the fold function to the window. * * @param <ACC> The type of the AggregateFunction's accumulator * @param <R> The type of the elements in the resulting stream, equal to the * AggregateFunction's result type */ @PublicEvolving public <ACC, R> SingleOutputStreamOperator<R> aggregate(AggregateFunction<T, ACC, R> function) { checkNotNull(function, "function"); if (function instanceof RichFunction) { throw new UnsupportedOperationException("This aggregation function cannot be a RichFunction."); } TypeInformation<ACC> accumulatorType = TypeExtractor.getAggregateFunctionAccumulatorType( function, input.getType(), null, false); TypeInformation<R> resultType = TypeExtractor.getAggregateFunctionReturnType( function, input.getType(), null, false); return aggregate(function, accumulatorType, resultType); }
/** * .aggregate() does not support RichAggregateFunction, since the AggregationFunction is used internally * in a {@code AggregatingState}. */ @Test(expected = UnsupportedOperationException.class) public void testAggregateWithRichFunctionFails() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); source .keyBy(0) .window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .aggregate(new DummyRichAggregationFunction<Tuple2<String, Integer>>()); fail("exception was not thrown"); }
@Test public void testAggregateProcessingTime() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); DataStream<Tuple3<String, String, Integer>> source = env.fromElements( Tuple3.of("hello", "hallo", 1), Tuple3.of("hello", "hallo", 2)); DataStream<Integer> window1 = source .keyBy(new Tuple3KeySelector()) .window(SlidingProcessingTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .aggregate(new DummyAggregationFunction()); final OneInputTransformation<Tuple3<String, String, Integer>, Integer> transform = (OneInputTransformation<Tuple3<String, String, Integer>, Integer>) window1.getTransformation(); final OneInputStreamOperator<Tuple3<String, String, Integer>, Integer> operator = transform.getOperator(); Assert.assertTrue(operator instanceof WindowOperator); WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof ProcessingTimeTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingProcessingTimeWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof AggregatingStateDescriptor); processElementAndEnsureOutput( winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple3<>("hello", "hallo", 1)); }
@Test public void testAggregateEventTime() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); DataStream<Tuple3<String, String, Integer>> source = env.fromElements( Tuple3.of("hello", "hallo", 1), Tuple3.of("hello", "hallo", 2)); DataStream<Integer> window1 = source .keyBy(new Tuple3KeySelector()) .window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .aggregate(new DummyAggregationFunction()); final OneInputTransformation<Tuple3<String, String, Integer>, Integer> transform = (OneInputTransformation<Tuple3<String, String, Integer>, Integer>) window1.getTransformation(); final OneInputStreamOperator<Tuple3<String, String, Integer>, Integer> operator = transform.getOperator(); Assert.assertTrue(operator instanceof WindowOperator); WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof AggregatingStateDescriptor); processElementAndEnsureOutput( winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple3<>("hello", "hallo", 1)); }
@Test public void testAggregateWithWindowFunctionProcessingTime() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); DataStream<Tuple3<String, String, Integer>> source = env.fromElements( Tuple3.of("hello", "hallo", 1), Tuple3.of("hello", "hallo", 2)); DataStream<String> window = source .keyBy(new Tuple3KeySelector()) .window(TumblingProcessingTimeWindows.of(Time.of(1, TimeUnit.SECONDS))) .aggregate(new DummyAggregationFunction(), new TestWindowFunction()); final OneInputTransformation<Tuple3<String, String, Integer>, String> transform = (OneInputTransformation<Tuple3<String, String, Integer>, String>) window.getTransformation(); final OneInputStreamOperator<Tuple3<String, String, Integer>, String> operator = transform.getOperator(); Assert.assertTrue(operator instanceof WindowOperator); WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof ProcessingTimeTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingProcessingTimeWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof AggregatingStateDescriptor); processElementAndEnsureOutput( operator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple3<>("hello", "hallo", 1)); }
@Test public void testAggregateWithProcessWindowFunctionProcessingTime() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); DataStream<Tuple3<String, String, Integer>> source = env.fromElements( Tuple3.of("hello", "hallo", 1), Tuple3.of("hello", "hallo", 2)); DataStream<String> window = source .keyBy(new Tuple3KeySelector()) .window(TumblingProcessingTimeWindows.of(Time.of(1, TimeUnit.SECONDS))) .aggregate(new DummyAggregationFunction(), new TestProcessWindowFunction()); final OneInputTransformation<Tuple3<String, String, Integer>, String> transform = (OneInputTransformation<Tuple3<String, String, Integer>, String>) window.getTransformation(); final OneInputStreamOperator<Tuple3<String, String, Integer>, String> operator = transform.getOperator(); Assert.assertTrue(operator instanceof WindowOperator); WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof ProcessingTimeTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingProcessingTimeWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof AggregatingStateDescriptor); processElementAndEnsureOutput( operator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple3<>("hello", "hallo", 1)); }
@Test public void testAggregateWithProcessWindowFunctionEventTime() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); DataStream<Tuple3<String, String, Integer>> source = env.fromElements( Tuple3.of("hello", "hallo", 1), Tuple3.of("hello", "hallo", 2)); DataStream<String> window = source .keyBy(new Tuple3KeySelector()) .window(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS))) .aggregate(new DummyAggregationFunction(), new TestProcessWindowFunction()); final OneInputTransformation<Tuple3<String, String, Integer>, String> transform = (OneInputTransformation<Tuple3<String, String, Integer>, String>) window.getTransformation(); final OneInputStreamOperator<Tuple3<String, String, Integer>, String> operator = transform.getOperator(); Assert.assertTrue(operator instanceof WindowOperator); WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingEventTimeWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof AggregatingStateDescriptor); processElementAndEnsureOutput( operator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple3<>("hello", "hallo", 1)); }
@Test public void testAggregateWithWindowFunctionEventTime() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); DataStream<Tuple3<String, String, Integer>> source = env.fromElements( Tuple3.of("hello", "hallo", 1), Tuple3.of("hello", "hallo", 2)); DummyReducer reducer = new DummyReducer(); DataStream<String> window = source .keyBy(new Tuple3KeySelector()) .window(TumblingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS))) .aggregate(new DummyAggregationFunction(), new TestWindowFunction()); final OneInputTransformation<Tuple3<String, String, Integer>, String> transform = (OneInputTransformation<Tuple3<String, String, Integer>, String>) window.getTransformation(); final OneInputStreamOperator<Tuple3<String, String, Integer>, String> operator = transform.getOperator(); Assert.assertTrue(operator instanceof WindowOperator); WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingEventTimeWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof AggregatingStateDescriptor); processElementAndEnsureOutput( operator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple3<>("hello", "hallo", 1)); }
@Test public void testAggregateWithEvictor() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); DataStream<Tuple3<String, String, Integer>> source = env.fromElements( Tuple3.of("hello", "hallo", 1), Tuple3.of("hello", "hallo", 2)); DataStream<Integer> window1 = source .keyBy(new Tuple3KeySelector()) .window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .evictor(CountEvictor.of(100)) .aggregate(new DummyAggregationFunction()); final OneInputTransformation<Tuple3<String, String, Integer>, Integer> transform = (OneInputTransformation<Tuple3<String, String, Integer>, Integer>) window1.getTransformation(); final OneInputStreamOperator<Tuple3<String, String, Integer>, Integer> operator = transform.getOperator(); Assert.assertTrue(operator instanceof WindowOperator); WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof ListStateDescriptor); processElementAndEnsureOutput( winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple3<>("hello", "hallo", 1)); }
@Test public void testAggregateWithEvictorAndProcessFunction() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); DataStream<Tuple3<String, String, Integer>> source = env.fromElements( Tuple3.of("hello", "hallo", 1), Tuple3.of("hello", "hallo", 2)); DataStream<String> window1 = source .keyBy(new Tuple3KeySelector()) .window(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .evictor(CountEvictor.of(100)) .aggregate( new DummyAggregationFunction(), new TestProcessWindowFunction()); final OneInputTransformation<Tuple3<String, String, Integer>, String> transform = (OneInputTransformation<Tuple3<String, String, Integer>, String>) window1.getTransformation(); final OneInputStreamOperator<Tuple3<String, String, Integer>, String> operator = transform.getOperator(); Assert.assertTrue(operator instanceof WindowOperator); WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple3<String, String, Integer>, ?, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof ListStateDescriptor); processElementAndEnsureOutput( winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple3<>("hello", "hallo", 1)); }