Collection<Integer> inputIds = transform(transform.getInput()); String slotSharingGroup = determineSlotSharingGroup(transform.getSlotSharingGroup(), inputIds); streamGraph.addOperator(transform.getId(), slotSharingGroup, transform.getCoLocationGroupKey(), transform.getOperator(), transform.getInputType(), transform.getOutputType(), transform.getName()); if (transform.getStateKeySelector() != null) { TypeSerializer<?> keySerializer = transform.getStateKeyType().createSerializer(env.getConfig()); streamGraph.setOneInputStateKey(transform.getId(), transform.getStateKeySelector(), keySerializer); streamGraph.setParallelism(transform.getId(), transform.getParallelism()); streamGraph.setMaxParallelism(transform.getId(), transform.getMaxParallelism()); streamGraph.addEdge(inputId, transform.getId(), 0); return Collections.singleton(transform.getId());
/** * Method for passing user defined operators along with the type * information that will transform the DataStream. * * @param operatorName * name of the operator, for logging purposes * @param outTypeInfo * the output type of the operator * @param operator * the object containing the transformation logic * @param <R> * type of the return stream * @return the data stream constructed */ @PublicEvolving public <R> SingleOutputStreamOperator<R> transform(String operatorName, TypeInformation<R> outTypeInfo, OneInputStreamOperator<T, R> operator) { // read the output type of the input Transform to coax out errors about MissingTypeInfo transformation.getOutputType(); OneInputTransformation<T, R> resultTransform = new OneInputTransformation<>( this.transformation, operatorName, operator, outTypeInfo, environment.getParallelism()); @SuppressWarnings({ "unchecked", "rawtypes" }) SingleOutputStreamOperator<R> returnStream = new SingleOutputStreamOperator(environment, resultTransform); getExecutionEnvironment().addOperator(resultTransform); return returnStream; }
private void validateStateDescriptorConfigured(SingleOutputStreamOperator<?> result) { OneInputTransformation<?, ?> transform = (OneInputTransformation<?, ?>) result.getTransformation(); WindowOperator<?, ?, ?, ?, ?> op = (WindowOperator<?, ?, ?, ?, ?>) transform.getOperator(); StateDescriptor<?, ?> descr = op.getStateDescriptor(); // this would be the first statement to fail if state descriptors were not properly initialized TypeSerializer<?> serializer = descr.getSerializer(); assertTrue(serializer instanceof KryoSerializer); Kryo kryo = ((KryoSerializer<?>) serializer).getKryo(); assertTrue("serializer registration was not properly passed on", kryo.getSerializer(File.class) instanceof JavaSerializer); }
@Test public void readSourceTranslatorUnboundedWithoutMaxParallelism() { final int parallelism = 2; Read.Unbounded transform = Read.from(new TestUnboundedSource()); StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(parallelism); StreamTransformation<?> sourceTransform = applyReadSourceTransform(transform, PCollection.IsBounded.UNBOUNDED, env); UnboundedSourceWrapper source = (UnboundedSourceWrapper) ((SourceTransformation) ((OneInputTransformation) sourceTransform).getInput()) .getOperator() .getUserFunction(); assertEquals(parallelism, source.getSplitSources().size()); }
private void validateListStateDescriptorConfigured(SingleOutputStreamOperator<?> result) { OneInputTransformation<?, ?> transform = (OneInputTransformation<?, ?>) result.getTransformation(); WindowOperator<?, ?, ?, ?, ?> op = (WindowOperator<?, ?, ?, ?, ?>) transform.getOperator(); StateDescriptor<?, ?> descr = op.getStateDescriptor(); assertTrue(descr instanceof ListStateDescriptor); ListStateDescriptor<?> listDescr = (ListStateDescriptor<?>) descr; // this would be the first statement to fail if state descriptors were not properly initialized TypeSerializer<?> serializer = listDescr.getSerializer(); assertTrue(serializer instanceof ListSerializer); TypeSerializer<?> elementSerializer = listDescr.getElementSerializer(); assertTrue(elementSerializer instanceof KryoSerializer); Kryo kryo = ((KryoSerializer<?>) elementSerializer).getKryo(); assertTrue("serializer registration was not properly passed on", kryo.getSerializer(File.class) instanceof JavaSerializer); } }
@Test public void readSourceTranslatorUnboundedWithMaxParallelism() { final int maxParallelism = 6; final int parallelism = 2; Read.Unbounded transform = Read.from(new TestUnboundedSource()); StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(parallelism); env.setMaxParallelism(maxParallelism); StreamTransformation<?> sourceTransform = applyReadSourceTransform(transform, PCollection.IsBounded.UNBOUNDED, env); UnboundedSourceWrapper source = (UnboundedSourceWrapper) ((SourceTransformation) ((OneInputTransformation) sourceTransform).getInput()) .getOperator() .getUserFunction(); assertEquals(maxParallelism, source.getSplitSources().size()); }
Collection<Integer> inputIds = transform(transform.getInput()); String slotSharingGroup = determineSlotSharingGroup(transform.getSlotSharingGroup(), inputIds); streamGraph.addOperator(transform.getId(), slotSharingGroup, transform.getCoLocationGroupKey(), transform.getOperator(), transform.getInputType(), transform.getOutputType(), transform.getName()); if (transform.getStateKeySelector() != null) { TypeSerializer<?> keySerializer = transform.getStateKeyType().createSerializer(env.getConfig()); streamGraph.setOneInputStateKey(transform.getId(), transform.getStateKeySelector(), keySerializer); streamGraph.setParallelism(transform.getId(), transform.getParallelism()); streamGraph.setMaxParallelism(transform.getId(), transform.getMaxParallelism()); streamGraph.addEdge(inputId, transform.getId(), 0); return Collections.singleton(transform.getId());
OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator1 = transform1.getOperator(); Assert.assertTrue(operator1 instanceof WindowOperator); OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator2 = transform2.getOperator(); Assert.assertTrue(operator2 instanceof WindowOperator);
/** * Method for passing user defined operators along with the type * information that will transform the DataStream. * * @param operatorName * name of the operator, for logging purposes * @param outTypeInfo * the output type of the operator * @param operator * the object containing the transformation logic * @param <R> * type of the return stream * @return the data stream constructed */ @PublicEvolving public <R> SingleOutputStreamOperator<R> transform(String operatorName, TypeInformation<R> outTypeInfo, OneInputStreamOperator<T, R> operator) { // read the output type of the input Transform to coax out errors about MissingTypeInfo transformation.getOutputType(); OneInputTransformation<T, R> resultTransform = new OneInputTransformation<>( this.transformation, operatorName, operator, outTypeInfo, environment.getParallelism()); @SuppressWarnings({ "unchecked", "rawtypes" }) SingleOutputStreamOperator<R> returnStream = new SingleOutputStreamOperator(environment, resultTransform); getExecutionEnvironment().addOperator(resultTransform); return returnStream; }
Collection<Integer> inputIds = transform(transform.getInput()); String slotSharingGroup = determineSlotSharingGroup(transform.getSlotSharingGroup(), inputIds); streamGraph.addOperator(transform.getId(), slotSharingGroup, transform.getCoLocationGroupKey(), transform.getOperator(), transform.getInputType(), transform.getOutputType(), transform.getName()); if (transform.getStateKeySelector() != null) { TypeSerializer<?> keySerializer = transform.getStateKeyType().createSerializer(env.getConfig()); streamGraph.setOneInputStateKey(transform.getId(), transform.getStateKeySelector(), keySerializer); streamGraph.setParallelism(transform.getId(), transform.getParallelism()); streamGraph.setMaxParallelism(transform.getId(), transform.getMaxParallelism()); streamGraph.addEdge(inputId, transform.getId(), 0); return Collections.singleton(transform.getId());
@Test @SuppressWarnings("rawtypes") public void testReduceEventTimeWindows() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); DataStream<Tuple2<String, Integer>> source = env.fromElements( Tuple2.of("hello", 1), Tuple2.of("hello", 2)); DataStream<Tuple2<String, Integer>> window1 = source .keyBy(0) .timeWindow(Time.of(1000, TimeUnit.MILLISECONDS), Time.of(100, TimeUnit.MILLISECONDS)) .reduce(new DummyReducer()); OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform1 = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation(); OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator1 = transform1.getOperator(); Assert.assertTrue(operator1 instanceof WindowOperator); WindowOperator winOperator1 = (WindowOperator) operator1; Assert.assertTrue(winOperator1.getTrigger() instanceof EventTimeTrigger); Assert.assertTrue(winOperator1.getWindowAssigner() instanceof SlidingEventTimeWindows); Assert.assertTrue(winOperator1.getStateDescriptor() instanceof ReducingStateDescriptor); }
/** * Method for passing user defined operators along with the type * information that will transform the DataStream. * * @param operatorName * name of the operator, for logging purposes * @param outTypeInfo * the output type of the operator * @param operator * the object containing the transformation logic * @param <R> * type of the return stream * @return the data stream constructed */ @PublicEvolving public <R> SingleOutputStreamOperator<R> transform(String operatorName, TypeInformation<R> outTypeInfo, OneInputStreamOperator<T, R> operator) { // read the output type of the input Transform to coax out errors about MissingTypeInfo transformation.getOutputType(); OneInputTransformation<T, R> resultTransform = new OneInputTransformation<>( this.transformation, operatorName, operator, outTypeInfo, environment.getParallelism()); @SuppressWarnings({ "unchecked", "rawtypes" }) SingleOutputStreamOperator<R> returnStream = new SingleOutputStreamOperator(environment, resultTransform); getExecutionEnvironment().addOperator(resultTransform); return returnStream; }
/** * Transforms a {@code OneInputTransformation}. * * <p>This recursively transforms the inputs, creates a new {@code StreamNode} in the graph and * wired the inputs to this new node. */ private <IN, OUT> Collection<Integer> transformOneInputTransform(OneInputTransformation<IN, OUT> transform) { Collection<Integer> inputIds = transform(transform.getInput()); // the recursive call might have already transformed this if (alreadyTransformed.containsKey(transform)) { return alreadyTransformed.get(transform); } String slotSharingGroup = determineSlotSharingGroup(transform.getSlotSharingGroup(), inputIds); streamGraph.addOperator(transform.getId(), slotSharingGroup, transform.getOperator(), transform.getInputType(), transform.getOutputType(), transform.getName()); if (transform.getStateKeySelector() != null) { TypeSerializer<?> keySerializer = transform.getStateKeyType().createSerializer(env.getConfig()); streamGraph.setOneInputStateKey(transform.getId(), transform.getStateKeySelector(), keySerializer); } streamGraph.setParallelism(transform.getId(), transform.getParallelism()); streamGraph.setMaxParallelism(transform.getId(), transform.getMaxParallelism()); for (Integer inputId: inputIds) { streamGraph.addEdge(inputId, transform.getId(), 0); } return Collections.singleton(transform.getId()); }
@Test @SuppressWarnings("rawtypes") public void testApplyEventTimeWindows() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); DataStream<Tuple2<String, Integer>> source = env.fromElements( Tuple2.of("hello", 1), Tuple2.of("hello", 2)); DataStream<Tuple2<String, Integer>> window1 = source .keyBy(0) .timeWindow(Time.of(1000, TimeUnit.MILLISECONDS)) .apply(new WindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple, TimeWindow>() { private static final long serialVersionUID = 1L; @Override public void apply(Tuple tuple, TimeWindow window, Iterable<Tuple2<String, Integer>> values, Collector<Tuple2<String, Integer>> out) throws Exception { } }); OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform1 = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation(); OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator1 = transform1.getOperator(); Assert.assertTrue(operator1 instanceof WindowOperator); WindowOperator winOperator1 = (WindowOperator) operator1; Assert.assertTrue(winOperator1.getTrigger() instanceof EventTimeTrigger); Assert.assertTrue(winOperator1.getWindowAssigner() instanceof TumblingEventTimeWindows); Assert.assertTrue(winOperator1.getStateDescriptor() instanceof ListStateDescriptor); }
/** * Method for passing user defined operators along with the type * information that will transform the DataStream. * * @param operatorName * name of the operator, for logging purposes * @param outTypeInfo * the output type of the operator * @param operator * the object containing the transformation logic * @param <R> * type of the return stream * @return the data stream constructed */ @PublicEvolving public <R> SingleOutputStreamOperator<R> transform(String operatorName, TypeInformation<R> outTypeInfo, OneInputStreamOperator<T, R> operator) { // read the output type of the input Transform to coax out errors about MissingTypeInfo transformation.getOutputType(); OneInputTransformation<T, R> resultTransform = new OneInputTransformation<>( this.transformation, operatorName, operator, outTypeInfo, environment.getParallelism()); @SuppressWarnings({ "unchecked", "rawtypes" }) SingleOutputStreamOperator<R> returnStream = new SingleOutputStreamOperator(environment, resultTransform); getExecutionEnvironment().addOperator(resultTransform); return returnStream; }
@Test @SuppressWarnings("rawtypes") public void testFoldEventTimeWindows() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); DataStream<Tuple2<String, Integer>> source = env.fromElements( Tuple2.of("hello", 1), Tuple2.of("hello", 2)); DataStream<Tuple2<String, Integer>> window1 = source .keyBy(0) .timeWindow(Time.of(1000, TimeUnit.MILLISECONDS), Time.of(100, TimeUnit.MILLISECONDS)) .fold(new Tuple2<>("", 1), new DummyFolder()); OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform1 = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation(); OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator1 = transform1.getOperator(); Assert.assertTrue(operator1 instanceof WindowOperator); WindowOperator winOperator1 = (WindowOperator) operator1; Assert.assertTrue(winOperator1.getTrigger() instanceof EventTimeTrigger); Assert.assertTrue(winOperator1.getWindowAssigner() instanceof SlidingEventTimeWindows); Assert.assertTrue(winOperator1.getStateDescriptor() instanceof FoldingStateDescriptor); }
@Test @SuppressWarnings("rawtypes") public void testReduceEventTime() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); DataStream<Tuple2<String, Integer>> window1 = source .windowAll(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .reduce(new DummyReducer()); OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation(); OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator(); Assert.assertTrue(operator instanceof WindowOperator); WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof ReducingStateDescriptor); processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1)); }
@Test @SuppressWarnings("rawtypes") public void testReduceProcessingTime() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); DataStream<Tuple2<String, Integer>> window1 = source .windowAll(SlidingProcessingTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .reduce(new DummyReducer()); OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation(); OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator(); Assert.assertTrue(operator instanceof WindowOperator); WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof ProcessingTimeTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingProcessingTimeWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof ReducingStateDescriptor); processElementAndEnsureOutput(winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1)); }
@Test public void testAggregateWithWindowFunctionProcessingTime() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); DataStream<Tuple3<String, String, Integer>> window = source .windowAll(TumblingProcessingTimeWindows.of(Time.of(1, TimeUnit.SECONDS))) .aggregate(new DummyAggregationFunction(), new TestAllWindowFunction()); OneInputTransformation<Tuple2<String, Integer>, Tuple3<String, String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple3<String, String, Integer>>) window.getTransformation(); OneInputStreamOperator<Tuple2<String, Integer>, Tuple3<String, String, Integer>> operator = transform.getOperator(); Assert.assertTrue(operator instanceof WindowOperator); WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof ProcessingTimeTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof TumblingProcessingTimeWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof AggregatingStateDescriptor); processElementAndEnsureOutput( operator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1)); }
@Test public void testAggregateEventTime() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime); DataStream<Tuple2<String, Integer>> source = env.fromElements(Tuple2.of("hello", 1), Tuple2.of("hello", 2)); DataStream<Tuple2<String, Integer>> window1 = source .windowAll(SlidingEventTimeWindows.of(Time.of(1, TimeUnit.SECONDS), Time.of(100, TimeUnit.MILLISECONDS))) .aggregate(new DummyAggregationFunction()); OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>> transform = (OneInputTransformation<Tuple2<String, Integer>, Tuple2<String, Integer>>) window1.getTransformation(); OneInputStreamOperator<Tuple2<String, Integer>, Tuple2<String, Integer>> operator = transform.getOperator(); Assert.assertTrue(operator instanceof WindowOperator); WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?> winOperator = (WindowOperator<String, Tuple2<String, Integer>, ?, ?, ?>) operator; Assert.assertTrue(winOperator.getTrigger() instanceof EventTimeTrigger); Assert.assertTrue(winOperator.getWindowAssigner() instanceof SlidingEventTimeWindows); Assert.assertTrue(winOperator.getStateDescriptor() instanceof AggregatingStateDescriptor); processElementAndEnsureOutput( winOperator, winOperator.getKeySelector(), BasicTypeInfo.STRING_TYPE_INFO, new Tuple2<>("hello", 1)); }