/**
 * Declares how the key is extracted from elements of the first input, with the key's
 * type information supplied explicitly by the caller.
 *
 * <p>Both arguments are null-checked before the key selector is cleaned via the first input.
 *
 * @param keySelector The KeySelector used to extract the first input's key for partitioning.
 * @param keyType The type information describing the key type.
 * @param <KEY> The type of the extracted key.
 * @return A new {@code Where} built from the cleaned key selector and the given key type.
 */
public <KEY> Where<KEY> where(KeySelector<T1, KEY> keySelector, TypeInformation<KEY> keyType) {
	requireNonNull(keySelector);
	requireNonNull(keyType);

	final KeySelector<T1, KEY> cleanedSelector = input1.clean(keySelector);
	return new Where<>(cleanedSelector, keyType);
}
/**
 * Directs tuples to specific named outputs by means of an
 * {@link org.apache.flink.streaming.api.collector.selector.OutputSelector}.
 * Invoking this method on an operator produces a new {@link SplitStream}.
 *
 * @param outputSelector
 *            The user defined
 *            {@link org.apache.flink.streaming.api.collector.selector.OutputSelector}
 *            that directs the tuples.
 * @return The {@link SplitStream}
 * @deprecated Please use side output instead.
 */
@Deprecated
public SplitStream<T> split(OutputSelector<T> outputSelector) {
	final OutputSelector<T> cleanedSelector = clean(outputSelector);
	return new SplitStream<>(this, cleanedSelector);
}
/**
 * Partitions this DataStream with a custom partitioner applied to the key that the given
 * selector returns. The selector extracts the key to partition on, and the partitioner
 * accepts that key type.
 *
 * <p>Note: This method works only on single field keys, i.e. the selector cannot return tuples
 * of fields.
 *
 * @param partitioner
 *            The partitioner that assigns partitions to keys.
 * @param keySelector
 *            The KeySelector with which the DataStream is partitioned.
 * @param <K>
 *            The key type shared by the partitioner and the key selector.
 * @return The partitioned DataStream.
 * @see KeySelector
 */
public <K> DataStream<T> partitionCustom(Partitioner<K> partitioner, KeySelector<T, K> keySelector) {
	final Partitioner<K> cleanedPartitioner = clean(partitioner);
	final KeySelector<T, K> cleanedSelector = clean(keySelector);

	return setConnectionType(new CustomPartitionerWrapper<>(cleanedPartitioner, cleanedSelector));
}
/**
 * Creates a new {@link KeyedStream} that partitions its operator states by the key
 * extracted with the provided selector.
 *
 * @param key
 *            The KeySelector to be used for extracting the key for partitioning; must not be null.
 * @param <K>
 *            The type of the extracted key.
 * @return The {@link DataStream} with partitioned state (i.e. KeyedStream)
 */
public <K> KeyedStream<T, K> keyBy(KeySelector<T, K> key) {
	Preconditions.checkNotNull(key);

	final KeySelector<T, K> cleanedKey = clean(key);
	return new KeyedStream<>(this, cleanedKey);
}
/**
 * Partitions this DataStream with a custom partitioner, using a key selector that
 * {@link KeySelectorUtil#getSelectorForOneKey} derives from the given keys, the partitioner,
 * this stream's type and its execution config.
 *
 * @param partitioner The partitioner to wrap for the connection type.
 * @param keys The keys from which the single-key selector is obtained.
 * @param <K> The key type accepted by the partitioner.
 * @return The result of setting the custom partitioner wrapper as the connection type.
 */
private <K> DataStream<T> partitionCustom(Partitioner<K> partitioner, Keys<T> keys) {
	final KeySelector<T, K> derivedSelector =
		KeySelectorUtil.getSelectorForOneKey(keys, partitioner, getType(), getExecutionConfig());

	final Partitioner<K> cleanedPartitioner = clean(partitioner);
	final KeySelector<T, K> cleanedSelector = clean(derivedSelector);

	return setConnectionType(new CustomPartitionerWrapper<>(cleanedPartitioner, cleanedSelector));
}
/**
 * Declares how the key is extracted from elements of the first input, with explicit
 * type information for the key.
 *
 * <p>Both arguments are null-checked before the key selector is cleaned via the first input.
 *
 * @param keySelector The KeySelector used to extract the first input's key for partitioning.
 * @param keyType The type information describing the key type.
 * @param <KEY> The type of the extracted key.
 * @return A new {@code Where} built from the cleaned key selector and the given key type.
 */
public <KEY> Where<KEY> where(KeySelector<T1, KEY> keySelector, TypeInformation<KEY> keyType) {
	Preconditions.checkNotNull(keySelector);
	Preconditions.checkNotNull(keyType);

	final KeySelector<T1, KEY> cleanedSelector = input1.clean(keySelector);
	return new Where<>(cleanedSelector, keyType);
}
/**
 * Applies the given {@link CoProcessFunction} on the connected input streams,
 * thereby creating a transformed output stream.
 *
 * <p>The function will be called for every element in the input streams and can produce zero
 * or more output elements. Contrary to the {@link #flatMap(CoFlatMapFunction)} function,
 * this function can also query the time and set timers. When reacting to the firing of set
 * timers the function can directly emit elements and/or register yet more timers.
 *
 * <p>A {@code KeyedCoProcessOperator} is used when both inputs are {@link KeyedStream}s;
 * in every other case a {@code CoProcessOperator} is used.
 *
 * @param coProcessFunction The {@link CoProcessFunction} that is called for each element
 *                      in the stream.
 * @param outputType The {@link TypeInformation} handed to the transformation as its output type.
 *
 * @param <R> The type of elements emitted by the {@code CoProcessFunction}.
 *
 * @return The transformed {@link DataStream}.
 */
@Internal
public <R> SingleOutputStreamOperator<R> process(
		CoProcessFunction<IN1, IN2, R> coProcessFunction,
		TypeInformation<R> outputType) {

	final boolean bothInputsKeyed =
		(inputStream1 instanceof KeyedStream) && (inputStream2 instanceof KeyedStream);

	final TwoInputStreamOperator<IN1, IN2, R> coProcessOperator;
	if (!bothInputsKeyed) {
		coProcessOperator = new CoProcessOperator<>(inputStream1.clean(coProcessFunction));
	} else {
		coProcessOperator = new KeyedCoProcessOperator<>(inputStream1.clean(coProcessFunction));
	}

	return transform("Co-Process", outputType, coProcessOperator);
}
/**
 * Creates a new {@link KeyedStream} that partitions its operator states by the key
 * extracted with the provided selector, using explicitly supplied key type information.
 *
 * @param key The KeySelector to be used for extracting the key for partitioning; must not be null.
 * @param keyType The type information describing the key type; must not be null.
 * @param <K> The type of the extracted key.
 * @return The {@link DataStream} with partitioned state (i.e. KeyedStream)
 */
public <K> KeyedStream<T, K> keyBy(KeySelector<T, K> key, TypeInformation<K> keyType) {
	Preconditions.checkNotNull(key);
	Preconditions.checkNotNull(keyType);

	final KeySelector<T, K> cleanedKey = clean(key);
	return new KeyedStream<>(this, cleanedKey, keyType);
}
/**
 * Applies a Map transformation on a {@link DataStream}. The transformation
 * calls a {@link MapFunction} for each element of the DataStream. Each
 * MapFunction call returns exactly one element. The user can also extend
 * {@link RichMapFunction} to gain access to other features provided by the
 * {@link org.apache.flink.api.common.functions.RichFunction} interface.
 *
 * @param mapper
 *            The MapFunction that is called for each element of the
 *            DataStream.
 * @param <R>
 *            output type
 * @return The transformed {@link DataStream}.
 */
public <R> SingleOutputStreamOperator<R> map(MapFunction<T, R> mapper) {
	// Clean the user function a single time and hand the same cleaned reference to both
	// the return-type extraction and the operator, instead of invoking clean(...) twice.
	final MapFunction<T, R> cleanedMapper = clean(mapper);

	// Utils.getCallLocationName() is deliberately still invoked directly from this method.
	final TypeInformation<R> outType = TypeExtractor.getMapReturnTypes(
		cleanedMapper, getType(), Utils.getCallLocationName(), true);

	return transform("Map", outType, new StreamMap<>(cleanedMapper));
}
/**
 * Applies a FlatMap transformation on a {@link DataStream}. The
 * transformation calls a {@link FlatMapFunction} for each element of the
 * DataStream. Each FlatMapFunction call can return any number of elements
 * including none. The user can also extend {@link RichFlatMapFunction} to
 * gain access to other features provided by the
 * {@link org.apache.flink.api.common.functions.RichFunction} interface.
 *
 * @param flatMapper
 *            The FlatMapFunction that is called for each element of the
 *            DataStream
 *
 * @param <R>
 *            output type
 * @return The transformed {@link DataStream}.
 */
public <R> SingleOutputStreamOperator<R> flatMap(FlatMapFunction<T, R> flatMapper) {
	// Clean the user function a single time and hand the same cleaned reference to both
	// the return-type extraction and the operator, instead of invoking clean(...) twice.
	final FlatMapFunction<T, R> cleanedFlatMapper = clean(flatMapper);

	// Utils.getCallLocationName() is deliberately still invoked directly from this method.
	final TypeInformation<R> outType = TypeExtractor.getFlatMapReturnTypes(
		cleanedFlatMapper, getType(), Utils.getCallLocationName(), true);

	return transform("Flat Map", outType, new StreamFlatMap<>(cleanedFlatMapper));
}
/**
 * Applies a Filter transformation on a {@link DataStream}. A
 * {@link FilterFunction} is invoked for each element of the DataStream and
 * only those elements for which it returns true are retained; elements for
 * which it returns false are filtered out. The user can also extend
 * {@link RichFilterFunction} to gain access to other features provided by the
 * {@link org.apache.flink.api.common.functions.RichFunction} interface.
 *
 * @param filter
 *            The FilterFunction that is called for each element of the
 *            DataStream.
 * @return The filtered DataStream.
 */
public SingleOutputStreamOperator<T> filter(FilterFunction<T> filter) {
	// The output type of a filter is the stream's own type; it is read before cleaning.
	final TypeInformation<T> elementType = getType();
	final FilterFunction<T> cleanedFilter = clean(filter);

	return transform("Filter", elementType, new StreamFilter<>(cleanedFilter));
}
/**
 * Creates a {@link KeyedStream} keyed by the selector that
 * {@link KeySelectorUtil#getSelectorForKeys} builds from the given keys, this stream's
 * type and its execution config.
 *
 * @param keys The keys from which the key selector is obtained.
 * @return The {@link KeyedStream} built on this stream with the cleaned selector.
 */
private KeyedStream<T, Tuple> keyBy(Keys<T> keys) {
	final KeySelector<T, Tuple> tupleSelector =
		KeySelectorUtil.getSelectorForKeys(keys, getType(), getExecutionConfig());

	return new KeyedStream<>(this, clean(tupleSelector));
}
/**
 * Applies the given {@link ProcessFunction} on the input stream, thereby
 * creating a transformed output stream.
 *
 * <p>The function will be called for every element in the input streams and can produce zero
 * or more output elements.
 *
 * @param processFunction The {@link ProcessFunction} that is called for each element
 *                      in the stream.
 * @param outputType {@link TypeInformation} for the result type of the function.
 *
 * @param <R> The type of elements emitted by the {@code ProcessFunction}.
 *
 * @return The transformed {@link DataStream}.
 */
@Internal
public <R> SingleOutputStreamOperator<R> process(
		ProcessFunction<T, R> processFunction,
		TypeInformation<R> outputType) {

	final ProcessFunction<T, R> cleanedFunction = clean(processFunction);
	final ProcessOperator<T, R> processOperator = new ProcessOperator<>(cleanedFunction);

	return transform("Process", outputType, processOperator);
}
return new EqualTo(input2.clean(keySelector));
/** * Extracts a timestamp from an element and assigns it as the internal timestamp of that element. * The internal timestamps are, for example, used to to event-time window operations. * * <p>If you know that the timestamps are strictly increasing you can use an * {@link AscendingTimestampExtractor}. Otherwise, * you should provide a {@link TimestampExtractor} that also implements * {@link TimestampExtractor#getCurrentWatermark()} to keep track of watermarks. * * @param extractor The TimestampExtractor that is called for each element of the DataStream. * * @deprecated Please use {@link #assignTimestampsAndWatermarks(AssignerWithPeriodicWatermarks)} * of {@link #assignTimestampsAndWatermarks(AssignerWithPunctuatedWatermarks)} * instead. * @see #assignTimestampsAndWatermarks(AssignerWithPeriodicWatermarks) * @see #assignTimestampsAndWatermarks(AssignerWithPunctuatedWatermarks) */ @Deprecated public SingleOutputStreamOperator<T> assignTimestamps(TimestampExtractor<T> extractor) { // match parallelism to input, otherwise dop=1 sources could lead to some strange // behaviour: the watermark will creep along very slowly because the elements // from the source go to each extraction operator round robin. int inputParallelism = getTransformation().getParallelism(); ExtractTimestampsOperator<T> operator = new ExtractTimestampsOperator<>(clean(extractor)); return transform("ExtractTimestamps", getTransformation().getOutputType(), operator) .setParallelism(inputParallelism); }
/**
 * Applies a CoMap transformation on a {@link ConnectedStreams} and maps
 * the output to a common type. {@link CoMapFunction#map1} is called for each
 * element of the first input and {@link CoMapFunction#map2} for each element
 * of the second input. Each CoMapFunction call returns exactly one element.
 *
 * @param coMapper The CoMapFunction used to jointly transform the two input DataStreams
 * @param <R> The common output type.
 * @return The transformed {@link DataStream}
 */
public <R> SingleOutputStreamOperator<R> map(CoMapFunction<IN1, IN2, R> coMapper) {
	// The return type is extracted from the function as passed in, before it is cleaned.
	final TypeInformation<R> outTypeInfo = TypeExtractor.getBinaryOperatorReturnType(
		coMapper,
		CoMapFunction.class,
		0,
		1,
		2,
		TypeExtractor.NO_INDEX,
		getType1(),
		getType2(),
		Utils.getCallLocationName(),
		true);

	final CoMapFunction<IN1, IN2, R> cleanedMapper = inputStream1.clean(coMapper);

	return transform("Co-Map", outTypeInfo, new CoStreamMap<>(cleanedMapper));
}
/**
 * Applies a CoFlatMap transformation on a {@link ConnectedStreams} and
 * maps the output to a common type. {@link CoFlatMapFunction#flatMap1} is
 * called for each element of the first input and
 * {@link CoFlatMapFunction#flatMap2} for each element of the second
 * input. Each CoFlatMapFunction call returns any number of elements
 * including none.
 *
 * @param coFlatMapper
 *            The CoFlatMapFunction used to jointly transform the two input
 *            DataStreams
 * @param <R>
 *            The common output type.
 * @return The transformed {@link DataStream}
 */
public <R> SingleOutputStreamOperator<R> flatMap(
		CoFlatMapFunction<IN1, IN2, R> coFlatMapper) {

	// The return type is extracted from the function as passed in, before it is cleaned.
	final TypeInformation<R> outTypeInfo = TypeExtractor.getBinaryOperatorReturnType(
		coFlatMapper,
		CoFlatMapFunction.class,
		0,
		1,
		2,
		TypeExtractor.NO_INDEX,
		getType1(),
		getType2(),
		Utils.getCallLocationName(),
		true);

	final CoFlatMapFunction<IN1, IN2, R> cleanedFlatMapper = inputStream1.clean(coFlatMapper);

	return transform("Co-Flat Map", outTypeInfo, new CoStreamFlatMap<>(cleanedFlatMapper));
}
final AssignerWithPunctuatedWatermarks<T> cleanedAssigner = clean(timestampAndWatermarkAssigner);
/** * Adds the given sink to this DataStream. Only streams with sinks added * will be executed once the {@link StreamExecutionEnvironment#execute()} * method is called. * * @param sinkFunction * The object containing the sink's invoke function. * @return The closed DataStream. */ public DataStreamSink<T> addSink(SinkFunction<T> sinkFunction) { // read the output type of the input Transform to coax out errors about MissingTypeInfo transformation.getOutputType(); // configure the type if needed if (sinkFunction instanceof InputTypeConfigurable) { ((InputTypeConfigurable) sinkFunction).setInputType(getType(), getExecutionConfig()); } StreamSink<T> sinkOperator = new StreamSink<>(clean(sinkFunction)); DataStreamSink<T> sink = new DataStreamSink<>(this, sinkOperator); getExecutionEnvironment().addOperator(sink.getTransformation()); return sink; }
final AssignerWithPeriodicWatermarks<T> cleanedAssigner = clean(timestampAndWatermarkAssigner);