true); return process(processFunction, outType);
/**
 * Runs the supplied {@link KeyedProcessFunction} over this stream and returns the resulting
 * output stream.
 *
 * <p>The function is invoked once for every element of the input stream and may emit zero or
 * more elements each time. Unlike {@link DataStream#flatMap(FlatMapFunction)}, it may also query
 * the current time and register timers; when a timer fires, the function can emit further
 * elements and/or register additional timers.
 *
 * @param keyedProcessFunction the {@link KeyedProcessFunction} invoked for each stream element
 * @param <R> the element type emitted by the {@code KeyedProcessFunction}
 * @return the transformed {@link DataStream}
 */
@PublicEvolving
public <R> SingleOutputStreamOperator<R> process(KeyedProcessFunction<KEY, T, R> keyedProcessFunction) {
    // Ask the TypeExtractor for the function's return type; the call-location lookup stays
    // inline in this method on purpose.
    final TypeInformation<R> resultType = TypeExtractor.getUnaryOperatorReturnType(
        keyedProcessFunction,
        KeyedProcessFunction.class,
        1,
        2,
        TypeExtractor.NO_INDEX,
        getType(),
        Utils.getCallLocationName(),
        true);

    // Delegate to the overload that takes the output type explicitly.
    return process(keyedProcessFunction, resultType);
}
.process(new Tokenizer());
.process(new ProcessFunction<Integer, Integer>() { private static final long serialVersionUID = 1L;
.process(processFunction);
return value.f0; }).process(new ProcessFunction<Tuple2<Integer, Long>, Object>() { private static final long serialVersionUID = -805125545438296619L;
/** * Verify that a {@link KeyedStream#process(KeyedProcessFunction)} call is correctly translated to an operator. */ @Test public void testKeyedStreamKeyedProcessTranslation() { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStreamSource<Long> src = env.generateSequence(0, 0); KeyedProcessFunction<Long, Long, Integer> keyedProcessFunction = new KeyedProcessFunction<Long, Long, Integer>() { private static final long serialVersionUID = 1L; @Override public void processElement(Long value, Context ctx, Collector<Integer> out) throws Exception { // Do nothing } @Override public void onTimer(long timestamp, OnTimerContext ctx, Collector<Integer> out) throws Exception { // Do nothing } }; DataStream<Integer> processed = src .keyBy(new IdentityKeySelector<Long>()) .process(keyedProcessFunction); processed.addSink(new DiscardingSink<Integer>()); assertEquals(keyedProcessFunction, getFunctionForDataStream(processed)); assertTrue(getOperatorForDataStream(processed) instanceof KeyedProcessOperator); }
return value.f0; }).process(new ProcessFunction<Tuple2<Integer, Long>, Object>() { private static final long serialVersionUID = -805125545438296619L;
true); return process(processFunction, outType);
true); return process(processFunction, outType);
/**
 * Applies a {@link KeyedProcessFunction} to this stream, yielding a transformed output stream.
 *
 * <p>Every element of the input stream is handed to the function, which can emit any number of
 * output elements (including none). In contrast to {@link DataStream#flatMap(FlatMapFunction)},
 * the function can additionally query the time and set timers, and can emit elements or set
 * more timers when reacting to a timer firing.
 *
 * @param keyedProcessFunction the {@link KeyedProcessFunction} called for each element in the stream
 * @param <R> the type of elements emitted by the {@code KeyedProcessFunction}
 * @return the transformed {@link DataStream}
 */
@PublicEvolving
public <R> SingleOutputStreamOperator<R> process(KeyedProcessFunction<KEY, T, R> keyedProcessFunction) {
    // The output type is obtained from the TypeExtractor; the call-location lookup is kept
    // directly in this method.
    final TypeInformation<R> producedType = TypeExtractor.getUnaryOperatorReturnType(
        keyedProcessFunction,
        KeyedProcessFunction.class,
        1,
        2,
        TypeExtractor.NO_INDEX,
        getType(),
        Utils.getCallLocationName(),
        true);

    // Hand off to the overload that receives the output type explicitly.
    return process(keyedProcessFunction, producedType);
}
/**
 * Transforms this stream by applying the given {@link KeyedProcessFunction}.
 *
 * <p>The function is called for each element in the input stream and can produce zero or more
 * output elements. Contrary to {@link DataStream#flatMap(FlatMapFunction)}, it can also query the
 * time and set timers. When a previously set timer fires, the function can directly emit
 * elements and/or register yet more timers.
 *
 * @param keyedProcessFunction the {@link KeyedProcessFunction} that is called for each element
 * @param <R> the type of elements emitted by the {@code KeyedProcessFunction}
 * @return the transformed {@link DataStream}
 */
@PublicEvolving
public <R> SingleOutputStreamOperator<R> process(KeyedProcessFunction<KEY, T, R> keyedProcessFunction) {
    // Determine the output type through the TypeExtractor; the call-location lookup remains
    // inline here.
    final TypeInformation<R> emittedType = TypeExtractor.getUnaryOperatorReturnType(
        keyedProcessFunction,
        KeyedProcessFunction.class,
        1,
        2,
        TypeExtractor.NO_INDEX,
        getType(),
        Utils.getCallLocationName(),
        true);

    // Continue with the overload that accepts explicit output type information.
    return process(keyedProcessFunction, emittedType);
}
true); return process(processFunction, outType);
@Override public ResultStreams translate(String name, List<InputAndSpec<?, ?>> streamLedgerSpecs) { List<OutputTag<?>> sideOutputTags = createSideOutputTags(streamLedgerSpecs); // the input stream is a union of different streams. KeyedStream<TaggedElement, Boolean> input = union(streamLedgerSpecs) .keyBy(unused -> true); // main pipeline String serialTransactorName = "SerialTransactor(" + name + ")"; SingleOutputStreamOperator<Void> resultStream = input .process(new SerialTransactor(specs(streamLedgerSpecs), sideOutputTags)) .name(serialTransactorName) .uid(serialTransactorName + "___SERIAL_TX") .forceNonParallel() .returns(Void.class); // gather the sideOutputs. Map<String, DataStream<?>> output = new HashMap<>(); for (OutputTag<?> outputTag : sideOutputTags) { DataStream<?> rs = resultStream.getSideOutput(outputTag); output.put(outputTag.getId(), rs); } return new ResultStreams(output); } }
@Override public ResultStreams translate(String name, List<InputAndSpec<?, ?>> streamLedgerSpecs) { List<OutputTag<?>> sideOutputTags = createSideOutputTags(streamLedgerSpecs); // the input stream is a union of different streams. KeyedStream<TaggedElement, Boolean> input = union(streamLedgerSpecs) .keyBy(unused -> true); // main pipeline String serialTransactorName = "SerialTransactor(" + name + ")"; SingleOutputStreamOperator<Void> resultStream = input .process(new SerialTransactor(specs(streamLedgerSpecs), sideOutputTags)) .name(serialTransactorName) .uid(serialTransactorName + "___SERIAL_TX") .forceNonParallel() .returns(Void.class); // gather the sideOutputs. Map<String, DataStream<?>> output = new HashMap<>(); for (OutputTag<?> outputTag : sideOutputTags) { DataStream<?> rs = resultStream.getSideOutput(outputTag); output.put(outputTag.getId(), rs); } return new ResultStreams(output); } }
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); final String input = params.get("input", ExerciseBase.pathToRideData); final int maxEventDelay = 60; // events are out of order by max 60 seconds final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second // set up streaming execution environment StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); env.setParallelism(ExerciseBase.parallelism); // start the data generator DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor))); DataStream<TaxiRide> longRides = rides .keyBy(ride -> ride.rideId) .process(new MatchFunction()); printOrTest(longRides); env.execute("Long Taxi Rides"); }
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); final String input = params.get("input", ExerciseBase.pathToRideData); final int maxEventDelay = 60; // events are out of order by max 60 seconds final int servingSpeedFactor = 600; // events of 10 minutes are served in 1 second // set up streaming execution environment StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); env.setParallelism(ExerciseBase.parallelism); // start the data generator DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor))); DataStream<TaxiRide> longRides = rides .keyBy(r -> r.rideId) .process(new MatchFunction()); printOrTest(longRides); env.execute("Long Taxi Rides"); }
public static void main(String[] args) throws Exception { // read parameters ParameterTool params = ParameterTool.fromArgs(args); String input = params.getRequired("input"); // set up streaming execution environment StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); env.setParallelism(1); // connect to the data file DataStream<String> carData = env.readTextFile(input); // map to events DataStream<ConnectedCarEvent> events = carData .map((String line) -> ConnectedCarEvent.fromString(line)) .assignTimestampsAndWatermarks(new ConnectedCarAssigner()); // sort events events.keyBy((ConnectedCarEvent event) -> event.carId) .process(new SortFunction()) .print(); env.execute("Sort Connected Car Events"); }
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); final String input = params.get("input", ExerciseBase.pathToRideData); final int servingSpeedFactor = 1800; // 30 minutes worth of events are served every second // set up streaming execution environment StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); env.setParallelism(ExerciseBase.parallelism); // set up checkpointing env.setStateBackend(new FsStateBackend("file:///tmp/checkpoints")); env.enableCheckpointing(1000); env.setRestartStrategy(RestartStrategies.fixedDelayRestart(60, Time.of(10, TimeUnit.SECONDS))); DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new CheckpointedTaxiRideSource(input, servingSpeedFactor))); DataStream<TaxiRide> longRides = rides .filter(new NYCFilter()) .keyBy((TaxiRide ride) -> ride.rideId) .process(new MatchFunction()); printOrTest(longRides); env.execute("Long Taxi Rides (checkpointed)"); }
.process(new ClosestTaxi());