DataStream<Long> broadcast = src.broadcast(); DataStreamSink<Long> broadcastSink = broadcast.print(); StreamPartitioner<?> broadcastPartitioner =
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); final String input = params.get("input", ExerciseBase.pathToRideData); final int maxEventDelay = 60; // events are out of order by at most 60 seconds final int servingSpeedFactor = 600; // 10 minutes worth of events are served every second // In this simple case we need a broadcast state descriptor, but aren't going to // use it to store anything. final MapStateDescriptor<Long, Long> dummyBroadcastState = new MapStateDescriptor<>( "dummy", BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO ); // set up streaming execution environment StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); env.setParallelism(ExerciseBase.parallelism); DataStream<TaxiRide> rides = env.addSource(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor)); // add a socket source BroadcastStream<String> queryStream = env.socketTextStream("localhost", 9999) // EXERCISE QUESTION: Is this needed? // .assignTimestampsAndWatermarks(new QueryStreamAssigner()) .broadcast(dummyBroadcastState); DataStream<TaxiRide> reports = rides .keyBy((TaxiRide ride) -> ride.taxiId) .connect(queryStream) .process(new QueryFunction()); printOrTest(reports); env.execute("Ongoing Rides"); }
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); final String input = params.get("input", ExerciseBase.pathToRideData); final int maxEventDelay = 60; // events are out of order by at most 60 seconds final int servingSpeedFactor = 1800; // 30 minutes worth of events are served every second // set up streaming execution environment StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); env.setParallelism(ExerciseBase.parallelism); // setup a stream of taxi rides DataStream<TaxiRide> rides = env.addSource(rideSourceOrTest(new TaxiRideSource(input, maxEventDelay, servingSpeedFactor))); // add a socket source for the query stream BroadcastStream<String> queryStream = env .addSource(stringSourceOrTest(new SocketTextStreamFunction("localhost", 9999, "\n", -1))) .broadcast(queryDescriptor); // connect the two streams and process queries DataStream<Tuple2<String, String>> results = rides .keyBy((TaxiRide ride) -> ride.taxiId) .connect(queryStream) .process(new QueryProcessor()); printOrTest(results); env.execute("Taxi Query"); }