public static void main(final String[] args) throws Exception { final ParameterTool params = ParameterTool.fromArgs(args); final String outputPath = params.getRequired("outputPath"); final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(4); env.enableCheckpointing(5000L); env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, Time.of(10L, TimeUnit.SECONDS))); final StreamingFileSink<Tuple2<Integer, Integer>> sink = StreamingFileSink .forRowFormat(new Path(outputPath), (Encoder<Tuple2<Integer, Integer>>) (element, stream) -> { PrintStream out = new PrintStream(stream); out.println(element.f1); }) .withBucketAssigner(new KeyBucketAssigner()) .withRollingPolicy(OnCheckpointRollingPolicy.build()) .build(); // generate data, shuffle, sink env.addSource(new Generator(10, 10, 60)) .keyBy(0) .addSink(sink); env.execute("StreamingFileSinkProgram"); }
.keyBy(new KeySelector<Email, String>() {
public static void main(String[] args) throws Exception { ParameterTool params = ParameterTool.fromArgs(args); String outputPath = params.getRequired("outputPath"); StreamExecutionEnvironment sEnv = StreamExecutionEnvironment.getExecutionEnvironment(); sEnv.setRestartStrategy(RestartStrategies.fixedDelayRestart( 3, Time.of(10, TimeUnit.SECONDS) )); sEnv.enableCheckpointing(4000); final int idlenessMs = 10; // define bucketing sink to emit the result BucketingSink<Tuple4<Integer, Long, Integer, String>> sink = new BucketingSink<Tuple4<Integer, Long, Integer, String>>(outputPath) .setBucketer(new KeyBucketer()); // generate data, shuffle, perform stateful operation, sink sEnv.addSource(new Generator(10, idlenessMs, 60)) .keyBy(0) .map(new SubtractingMapper(-1L * idlenessMs)) .addSink(sink); sEnv.execute(); }
new Event(2, "end", 1.0), new Event(42, "end", 42.0) ).keyBy(new KeySelector<Event, Integer>() {
public static void main(String[] args) throws Exception { // parse input arguments final ParameterTool parameterTool = ParameterTool.fromArgs(args); StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool); DataStream<KafkaEvent> input = env .addSource( new FlinkKafkaConsumer010<>( parameterTool.getRequired("input-topic"), new KafkaEventSchema(), parameterTool.getProperties()) .assignTimestampsAndWatermarks(new CustomWatermarkExtractor())) .keyBy("word") .map(new RollingAdditionMapper()); input.addSink( new FlinkKafkaProducer010<>( parameterTool.getRequired("output-topic"), new KafkaEventSchema(), parameterTool.getProperties())); env.execute("Kafka 0.10 Example"); }
.keyBy((KeySelector<Long, Long>) aLong -> aLong) .flatMap(new StateCreatingFlatMap(valueSize, killJvmOnFail)) .addSink(new PrintSinkFunction<>());
public static void main(String[] args) throws Exception { // parse input arguments final ParameterTool parameterTool = ParameterTool.fromArgs(args); StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool); DataStream<KafkaEvent> input = env .addSource( new FlinkKafkaConsumer011<>( parameterTool.getRequired("input-topic"), new KafkaEventSchema(), parameterTool.getProperties()) .assignTimestampsAndWatermarks(new CustomWatermarkExtractor())) .keyBy("word") .map(new RollingAdditionMapper()); input.addSink( new FlinkKafkaProducer011<>( parameterTool.getRequired("output-topic"), new KafkaEventSchema(), parameterTool.getProperties())); env.execute("Kafka 0.11 Example"); }
@Test public void testSessionWithFoldFails() throws Exception { // verify that fold does not work with merging windows StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); WindowedStream<String, String, TimeWindow> windowedStream = env.fromElements("Hello", "Ciao") .keyBy(new KeySelector<String, String>() { @Override public String getKey(String value) throws Exception { return value; } }) .window(EventTimeSessionWindows.withGap(Time.seconds(5))); try { windowedStream.fold("", new FoldFunction<String, String>() { private static final long serialVersionUID = -4567902917104921706L; @Override public String fold(String accumulator, String value) throws Exception { return accumulator; } }); } catch (UnsupportedOperationException e) { // expected // use a catch to ensure that the exception is thrown by the fold return; } fail("The fold call should fail."); }
.keyBy(new IdentityKeySelector<Long>()) .process(processFunction);
public static void main(String[] args) throws Exception { // parse input arguments final ParameterTool parameterTool = ParameterTool.fromArgs(args); StreamExecutionEnvironment env = KafkaExampleUtil.prepareExecutionEnv(parameterTool); DataStream<KafkaEvent> input = env .addSource( new FlinkKafkaConsumer<>( parameterTool.getRequired("input-topic"), new KafkaEventSchema(), parameterTool.getProperties()) .assignTimestampsAndWatermarks(new CustomWatermarkExtractor())) .keyBy("word") .map(new RollingAdditionMapper()); input.addSink( new FlinkKafkaProducer<>( parameterTool.getRequired("output-topic"), new KeyedSerializationSchemaWrapper<>(new KafkaEventSchema()), parameterTool.getProperties(), FlinkKafkaProducer.Semantic.EXACTLY_ONCE)); env.execute("Modern Kafka Example"); }
.keyBy(new KeySelector<String, String>() { private static final long serialVersionUID = 598309916882894293L;
/** * Verify that a {@link KeyedStream#process(KeyedProcessFunction)} call is correctly translated to an operator. */ @Test public void testKeyedStreamKeyedProcessTranslation() { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStreamSource<Long> src = env.generateSequence(0, 0); KeyedProcessFunction<Long, Long, Integer> keyedProcessFunction = new KeyedProcessFunction<Long, Long, Integer>() { private static final long serialVersionUID = 1L; @Override public void processElement(Long value, Context ctx, Collector<Integer> out) throws Exception { // Do nothing } @Override public void onTimer(long timestamp, OnTimerContext ctx, Collector<Integer> out) throws Exception { // Do nothing } }; DataStream<Integer> processed = src .keyBy(new IdentityKeySelector<Long>()) .process(keyedProcessFunction); processed.addSink(new DiscardingSink<Integer>()); assertEquals(keyedProcessFunction, getFunctionForDataStream(processed)); assertTrue(getOperatorForDataStream(processed) instanceof KeyedProcessOperator); }
private static JobGraph createJobGraphWithKeyedAndNonPartitionedOperatorState( int parallelism, int maxParallelism, int fixedParallelism, int numberKeys, int numberElements, boolean terminateAfterEmission, int checkpointingInterval) { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(parallelism); env.getConfig().setMaxParallelism(maxParallelism); env.enableCheckpointing(checkpointingInterval); env.setRestartStrategy(RestartStrategies.noRestart()); DataStream<Integer> input = env.addSource(new SubtaskIndexNonPartitionedStateSource( numberKeys, numberElements, terminateAfterEmission)) .setParallelism(fixedParallelism) .keyBy(new KeySelector<Integer, Integer>() { private static final long serialVersionUID = -7952298871120320940L; @Override public Integer getKey(Integer value) throws Exception { return value; } }); SubtaskIndexFlatMapper.workCompletedLatch = new CountDownLatch(numberKeys); DataStream<Tuple2<Integer, Integer>> result = input.flatMap(new SubtaskIndexFlatMapper(numberElements)); result.addSink(new CollectionSink<Tuple2<Integer, Integer>>()); return env.getStreamGraph().getJobGraph(); }
.keyBy(0) .map(new RichMapFunction<Tuple2<String, Integer>, String>() { private static final long serialVersionUID = 1L;
numberElements, terminateAfterEmission)) .keyBy(new KeySelector<Integer, Integer>() { private static final long serialVersionUID = -7952298871120320940L;
private void runTest( SourceFunction<SessionEvent<Integer, TestEventPayload>> dataSource, WindowFunction<SessionEvent<Integer, TestEventPayload>, String, Tuple, TimeWindow> windowFunction) throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); WindowedStream<SessionEvent<Integer, TestEventPayload>, Tuple, TimeWindow> windowedStream = env.addSource(dataSource).keyBy("sessionKey") .window(EventTimeSessionWindows.withGap(Time.milliseconds(MAX_SESSION_EVENT_GAP_MS))); if (ALLOWED_LATENESS_MS != Long.MAX_VALUE) { windowedStream = windowedStream.allowedLateness(Time.milliseconds(ALLOWED_LATENESS_MS)); } if (PURGE_WINDOW_ON_FIRE) { windowedStream = windowedStream.trigger(PurgingTrigger.of(EventTimeTrigger.create())); } windowedStream.apply(windowFunction).print(); JobExecutionResult result = env.execute(); // check that overall event counts match with our expectations. remember that late events within lateness will // each trigger a window! Assert.assertEquals( (LATE_EVENTS_PER_SESSION + 1) * NUMBER_OF_SESSIONS * EVENTS_PER_SESSION, (long) result.getAccumulatorResult(SESSION_COUNTER_ON_TIME_KEY)); Assert.assertEquals( NUMBER_OF_SESSIONS * (LATE_EVENTS_PER_SESSION * (LATE_EVENTS_PER_SESSION + 1) / 2), (long) result.getAccumulatorResult(SESSION_COUNTER_LATE_KEY)); }
.keyBy(0) .addSink(new ToPartitionFileSink(partitionFiles));
.keyBy(0) .map(new TestMapFunction()) .addSink(new MigrationTestUtils.AccumulatorCountingSink<>());