.flatMap(new FlatMapFunction<Long, Tuple2<String, String>>() {
    @Override
    public void flatMap(Long value, Collector<Tuple2<String, String>> out) {
public static void main(final String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final String outputPath = params.getRequired("outputPath");

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);
    env.enableCheckpointing(5000L);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, Time.of(10L, TimeUnit.SECONDS)));

    final StreamingFileSink<Tuple2<Integer, Integer>> sink = StreamingFileSink
        .forRowFormat(new Path(outputPath), (Encoder<Tuple2<Integer, Integer>>) (element, stream) -> {
            PrintStream out = new PrintStream(stream);
            out.println(element.f1);
        })
        .withBucketAssigner(new KeyBucketAssigner())
        .withRollingPolicy(OnCheckpointRollingPolicy.build())
        .build();

    // generate data, shuffle, sink
    env.addSource(new Generator(10, 10, 60))
        .keyBy(0)
        .addSink(sink);

    env.execute("StreamingFileSinkProgram");
}
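// A hedged sketch of the KeyBucketAssigner referenced above (the actual test
// class may differ): it buckets each record by its key field f0, so every key
// ends up in its own output directory.
public static final class KeyBucketAssigner implements BucketAssigner<Tuple2<Integer, Integer>, String> {

    @Override
    public String getBucketId(final Tuple2<Integer, Integer> element, final Context context) {
        // use the key (f0) as the bucket directory name
        return String.valueOf(element.f0);
    }

    @Override
    public SimpleVersionedSerializer<String> getSerializer() {
        return SimpleVersionedStringSerializer.INSTANCE;
    }
}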
/**
 * Adds a Java source to the streaming topology. The source is expected to be
 * a Java-based implementation (e.g. a Kafka connector).
 *
 * @param src A native Java source (e.g. PythonFlinkKafkaConsumer09)
 * @return Python data stream
 */
public PythonDataStream add_java_source(SourceFunction<Object> src) {
    return new PythonDataStream<>(env.addSource(src).map(new AdapterMap<>()));
}
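// A minimal usage sketch: pyEnv and MyJavaSource are hypothetical stand-ins
// for the enclosing Python environment wrapper and a concrete Java
// SourceFunction (e.g. a Kafka connector); neither name is confirmed API.
SourceFunction<Object> src = new MyJavaSource();  // hypothetical source class
PythonDataStream stream = pyEnv.add_java_source(src);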
public static void main(String[] args) throws Exception {
    final ParameterTool pt = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    setupEnvironment(env, pt);

    final int numStates =
        pt.getInt(NUM_LIST_STATES_PER_OP.key(), NUM_LIST_STATES_PER_OP.defaultValue());
    final int numPartitionsPerState =
        pt.getInt(NUM_PARTITIONS_PER_LIST_STATE.key(), NUM_PARTITIONS_PER_LIST_STATE.defaultValue());

    Preconditions.checkState(env.getCheckpointInterval() > 0L,
        "Checkpointing must be enabled for this test!");

    env.addSource(new SimpleEndlessSourceWithBloatedState(numStates, numPartitionsPerState))
        .setParallelism(env.getParallelism())
        .addSink(new DiscardingSink<>())
        .setParallelism(1);

    env.execute("HeavyDeploymentStressTestProgram");
}
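// A hedged sketch of the setupEnvironment helper assumed above: the
// Precondition requires a positive checkpoint interval, so the helper must at
// least enable checkpointing from the ParameterTool. The parameter key and
// default below are assumptions for illustration; the real helper likely
// configures more (state backend, restart strategy, and so on).
private static void setupEnvironment(StreamExecutionEnvironment env, ParameterTool pt) {
    long checkpointInterval = pt.getLong("checkpoint_interval", 1000L);  // assumed key/default
    env.enableCheckpointing(checkpointInterval);
}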
public static JobGraph stoppableJob(final StopJobSignal stopJobSignal) {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.addSource(new InfiniteSourceFunction(stopJobSignal))
        .setParallelism(2)
        .shuffle()
        .addSink(new DiscardingSink<>())
        .setParallelism(2);

    return env.getStreamGraph().getJobGraph();
}
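// A hedged sketch of the InfiniteSourceFunction used above (the real test
// class may differ): an endless source that stops emitting once the job is
// cancelled or the external stop signal fires. StopJobSignal#isSignaled() is
// an assumed accessor, not confirmed API.
private static class InfiniteSourceFunction implements SourceFunction<Long> {

    private volatile boolean running = true;
    private final StopJobSignal stopJobSignal;

    InfiniteSourceFunction(StopJobSignal stopJobSignal) {
        this.stopJobSignal = stopJobSignal;
    }

    @Override
    public void run(SourceContext<Long> ctx) throws Exception {
        long counter = 0L;
        while (running && !stopJobSignal.isSignaled()) {  // assumed accessor
            synchronized (ctx.getCheckpointLock()) {
                ctx.collect(counter++);
            }
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}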
@Test
@SuppressWarnings("unchecked")
public void testFromCollectionParallelism() {
    try {
        TypeInformation<Integer> typeInfo = BasicTypeInfo.INT_TYPE_INFO;
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStreamSource<Integer> dataStream1 =
            env.fromCollection(new DummySplittableIterator<Integer>(), typeInfo);

        try {
            dataStream1.setParallelism(4);
            fail("should throw an exception");
        } catch (IllegalArgumentException e) {
            // expected
        }

        dataStream1.addSink(new DiscardingSink<Integer>());

        DataStreamSource<Integer> dataStream2 =
            env.fromParallelCollection(new DummySplittableIterator<Integer>(), typeInfo)
                .setParallelism(4);

        dataStream2.addSink(new DiscardingSink<Integer>());

        env.getExecutionPlan();

        assertEquals("Parallelism of collection source must be 1.", 1,
            env.getStreamGraph().getStreamNode(dataStream1.getId()).getParallelism());
        assertEquals("Parallelism of parallel collection source must be 4.", 4,
            env.getStreamGraph().getStreamNode(dataStream2.getId()).getParallelism());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
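// A hedged sketch of the DummySplittableIterator helper assumed by the test
// (Flink's actual test utility may differ): a SplittableIterator that yields
// no elements, which is all the parallelism assertions above require.
private static class DummySplittableIterator<T> extends SplittableIterator<T> {

    @Override
    @SuppressWarnings("unchecked")
    public Iterator<T>[] split(int numPartitions) {
        return (Iterator<T>[]) new Iterator<?>[0];
    }

    @Override
    public int getMaximumNumberOfSplits() {
        return 0;
    }

    @Override
    public boolean hasNext() {
        return false;
    }

    @Override
    public T next() {
        throw new NoSuchElementException();
    }
}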
DataStream<Integer> map = src.map(mapFunction);
map.addSink(new DiscardingSink<Integer>());
assertEquals(mapFunction, getFunctionForDataStream(map));

DataStream<Integer> flatMap = src.flatMap(flatMapFunction);
flatMap.addSink(new DiscardingSink<Integer>());
assertEquals(flatMapFunction, getFunctionForDataStream(flatMap));
env.setParallelism(10);

SingleOutputStreamOperator<Long> map = src.map(new MapFunction<Tuple2<Long, Long>, Long>() {
    @Override
    public Long map(Tuple2<Long, Long> value) throws Exception {
        return null;
    }
});

assertEquals(1, env.getStreamGraph().getStreamNode(src.getId()).getParallelism());
assertEquals(10, env.getStreamGraph().getStreamNode(map.getId()).getParallelism());
assertEquals(1, env.getStreamGraph().getStreamNode(windowed.getId()).getParallelism());

try {
    src.setParallelism(3);
    fail();
} catch (IllegalArgumentException success) {
    // expected: a non-parallel source cannot have its parallelism raised
}

parallelSource.addSink(new DiscardingSink<Long>());
assertEquals(7, env.getStreamGraph().getStreamNode(parallelSource.getId()).getParallelism());

parallelSource.setParallelism(3);
assertEquals(3, env.getStreamGraph().getStreamNode(parallelSource.getId()).getParallelism());
.addSource(consumer).setParallelism(sourceParallelism)
.map(new ThrottledMapper<Tuple2<Integer, Integer>>(20)).setParallelism(sourceParallelism);
/**
 * NOTE: This method is for internal use only for defining a TableSource.
 * Do not use it in Table API programs.
 */
@Override
public DataStream<Row> getDataStream(StreamExecutionEnvironment env) {
    DeserializationSchema<Row> deserializationSchema = getDeserializationSchema();
    // Version-specific Kafka consumer
    FlinkKafkaConsumerBase<Row> kafkaConsumer =
        getKafkaConsumer(topic, properties, deserializationSchema);
    return env.addSource(kafkaConsumer).name(explainSource());
}
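// A hedged sketch of the version-specific hook assumed above: a Kafka 0.9
// table source, for example, would implement getKafkaConsumer roughly like
// this. Treat it as an illustration, not the connector's actual code.
@Override
FlinkKafkaConsumerBase<Row> getKafkaConsumer(
        String topic,
        Properties properties,
        DeserializationSchema<Row> deserializationSchema) {
    return new FlinkKafkaConsumer09<>(topic, deserializationSchema, properties);
}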
.addSource(latestReadingConsumer).setParallelism(parallelism)
.flatMap(new FlatMapFunction<Tuple2<Integer, Integer>, Object>() {
    @Override
    public void flatMap(Tuple2<Integer, Integer> value, Collector<Object> out) throws Exception {
private static JobGraph createJobGraphWithKeyedAndNonPartitionedOperatorState(
        int parallelism,
        int maxParallelism,
        int fixedParallelism,
        int numberKeys,
        int numberElements,
        boolean terminateAfterEmission,
        int checkpointingInterval) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.getConfig().setMaxParallelism(maxParallelism);
    env.enableCheckpointing(checkpointingInterval);
    env.setRestartStrategy(RestartStrategies.noRestart());

    DataStream<Integer> input = env.addSource(new SubtaskIndexNonPartitionedStateSource(
            numberKeys, numberElements, terminateAfterEmission))
        .setParallelism(fixedParallelism)
        .keyBy(new KeySelector<Integer, Integer>() {
            private static final long serialVersionUID = -7952298871120320940L;

            @Override
            public Integer getKey(Integer value) throws Exception {
                return value;
            }
        });

    SubtaskIndexFlatMapper.workCompletedLatch = new CountDownLatch(numberKeys);

    DataStream<Tuple2<Integer, Integer>> result =
        input.flatMap(new SubtaskIndexFlatMapper(numberElements));

    result.addSink(new CollectionSink<Tuple2<Integer, Integer>>());

    return env.getStreamGraph().getJobGraph();
}
public DataStreamSource(
        StreamExecutionEnvironment environment,
        TypeInformation<T> outTypeInfo,
        StreamSource<T, ?> operator,
        boolean isParallel,
        String sourceName) {
    super(environment, new SourceTransformation<>(
        sourceName, operator, outTypeInfo, environment.getParallelism()));

    this.isParallel = isParallel;
    if (!isParallel) {
        setParallelism(1);
    }
}
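// Illustration of the contract enforced above: a source constructed with
// isParallel == false is pinned to parallelism 1, so raising it later fails
// (compare the fromCollection test earlier in this section). A minimal
// sketch, assuming a standard StreamExecutionEnvironment:
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStreamSource<Integer> source = env.fromElements(1, 2, 3);  // non-parallel source
source.setParallelism(4);  // throws IllegalArgumentException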
private static StreamExecutionEnvironment getSimpleJob() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.generateSequence(1, 10000000)
        .addSink(new SinkFunction<Long>() {
            @Override
            public void invoke(Long value) {
            }
        });

    return env;
}
.assignTimestampsAndWatermarks(new CustomWmEmitter<Long>() {

.assignTimestampsAndWatermarks(new CustomWmEmitter<String>() {
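// A hedged sketch of the CustomWmEmitter base class assumed by the two
// fragments above (the real test helper may differ): a punctuated watermark
// assigner that emits a watermark for every extracted timestamp, leaving
// timestamp extraction itself to the anonymous subclass.
private abstract static class CustomWmEmitter<T> implements AssignerWithPunctuatedWatermarks<T> {

    @Nullable
    @Override
    public Watermark checkAndGetNextWatermark(T lastElement, long extractedTimestamp) {
        // advance event time to each element's timestamp
        return new Watermark(extractedTimestamp);
    }
}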
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStreamSink<Long> sink = env.generateSequence(1, 100).print();

assertTrue(env.getStreamGraph().getStreamNode(sink.getTransformation().getId())
    .getStatePartitioner1() == null);
assertTrue(env.getStreamGraph().getStreamNode(sink.getTransformation().getId())
    .getInEdges().get(0).getPartitioner() instanceof ForwardPartitioner);

DataStreamSink<Long> sink2 = env.generateSequence(1, 100).keyBy(key1).print();
DataStreamSink<Long> sink3 = env.generateSequence(1, 100).keyBy(key2).print();
@Test
public void testUnionBetweenConsecutiveSplitRejection() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStreamSource<Integer> src = env.fromElements(0, 0);
    OutputSelector<Integer> outputSelector = new DummyOutputSelector<>();

    src.split(outputSelector)
        .select("dummy")
        .union(src.map(x -> x))
        .split(outputSelector)
        .addSink(new DiscardingSink<>());

    expectedException.expect(IllegalStateException.class);
    expectedException.expectMessage(
        "Consecutive multiple splits are not supported. Splits are deprecated. Please use side-outputs.");

    env.getStreamGraph();
}
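// A minimal sketch of the side-output pattern that the exception message
// recommends in place of split/select; the OutputTag name and routing logic
// are illustrative only.
final OutputTag<Integer> dummyTag = new OutputTag<Integer>("dummy") {};

SingleOutputStreamOperator<Integer> mainStream = src.process(
    new ProcessFunction<Integer, Integer>() {
        @Override
        public void processElement(Integer value, Context ctx, Collector<Integer> out) {
            ctx.output(dummyTag, value);  // route a copy to the side output
            out.collect(value);           // keep the main stream flowing
        }
    });

mainStream.getSideOutput(dummyTag).addSink(new DiscardingSink<>());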
private static void runJob() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.fromElements(1, 2, 3)
        .print();

    env.execute();
}
try {
    env.addSource(new TestSource<Integer>()).print();
    fail();
} catch (Exception ignored) {}

env.addSource(new TestSource<Integer>()).returns(Integer.class);

source.map(new TestMap<Long, Long>()).returns(Long.class).print();
source.flatMap(new TestFlatMap<Long, Long>()).returns(new TypeHint<Long>(){}).print();
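// A hedged sketch of the generic TestSource helper assumed by the test (the
// real test class may differ): its type parameter cannot be reified, so
// Flink's type extraction fails until returns(...) supplies the output type.
private static class TestSource<T> implements SourceFunction<T> {

    @Override
    public void run(SourceContext<T> ctx) {
        // emits nothing; only the unresolvable output type matters here
    }

    @Override
    public void cancel() {}
}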