/**
 * A thin wrapper layer over {@link DataStream#map(MapFunction)}.
 *
 * <p>The given mapper is first wrapped in a {@code PythonMapFunction}; the operator produced by
 * the underlying stream is then wrapped in a {@code PythonSingleOutputStreamOperator}.
 *
 * @param mapper The MapFunction that is called for each element of the DataStream.
 * @return The transformed {@link PythonDataStream}.
 * @throws IOException declared by this method; NOTE(review): presumably raised while wrapping
 *     the mapper - confirm against {@code PythonMapFunction}.
 */
public PythonDataStream<SingleOutputStreamOperator<PyObject>> map(
        MapFunction<PyObject, PyObject> mapper) throws IOException {
    PythonMapFunction wrappedMapper = new PythonMapFunction(mapper);
    return new PythonSingleOutputStreamOperator(stream.map(wrappedMapper));
}
/**
 * Appends a {@code NoOpMapFunction} map stage to the given stream and sets its parallelism to 4.
 *
 * @param input the stream the map stage is attached to
 * @return the configured map operator
 */
public static SingleOutputStreamOperator<Integer> createStatelessMap(DataStream<Integer> input) {
    SingleOutputStreamOperator<Integer> statelessMap = input.map(new NoOpMapFunction());
    return statelessMap.setParallelism(4);
}
/**
 * Appends a {@code StatefulStringStoringMap} tagged {@code "second"} to the given stream,
 * with parallelism 4 and operator uid {@code "second"}.
 *
 * @param mode the execution mode handed to the map function
 * @param input the stream the map stage is attached to
 * @return the configured map operator
 */
public static SingleOutputStreamOperator<Integer> createSecondStatefulMap(ExecutionMode mode, DataStream<Integer> input) {
    SingleOutputStreamOperator<Integer> secondMap =
            input.map(new StatefulStringStoringMap(mode, "second"));
    return secondMap.setParallelism(4).uid("second");
}
/**
 * Appends a {@code StatefulStringStoringMap} tagged {@code "first"} to the given stream,
 * with parallelism 4 and operator uid {@code "first"}.
 *
 * @param mode the execution mode handed to the map function
 * @param input the stream the map stage is attached to
 * @return the configured map operator
 */
public static SingleOutputStreamOperator<Integer> createFirstStatefulMap(ExecutionMode mode, DataStream<Integer> input) {
    StatefulStringStoringMap firstMapper = new StatefulStringStoringMap(mode, "first");
    return input.map(firstMapper).setParallelism(4).uid("first");
}
/**
 * Appends a {@code StatefulStringStoringMap} tagged {@code "first"} to the given stream,
 * with parallelism 4 and operator uid {@code "first"}.
 *
 * @param mode the execution mode handed to the map function
 * @param input the stream the map stage is attached to
 * @return the configured map operator
 */
public static SingleOutputStreamOperator<Integer> createFirstStatefulMap(ExecutionMode mode, DataStream<Integer> input) {
    SingleOutputStreamOperator<Integer> firstMap =
            input.map(new StatefulStringStoringMap(mode, "first"));
    return firstMap.setParallelism(4).uid("first");
}
/**
 * Appends a {@code StatefulStringStoringMap} tagged {@code "third"} to the given stream,
 * with parallelism 4 and operator uid {@code "third"}.
 *
 * @param mode the execution mode handed to the map function
 * @param input the stream the map stage is attached to
 * @return the configured map operator
 */
public static SingleOutputStreamOperator<Integer> createThirdStatefulMap(ExecutionMode mode, DataStream<Integer> input) {
    return input
            .map(new StatefulStringStoringMap(mode, "third"))
            .setParallelism(4)
            .uid("third");
}
/**
 * Appends a {@code StatefulStringStoringMap} tagged {@code "second"} to the given stream,
 * with parallelism 4 and operator uid {@code "second"}.
 *
 * @param mode the execution mode handed to the map function
 * @param input the stream the map stage is attached to
 * @return the configured map operator
 */
public static SingleOutputStreamOperator<Integer> createSecondStatefulMap(ExecutionMode mode, DataStream<Integer> input) {
    StatefulStringStoringMap secondMapper = new StatefulStringStoringMap(mode, "second");
    return input.map(secondMapper).setParallelism(4).uid("second");
}
@Test(expected = UnsupportedOperationException.class) public void testForwardFailsLowToHighParallelism() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Integer> src = env.fromElements(1, 2, 3); // this doesn't work because it goes from 1 to 3 src.forward().map(new NoOpIntMap()); env.execute(); }
/**
 * Connects two different data streams of the same chain to a CoMap: the integer source itself
 * and a string stream derived from it. The sink must receive the prefixed strings unchanged
 * and every integer incremented by one, in any order.
 */
@Test
public void differentDataStreamSameChain() throws Exception {
    TestListResultSink<String> resultSink = new TestListResultSink<>();

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    DataStream<Integer> numbers = env.fromElements(1, 3, 5);
    DataStream<String> prefixed = numbers.map(value -> "x " + value);

    // first input passes strings through, second input renders (value + 1) as a string
    CoMapFunction<String, Integer, String> merger = new CoMapFunction<String, Integer, String>() {
        @Override
        public String map1(String value) {
            return value;
        }

        @Override
        public String map2(Integer value) {
            return String.valueOf(value + 1);
        }
    };

    prefixed.connect(numbers).map(merger).addSink(resultSink);
    env.execute();

    List<String> expected = Arrays.asList("x 1", "x 3", "x 5", "2", "4", "6");
    List<String> actual = resultSink.getResult();

    // arrival order is not asserted, so both lists are sorted before comparison
    Collections.sort(expected);
    Collections.sort(actual);

    assertEquals(expected, actual);
}
/** * Tests that the json generated by JSONGenerator shall meet with 2 requirements: * 1. sink nodes are at the back * 2. if both two nodes are sink nodes or neither of them is sink node, then sort by its id. */ @Test public void testSinkIdComparison() { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Integer> source = env.fromElements(1, 2, 3); for (int i = 0; i < 32; i++) { if (i % 2 == 0) { source.addSink(new SinkFunction<Integer>() { @Override public void invoke(Integer value, Context ctx) throws Exception {} }); } else { source.map(x -> x + 1); } } // IllegalArgumentException will be thrown without FLINK-9216 env.getStreamGraph().getStreamingPlanAsJSON(); }
/**
 * Tests that there are no collisions with two identical intermediate nodes connected to the
 * same predecessor.
 *
 * <pre>
 *             /-> [ (map) ] -> [ (sink) ]
 * [ (src) ] -+
 *             \-> [ (map) ] -> [ (sink) ]
 * </pre>
 *
 * <p>Every vertex ID of the resulting job graph must be unique.
 */
@Test
public void testNodeHashIdenticalNodes() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
    env.setParallelism(4);
    env.disableOperatorChaining();

    DataStream<String> source = env.addSource(new NoOpSourceFunction());

    // two structurally identical branches hanging off the same source
    source.map(new NoOpMapFunction()).addSink(new NoOpSinkFunction());
    source.map(new NoOpMapFunction()).addSink(new NoOpSinkFunction());

    JobGraph jobGraph = env.getStreamGraph().getJobGraph();

    Set<JobVertexID> seenIds = new HashSet<>();
    for (JobVertex vertex : jobGraph.getVertices()) {
        // Set#add returns false when the ID was already present, i.e. on a collision
        boolean firstOccurrence = seenIds.add(vertex.getID());
        assertTrue(firstOccurrence);
    }
}
@Test(expected = UnsupportedOperationException.class) public void testForwardFailsHightToLowParallelism() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // this does a rebalance that works DataStream<Integer> src = env.fromElements(1, 2, 3).map(new NoOpIntMap()); // this doesn't work because it goes from 3 to 1 src.forward().map(new NoOpIntMap()).setParallelism(1); env.execute(); }
.map(value -> (long) (value + 1)) .keyBy(Long::intValue);
/** * This verifies that an event time source works when setting stream time characteristic to * processing time. In this case, the watermarks should just be swallowed. */ @Test public void testEventTimeSourceWithProcessingTime() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); env.setParallelism(2); env.getConfig().disableSysoutLogging(); env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime); DataStream<Integer> source1 = env.addSource(new MyTimestampSource(0, 10)); source1 .map(new IdentityMap()) .transform("Watermark Check", BasicTypeInfo.INT_TYPE_INFO, new CustomOperator(false)); env.execute(); // verify that we don't get any watermarks, the source is used as watermark source in // other tests, so it normally emits watermarks Assert.assertTrue(CustomOperator.finalWatermarks[0].size() == 0); }
/** * Runs the following program. * <pre> * [ (source)->(filter) ]-s->[ (map) ] -> [ (map) ] -> [ (groupBy/count)->(sink) ] * </pre> */ @Override public void testProgram(StreamExecutionEnvironment env) { DataStream<String> stream = env.addSource(new StringGeneratingSourceFunction(NUM_STRINGS)); stream // -------------- first vertex, chained to the source ---------------- .filter(new StringRichFilterFunction()) .shuffle() // -------------- seconds vertex - the stateful one that also fails ---------------- .map(new StringPrefixCountRichMapFunction()) .startNewChain() .map(new StatefulCounterFunction()) // -------------- third vertex - counter and the sink ---------------- .keyBy("prefix") .map(new OnceFailingPrefixCounter(NUM_STRINGS)) .addSink(new SinkFunction<PrefixCount>() { @Override public void invoke(PrefixCount value) throws Exception { // Do nothing here } }); }
/**
 * Creates a streaming JobGraph from the StreamEnvironment.
 *
 * <p>The job is an {@code InfiniteTestSource}, shuffled into a {@code StatefulCounter} map and
 * drained by a {@code DiscardingSink}, with operator chaining disabled.
 *
 * @param parallelism parallelism applied to the environment
 * @param numberOfRetries restart attempts for the fixed-delay restart strategy
 * @param restartDelay delay passed to the fixed-delay restart strategy
 * @return the job graph derived from the environment's stream graph
 */
private JobGraph createJobGraph(
        int parallelism,
        int numberOfRetries,
        long restartDelay) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    env.setParallelism(parallelism);
    env.disableOperatorChaining();
    env.getConfig().setRestartStrategy(
            RestartStrategies.fixedDelayRestart(numberOfRetries, restartDelay));
    env.getConfig().disableSysoutLogging();

    DataStream<Integer> counted =
            env.addSource(new InfiniteTestSource()).shuffle().map(new StatefulCounter());
    counted.addSink(new DiscardingSink<>());

    return env.getStreamGraph().getJobGraph();
}
/**
 * These check whether timestamps are properly ignored when they are disabled.
 *
 * <p>Two {@code MyNonWatermarkingSource} streams are joined through an identity map and an
 * identity co-map, and the result is passed through a
 * {@code DisabledTimestampCheckingOperator} before being discarded.
 */
@Test
public void testDisabledTimestamps() throws Exception {
    final int numElements = 10;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.ProcessingTime);
    env.setParallelism(PARALLELISM);
    env.getConfig().disableSysoutLogging();

    DataStream<Integer> firstSource = env.addSource(new MyNonWatermarkingSource(numElements));
    DataStream<Integer> secondSource = env.addSource(new MyNonWatermarkingSource(numElements));

    DataStream<Integer> checked = firstSource
            .map(new IdentityMap())
            .connect(secondSource)
            .map(new IdentityCoMap())
            .transform(
                    "Custom Operator",
                    BasicTypeInfo.INT_TYPE_INFO,
                    new DisabledTimestampCheckingOperator());

    checked.addSink(new DiscardingSink<Integer>());

    env.execute();
}
/**
 * These check whether timestamps are properly assigned at the sources and handled in
 * network transmission and between chained operators when timestamps are enabled.
 *
 * <p>Two {@code MyTimestampSource} streams are joined through an identity map and an identity
 * co-map, and the result is passed through a {@code TimestampCheckingOperator} before being
 * discarded.
 */
@Test
public void testTimestampHandling() throws Exception {
    final int numElements = 10;

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(PARALLELISM);
    env.getConfig().disableSysoutLogging();

    DataStream<Integer> firstSource = env.addSource(new MyTimestampSource(0L, numElements));
    DataStream<Integer> secondSource = env.addSource(new MyTimestampSource(0L, numElements));

    DataStream<Integer> checked = firstSource
            .map(new IdentityMap())
            .connect(secondSource)
            .map(new IdentityCoMap())
            .transform(
                    "Custom Operator",
                    BasicTypeInfo.INT_TYPE_INFO,
                    new TimestampCheckingOperator());

    checked.addSink(new DiscardingSink<Integer>());

    env.execute();
}
assertEquals(TypeExtractor.getForClass(Long.class), src1.getType()); DataStream<Tuple2<Integer, String>> map = src1.map(new MapFunction<Long, Tuple2<Integer, String>>() { @Override public Tuple2<Integer, String> map(Long value) throws Exception {