/**
 * A thin wrapper layer over {@link StreamExecutionEnvironment#readTextFile(java.lang.String)}.
 *
 * @param path The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
 * @return The data stream that represents the data read from the given file as text lines
 * @throws IOException Thrown if the file at the given path could not be read
 */
public PythonDataStream read_text_file(String path) throws IOException {
	// AdapterMap wraps each raw String line so downstream Python operators receive Jython-compatible elements.
	return new PythonDataStream<>(env.readTextFile(path).map(new AdapterMap<String>()));
}
/**
 * A thin wrapper layer over {@link StreamExecutionEnvironment#socketTextStream(java.lang.String, int)}.
 *
 * @param host The host name to which a server socket binds.
 * @param port The port number to which a server socket binds. A port number of 0 means that the port number is automatically
 * allocated.
 * @return A python data stream containing the strings received from the socket
 */
public PythonDataStream socket_text_stream(String host, int port) {
	// AdapterMap converts the received String lines into Jython-compatible elements for the Python API.
	return new PythonDataStream<>(env.socketTextStream(host, port).map(new AdapterMap<String>()));
}
/**
 * Add a java source to the streaming topology. The source is expected to be a Java-based
 * implementation (e.g., a Kafka connector).
 *
 * @param src A native java source (e.g. PythonFlinkKafkaConsumer09)
 * @return Python data stream
 */
public PythonDataStream add_java_source(SourceFunction<Object> src) {
	// Elements produced by the Java source are adapted so the Python layer can process them.
	return new PythonDataStream<>(env.addSource(src).map(new AdapterMap<>()));
}
/**
 * A thin wrapper layer over {@link StreamExecutionEnvironment#fromCollection(java.util.Collection)}.
 *
 * <p>The input {@code Collection} is of type {@code Object}, because it is a collection
 * of Python elements. Their type is determined at runtime, by the Jython framework.</p>
 *
 * @param collection The collection of python elements to create the data stream from.
 * @return The data stream representing the given collection
 */
public PythonDataStream from_collection(Collection<Object> collection) {
	return new PythonDataStream<>(env.fromCollection(collection).map(new AdapterMap<>()));
}
/**
 * A thin wrapper layer over {@link StreamExecutionEnvironment#generateSequence(long, long)}.
 *
 * @param from The number to start at (inclusive)
 * @param to The number to stop at (inclusive)
 * @return A python data stream, containing all numbers in the [from, to] interval
 */
public PythonDataStream generate_sequence(long from, long to) {
	return new PythonDataStream<>(env.generateSequence(from, to).map(new AdapterMap<>()));
}
/**
 * Creates a python data stream from a Python {@link SourceFunction}, wrapping it in a
 * {@link PythonGeneratorFunction} so it can be executed by the Java runtime.
 *
 * @param src The Python source function to wrap and add to the topology
 * @return A python data stream produced by the given source
 * @throws Exception Thrown if wrapping or adding the source fails
 */
public PythonDataStream create_python_source(SourceFunction<Object> src) throws Exception {
	return new PythonDataStream<>(env.addSource(new PythonGeneratorFunction(src)).map(new AdapterMap<>()));
}
/**
 * Creates a python data stream from the given iterator.
 *
 * <p>Note that this operation will result in a non-parallel data stream source, i.e.,
 * a data stream source with a parallelism of one.</p>
 *
 * @param iter The iterator of elements to create the data stream from
 * @return The data stream representing the elements in the iterator
 * @throws Exception Thrown if wrapping the iterator or adding the source fails
 * @see StreamExecutionEnvironment#fromCollection(java.util.Iterator, org.apache.flink.api.common.typeinfo.TypeInformation)
 */
public PythonDataStream from_collection(Iterator<Object> iter) throws Exception {
	// Element type cannot be inferred from a Python iterator, so it is fixed to Object explicitly.
	return new PythonDataStream<>(env.addSource(new PythonIteratorFunction(iter), TypeExtractor.getForClass(Object.class))
		.map(new AdapterMap<>()));
}
/**
 * Verifies that executing a topology containing an iteration that was never closed
 * (no {@code closeWith} call) fails with an {@link IllegalStateException}.
 */
@Test(expected = IllegalStateException.class)
public void testExecutionWithEmptyIteration() throws Exception {
	StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> input = environment.fromElements(1, 10).map(noOpIntMap);

	// Open an iteration but never close it with a feedback stream.
	IterativeStream<Integer> iteration = input.iterate();
	iteration.map(noOpIntMap).print();

	environment.execute();
}
@Test(expected = UnsupportedOperationException.class) public void testDifferingParallelism() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10) .map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); iter1.closeWith(iter1.map(noOpIntMap).setParallelism(parallelism / 2)); }
@Test(expected = UnsupportedOperationException.class) public void testClosingFromOutOfLoop() throws Exception { // this test verifies that we cannot close an iteration with a DataStream that does not // have the iteration in its predecessors StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); IterativeStream<Integer> iter2 = source.iterate(); iter2.closeWith(iter1.map(noOpIntMap)); }
@Test public void testDoubleClosing() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); iter1.closeWith(iter1.map(noOpIntMap)); iter1.closeWith(iter1.map(noOpIntMap)); }
@Test(expected = UnsupportedOperationException.class) public void testCoIterClosingFromOutOfLoop() throws Exception { // this test verifies that we cannot close an iteration with a DataStream that does not // have the iteration in its predecessors StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); ConnectedIterativeStreams<Integer, Integer> coIter = source.iterate().withFeedbackType( Integer.class); coIter.closeWith(iter1.map(noOpIntMap)); }
@Test(expected = UnsupportedOperationException.class) public void testCoDifferingParallelism() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); ConnectedIterativeStreams<Integer, Integer> coIter = source.iterate().withFeedbackType( Integer.class); coIter.closeWith(coIter.map(noOpIntCoMap).setParallelism(parallelism / 2)); }
/**
 * If the expected values ever change, double-check that the change is not breaking the contract of
 * {@link StreamingRuntimeContext#getOperatorUniqueID()} being stable between job submissions.
 */
@Test
public void testGetOperatorUniqueID() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();

	// The hash literals are the operator unique IDs derived from the fixed uids below;
	// they must remain stable across job submissions (see class-level contract above).
	env.fromElements(1, 2, 3)
		.map(new VerifyOperatorIDMapFunction("6c4f323f22da8fb6e34f80c61be7a689")).uid("42")
		.map(new VerifyOperatorIDMapFunction("3e129e83691e7737fbf876b47452acbc")).uid("44");

	env.execute();
}
@Test(expected = UnsupportedOperationException.class) public void testForwardFailsHightToLowParallelism() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // this does a rebalance that works DataStream<Integer> src = env.fromElements(1, 2, 3).map(new NoOpIntMap()); // this doesn't work because it goes from 3 to 1 src.forward().map(new NoOpIntMap()).setParallelism(1); env.execute(); }
/**
 * Runs a simple source -> map -> sink pipeline at parallelism 1 and asserts that the
 * source and the mapper observed different runtime contexts (captured into the
 * srcContext / mapContext fields by TestSource / TestMap).
 */
@Test
public void test() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// Parallelism 1 keeps exactly one instance of each operator, so the two captured
	// contexts are directly comparable.
	env.setParallelism(1);

	env.addSource(new TestSource()).map(new TestMap()).addSink(new DiscardingSink<Integer>());
	env.execute();

	assertNotEquals(srcContext, mapContext);
}
/**
 * Tests that a changed operator name does not affect the generated vertex hash:
 * two otherwise identical topologies that differ only in a source name must
 * produce the same {@link JobVertexID}.
 */
@Test
public void testChangedOperatorName() throws Exception {
	StreamExecutionEnvironment environment = StreamExecutionEnvironment.createLocalEnvironment();
	environment.addSource(new NoOpSourceFunction(), "A").map(new NoOpMapFunction());
	JobVertexID expected = environment.getStreamGraph().getJobGraph().getVerticesAsArray()[0].getID();

	// Same topology, only the operator name differs.
	environment = StreamExecutionEnvironment.createLocalEnvironment();
	environment.addSource(new NoOpSourceFunction(), "B").map(new NoOpMapFunction());
	JobVertexID actual = environment.getStreamGraph().getJobGraph().getVerticesAsArray()[0].getID();

	assertEquals(expected, actual);
}
/**
 * Tests that a manual hash for an intermediate chain node is accepted.
 */
@Test
public void testManualHashAssignmentForIntermediateNodeInChain() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(4);

	env.addSource(new NoOpSourceFunction())
		// Intermediate chained node with a manually assigned uid
		.map(new NoOpMapFunction()).uid("map")
		.addSink(new NoOpSinkFunction());

	// Generating the JobGraph must not throw despite the uid on a chained (non-head) node.
	env.getStreamGraph().getJobGraph();
}
/**
 * Verifies that a split applied to the union of an already-split stream is rejected
 * when the stream graph is built (consecutive splits are unsupported and deprecated).
 */
@Test
public void testUnionBetweenConsecutiveSplitRejection() {
	StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	DataStreamSource<Integer> input = environment.fromElements(0, 0);
	OutputSelector<Integer> selector = new DummyOutputSelector<>();

	// split -> select -> union -> split: the second split must be rejected.
	input.split(selector)
		.select("dummy")
		.union(input.map(x -> x))
		.split(selector)
		.addSink(new DiscardingSink<>());

	expectedException.expect(IllegalStateException.class);
	expectedException.expectMessage("Consecutive multiple splits are not supported. Splits are deprecated. Please use side-outputs.");

	// The rejection happens during stream graph construction, not execution.
	environment.getStreamGraph();
}
/**
 * Builds and executes a keyed source -> map -> sink pipeline at the given parallelism,
 * with object reuse, a small buffer timeout, and at-least-once checkpointing enabled.
 *
 * @param parallelism The parallelism to run the whole program with
 * @throws Exception Thrown if the job execution fails
 */
private static void runPartitioningProgram(int parallelism) throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	// Object reuse and a low buffer timeout keep per-record latency small for this program.
	env.getConfig().enableObjectReuse();
	env.setBufferTimeout(5L);
	env.enableCheckpointing(1000, CheckpointingMode.AT_LEAST_ONCE);

	env
		.addSource(new TimeStampingSource())
		.map(new IdMapper<Tuple2<Long, Long>>())
		.keyBy(0) // key by the first tuple field to force a hash partitioning step
		.addSink(new TimestampingSink());

	env.execute("Partitioning Program");
}