/**
 * Closes the iteration by delegating to
 * {@link IterativeStream#closeWith(org.apache.flink.streaming.api.datastream.DataStream)}.
 *
 * <p>This wrapper operates on {@link PythonDataStream}. Wherever the description of
 * {@link IterativeStream#closeWith(org.apache.flink.streaming.api.datastream.DataStream)}
 * mentions a DataStream, it can be read as a {@link PythonDataStream}.
 *
 * @param feedback_stream {@link PythonDataStream} whose wrapped stream is used as input to the
 *                        iteration head.
 * @return The {@code feedback_stream} argument itself.
 */
public PythonDataStream close_with(PythonDataStream<? extends DataStream<PyObject>> feedback_stream) {
    // View the stream wrapped by this object as the iterative stream being closed.
    IterativeStream<PyObject> iteration = (IterativeStream<PyObject>) this.stream;
    iteration.closeWith(feedback_stream.stream);
    return feedback_stream;
}
}
@Test public void testDoubleClosing() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); iter1.closeWith(iter1.map(noOpIntMap)); iter1.closeWith(iter1.map(noOpIntMap)); }
/**
 * Expects an {@link UnsupportedOperationException} when an iteration is started directly on the
 * {@code fromElements} source (no intermediate map) and then closed with a mapped stream.
 *
 * <p>NOTE(review): presumably the failure comes from the source and the map running with
 * different parallelism, as the test name suggests; confirm against the closeWith checks.
 */
@Test(expected = UnsupportedOperationException.class)
public void testIncorrectParallelism() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    IterativeStream<Integer> iteration = env.fromElements(1, 10).iterate();

    iteration.closeWith(iteration.map(noOpIntMap)).print();
}
@Test(expected = UnsupportedOperationException.class) public void testDifferingParallelism() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10) .map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); iter1.closeWith(iter1.map(noOpIntMap).setParallelism(parallelism / 2)); }
@Test(expected = UnsupportedOperationException.class) public void testClosingFromOutOfLoop() throws Exception { // this test verifies that we cannot close an iteration with a DataStream that does not // have the iteration in its predecessors StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); IterativeStream<Integer> iter2 = source.iterate(); iter2.closeWith(iter1.map(noOpIntMap)); }
// Close the iteration `it`, passing the "iterate" selection of `step` as the feedback stream.
it.closeWith(step.select("iterate"));
@Test public void testImmutabilityWithCoiteration() { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); // for rebalance IterativeStream<Integer> iter1 = source.iterate(); // Calling withFeedbackType should create a new iteration ConnectedIterativeStreams<Integer, String> iter2 = iter1.withFeedbackType(String.class); iter1.closeWith(iter1.map(noOpIntMap)).print(); iter2.closeWith(iter2.map(noOpCoMap)).print(); StreamGraph graph = env.getStreamGraph(); assertEquals(2, graph.getIterationSourceSinkPairs().size()); for (Tuple2<StreamNode, StreamNode> sourceSinkPair: graph.getIterationSourceSinkPairs()) { assertEquals(graph.getTargetVertex(sourceSinkPair.f0.getOutEdges().get(0)), graph.getSourceVertex(sourceSinkPair.f1.getInEdges().get(0))); } }
@SuppressWarnings("rawtypes") @Test public void testSimpleIteration() throws Exception { int numRetries = 5; int timeoutScale = 1; for (int numRetry = 0; numRetry < numRetries; numRetry++) { try { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); iterated = new boolean[parallelism]; DataStream<Boolean> source = env.fromCollection(Collections.nCopies(parallelism * 2, false)) .map(noOpBoolMap).name("ParallelizeMap"); IterativeStream<Boolean> iteration = source.iterate(3000 * timeoutScale); DataStream<Boolean> increment = iteration.flatMap(new IterationHead()).map(noOpBoolMap); iteration.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink()); iteration.closeWith(increment).addSink(new ReceiveCheckNoOpSink()); env.execute(); for (boolean iter : iterated) { assertTrue(iter); } break; // success } catch (Throwable t) { LOG.info("Run " + (numRetry + 1) + "/" + numRetries + " failed", t); if (numRetry >= numRetries - 1) { throw t; } else { timeoutScale *= 2; } } } }
opMethod.invoke(increment, resource4); DataStreamSink<Integer> sink = iteration.closeWith(increment).addSink(new SinkFunction<Integer>() { @Override public void invoke(Integer value) throws Exception {
// Close `iteration` with the output of its IterationHead flatMap, then attach a
// ReceiveCheckNoOpSink to the stream that closeWith returns.
iteration.closeWith(iteration.flatMap(new IterationHead())).addSink(new ReceiveCheckNoOpSink<Boolean>());
.split(new EvenOddOutputSelector()); iter1.closeWith(source3.select("even").union( head1.rebalance().map(noOpIntMap).broadcast(), head2.shuffle()));
.split(new EvenOddOutputSelector()); iter1.closeWith( source3.select("even").union( head1.map(noOpIntMap).name("bc").broadcast(),
// Close `it` with the union of `head` keyed by `key` and `head` mapped through `noOpIntMap`
// and keyed by the same `key`; the stream returned by closeWith goes to a ReceiveCheckNoOpSink.
it.closeWith(head.keyBy(key).union(head.map(noOpIntMap).keyBy(key))).addSink(new ReceiveCheckNoOpSink<Integer>());
.setParallelism(ITER_TEST_PARALLELISM); iteration.closeWith(iterationBody);
public static void main(String[] args) throws Exception { // Set up the environment if(!parseParameters(args)) { return; } StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Tuple2<Long, Long>> edges = getEdgesDataSet(env); IterativeStream<Tuple2<Long, Long>> iteration = edges.iterate(); DataStream<Tuple2<Long, Long>> result = iteration.closeWith( iteration.keyBy(0).flatMap(new AssignComponents())); // Emit the results result.print(); env.execute("Streaming Connected Components"); }
// Close the iteration `it`, passing the "iterate" selection of `step` as the feedback stream.
it.closeWith(step.select("iterate"));