return new PythonIterativeStream(this.stream.iterate(max_wait_time_ms));
/** * A thin wrapper layer over {@link DataStream#iterate()}. * * <p>Initiates an iterative part of the program that feeds back data streams. * The iterative part needs to be closed by calling * {@link PythonIterativeStream#close_with(PythonDataStream)}. The transformation of * this IterativeStream will be the iteration head. The data stream * given to the {@link PythonIterativeStream#close_with(PythonDataStream)} method is * the data stream that will be fed back and used as the input for the * iteration head. </p> * * <p>A common usage pattern for streaming iterations is to use output * splitting to send a part of the closing data stream to the head. Refer to * {@link #split(OutputSelector)} for more information. * * <p>The iteration edge will be partitioned the same way as the first input of * the iteration head unless it is changed in the * {@link PythonIterativeStream#close_with(PythonDataStream)} call. * * <p>By default a PythonDataStream with iteration will never terminate, but the user * can use the maxWaitTime parameter to set a max waiting time for the * iteration head. If no data received in the set time, the stream * terminates. * * @return The iterative data stream created. */ @PublicEvolving public PythonIterativeStream iterate() { return new PythonIterativeStream(this.stream.iterate()); }
@Test(expected = UnsupportedOperationException.class) public void testClosingFromOutOfLoop() throws Exception { // this test verifies that we cannot close an iteration with a DataStream that does not // have the iteration in its predecessors StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); IterativeStream<Integer> iter2 = source.iterate(); iter2.closeWith(iter1.map(noOpIntMap)); }
@Test(expected = UnsupportedOperationException.class) public void testCoIterClosingFromOutOfLoop() throws Exception { // this test verifies that we cannot close an iteration with a DataStream that does not // have the iteration in its predecessors StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); ConnectedIterativeStreams<Integer, Integer> coIter = source.iterate().withFeedbackType( Integer.class); coIter.closeWith(iter1.map(noOpIntMap)); }
@Test(expected = UnsupportedOperationException.class) public void testIncorrectParallelism() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Integer> source = env.fromElements(1, 10); IterativeStream<Integer> iter1 = source.iterate(); SingleOutputStreamOperator<Integer> map1 = iter1.map(noOpIntMap); iter1.closeWith(map1).print(); }
@Test(expected = IllegalStateException.class) public void testExecutionWithEmptyIteration() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); iter1.map(noOpIntMap).print(); env.execute(); }
@Test(expected = UnsupportedOperationException.class) public void testDifferingParallelism() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10) .map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); iter1.closeWith(iter1.map(noOpIntMap).setParallelism(parallelism / 2)); }
@Test(expected = UnsupportedOperationException.class) public void testCoDifferingParallelism() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); ConnectedIterativeStreams<Integer, Integer> coIter = source.iterate().withFeedbackType( Integer.class); coIter.closeWith(coIter.map(noOpIntCoMap).setParallelism(parallelism / 2)); }
@Test public void testDoubleClosing() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); iter1.closeWith(iter1.map(noOpIntMap)); iter1.closeWith(iter1.map(noOpIntMap)); }
@Test public void testImmutabilityWithCoiteration() { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); // for rebalance IterativeStream<Integer> iter1 = source.iterate(); // Calling withFeedbackType should create a new iteration ConnectedIterativeStreams<Integer, String> iter2 = iter1.withFeedbackType(String.class); iter1.closeWith(iter1.map(noOpIntMap)).print(); iter2.closeWith(iter2.map(noOpCoMap)).print(); StreamGraph graph = env.getStreamGraph(); assertEquals(2, graph.getIterationSourceSinkPairs().size()); for (Tuple2<StreamNode, StreamNode> sourceSinkPair: graph.getIterationSourceSinkPairs()) { assertEquals(graph.getTargetVertex(sourceSinkPair.f0.getOutEdges().get(0)), graph.getSourceVertex(sourceSinkPair.f1.getInEdges().get(0))); } }
@SuppressWarnings("rawtypes") @Test public void testSimpleIteration() throws Exception { int numRetries = 5; int timeoutScale = 1; for (int numRetry = 0; numRetry < numRetries; numRetry++) { try { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); iterated = new boolean[parallelism]; DataStream<Boolean> source = env.fromCollection(Collections.nCopies(parallelism * 2, false)) .map(noOpBoolMap).name("ParallelizeMap"); IterativeStream<Boolean> iteration = source.iterate(3000 * timeoutScale); DataStream<Boolean> increment = iteration.flatMap(new IterationHead()).map(noOpBoolMap); iteration.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink()); iteration.closeWith(increment).addSink(new ReceiveCheckNoOpSink()); env.execute(); for (boolean iter : iterated) { assertTrue(iter); } break; // success } catch (Throwable t) { LOG.info("Run " + (numRetry + 1) + "/" + numRetries + " failed", t); if (numRetry >= numRetries - 1) { throw t; } else { timeoutScale *= 2; } } } }
opMethod.invoke(source, resource1); IterativeStream<Integer> iteration = source.iterate(3000); opMethod.invoke(iteration, resource2);
.map(noOpBoolMap).name("ParallelizeMap"); IterativeStream<Boolean> iteration = source.iterate(3000 * timeoutScale);
.map(noOpIntMap).name("ParallelizeMapRebalance"); IterativeStream<Integer> iter1 = source1.union(source2).iterate();
.map(noOpIntMap); IterativeStream<Integer> iter1 = source1.union(source2).iterate();
public static void main(String[] args) throws Exception { // Set up the environment if(!parseParameters(args)) { return; } StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Tuple2<Long, Long>> edges = getEdgesDataSet(env); IterativeStream<Tuple2<Long, Long>> iteration = edges.iterate(); DataStream<Tuple2<Long, Long>> result = iteration.closeWith( iteration.keyBy(0).flatMap(new AssignComponents())); // Emit the results result.print(); env.execute("Streaming Connected Components"); }