@Test public void testDoubleClosing() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); iter1.closeWith(iter1.map(noOpIntMap)); iter1.closeWith(iter1.map(noOpIntMap)); }
/** * Changes the feedback type of the iteration and allows the user to apply * co-transformations on the input and feedback stream, as in a * {@link ConnectedStreams}. * * <p>For type safety the user needs to define the feedback type * * @param feedbackTypeClass * Class of the elements in the feedback stream. * @return A {@link ConnectedIterativeStreams}. */ public <F> ConnectedIterativeStreams<T, F> withFeedbackType(Class<F> feedbackTypeClass) { return withFeedbackType(TypeInformation.of(feedbackTypeClass)); }
return new IterativeStream<>(this, 0);
@Test(expected = UnsupportedOperationException.class) public void testCoIterClosingFromOutOfLoop() throws Exception { // this test verifies that we cannot close an iteration with a DataStream that does not // have the iteration in its predecessors StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); ConnectedIterativeStreams<Integer, Integer> coIter = source.iterate().withFeedbackType( Integer.class); coIter.closeWith(iter1.map(noOpIntMap)); }
@Test public void testImmutabilityWithCoiteration() { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); // for rebalance IterativeStream<Integer> iter1 = source.iterate(); // Calling withFeedbackType should create a new iteration ConnectedIterativeStreams<Integer, String> iter2 = iter1.withFeedbackType(String.class); iter1.closeWith(iter1.map(noOpIntMap)).print(); iter2.closeWith(iter2.map(noOpCoMap)).print(); StreamGraph graph = env.getStreamGraph(); assertEquals(2, graph.getIterationSourceSinkPairs().size()); for (Tuple2<StreamNode, StreamNode> sourceSinkPair: graph.getIterationSourceSinkPairs()) { assertEquals(graph.getTargetVertex(sourceSinkPair.f0.getOutEdges().get(0)), graph.getSourceVertex(sourceSinkPair.f1.getInEdges().get(0))); } }
@SuppressWarnings("rawtypes") @Test public void testSimpleIteration() throws Exception { int numRetries = 5; int timeoutScale = 1; for (int numRetry = 0; numRetry < numRetries; numRetry++) { try { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); iterated = new boolean[parallelism]; DataStream<Boolean> source = env.fromCollection(Collections.nCopies(parallelism * 2, false)) .map(noOpBoolMap).name("ParallelizeMap"); IterativeStream<Boolean> iteration = source.iterate(3000 * timeoutScale); DataStream<Boolean> increment = iteration.flatMap(new IterationHead()).map(noOpBoolMap); iteration.map(noOpBoolMap).addSink(new ReceiveCheckNoOpSink()); iteration.closeWith(increment).addSink(new ReceiveCheckNoOpSink()); env.execute(); for (boolean iter : iterated) { assertTrue(iter); } break; // success } catch (Throwable t) { LOG.info("Run " + (numRetry + 1) + "/" + numRetries + " failed", t); if (numRetry >= numRetries - 1) { throw t; } else { timeoutScale *= 2; } } } }
/** * A thin wrapper layer over {@link IterativeStream#closeWith(org.apache.flink.streaming.api.datastream.DataStream)} * * <p>Please note that this function works with {@link PythonDataStream} and thus wherever a DataStream is mentioned in * the above {@link IterativeStream#closeWith(org.apache.flink.streaming.api.datastream.DataStream)} description, * the user may regard it as {@link PythonDataStream} . * * @param feedback_stream {@link PythonDataStream} that will be used as input to the iteration * head. * @return The feedback stream. */ public PythonDataStream close_with(PythonDataStream<? extends DataStream<PyObject>> feedback_stream) { ((IterativeStream<PyObject>) this.stream).closeWith(feedback_stream.stream); return feedback_stream; } }
public static void main(String[] args) throws Exception { // Set up the environment if(!parseParameters(args)) { return; } StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Tuple2<Long, Long>> edges = getEdgesDataSet(env); IterativeStream<Tuple2<Long, Long>> iteration = edges.iterate(); DataStream<Tuple2<Long, Long>> result = iteration.closeWith( iteration.keyBy(0).flatMap(new AssignComponents())); // Emit the results result.print(); env.execute("Streaming Connected Components"); }
protected IterativeStream(DataStream<T> dataStream, long maxWaitTime) { super(dataStream.getExecutionEnvironment(), new FeedbackTransformation<>(dataStream.getTransformation(), maxWaitTime)); this.originalInput = dataStream; this.maxWaitTime = maxWaitTime; setBufferTimeout(dataStream.environment.getBufferTimeout()); }
/** * Closes the iteration. This method defines the end of the iterative * program part that will be fed back to the start of the iteration. * * <p>A common usage pattern for streaming iterations is to use output * splitting to send a part of the closing data stream to the head. Refer to * {@link DataStream#split(org.apache.flink.streaming.api.collector.selector.OutputSelector)} * for more information. * * @param feedbackStream * {@link DataStream} that will be used as input to the iteration * head. * * @return The feedback stream. * */ @SuppressWarnings({ "unchecked", "rawtypes" }) public DataStream<T> closeWith(DataStream<T> feedbackStream) { Collection<StreamTransformation<?>> predecessors = feedbackStream.getTransformation().getTransitivePredecessors(); if (!predecessors.contains(this.transformation)) { throw new UnsupportedOperationException( "Cannot close an iteration with a feedback DataStream that does not originate from said iteration."); } ((FeedbackTransformation) getTransformation()).addFeedbackEdge(feedbackStream.getTransformation()); return feedbackStream; }
@Test(expected = IllegalStateException.class) public void testExecutionWithEmptyIteration() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); iter1.map(noOpIntMap).print(); env.execute(); }
opMethod.invoke(iteration, resource2); DataStream<Integer> flatMap = iteration.flatMap(new FlatMapFunction<Integer, Integer>() { @Override public void flatMap(Integer value, Collector<Integer> out) throws Exception { opMethod.invoke(increment, resource4); DataStreamSink<Integer> sink = iteration.closeWith(increment).addSink(new SinkFunction<Integer>() { @Override public void invoke(Integer value) throws Exception {
protected IterativeStream(DataStream<T> dataStream, long maxWaitTime) { super(dataStream.getExecutionEnvironment(), new FeedbackTransformation<>(dataStream.getTransformation(), maxWaitTime)); this.originalInput = dataStream; this.maxWaitTime = maxWaitTime; setBufferTimeout(dataStream.environment.getBufferTimeout()); }
/** * Closes the iteration. This method defines the end of the iterative * program part that will be fed back to the start of the iteration. * * <p>A common usage pattern for streaming iterations is to use output * splitting to send a part of the closing data stream to the head. Refer to * {@link DataStream#split(org.apache.flink.streaming.api.collector.selector.OutputSelector)} * for more information. * * @param feedbackStream * {@link DataStream} that will be used as input to the iteration * head. * * @return The feedback stream. * */ @SuppressWarnings({ "unchecked", "rawtypes" }) public DataStream<T> closeWith(DataStream<T> feedbackStream) { Collection<StreamTransformation<?>> predecessors = feedbackStream.getTransformation().getTransitivePredecessors(); if (!predecessors.contains(this.transformation)) { throw new UnsupportedOperationException( "Cannot close an iteration with a feedback DataStream that does not originate from said iteration."); } ((FeedbackTransformation) getTransformation()).addFeedbackEdge(feedbackStream.getTransformation()); return feedbackStream; }
@Test(expected = UnsupportedOperationException.class) public void testIncorrectParallelism() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); DataStream<Integer> source = env.fromElements(1, 10); IterativeStream<Integer> iter1 = source.iterate(); SingleOutputStreamOperator<Integer> map1 = iter1.map(noOpIntMap); iter1.closeWith(map1).print(); }
/** * Changes the feedback type of the iteration and allows the user to apply * co-transformations on the input and feedback stream, as in a * {@link ConnectedStreams}. * * <p>For type safety the user needs to define the feedback type * * @param feedbackTypeHint * Class of the elements in the feedback stream. * @return A {@link ConnectedIterativeStreams}. */ public <F> ConnectedIterativeStreams<T, F> withFeedbackType(TypeHint<F> feedbackTypeHint) { return withFeedbackType(TypeInformation.of(feedbackTypeHint)); }
return new IterativeStream<>(this, maxWaitTimeMillis);
protected IterativeStream(DataStream<T> dataStream, long maxWaitTime) { super(dataStream.getExecutionEnvironment(), new FeedbackTransformation<>(dataStream.getTransformation(), maxWaitTime)); this.originalInput = dataStream; this.maxWaitTime = maxWaitTime; setBufferTimeout(dataStream.environment.getBufferTimeout()); }