/**
 * A thin wrapper layer over {@link StreamExecutionEnvironment#readTextFile(java.lang.String)}.
 *
 * @param path The path of the file, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path").
 * @return The data stream that represents the data read from the given file as text lines
 * @throws IOException Thrown if the file at the given path could not be read
 */
public PythonDataStream read_text_file(String path) throws IOException {
	// AdapterMap wraps each raw String line so downstream Python operators receive Jython-compatible elements.
	return new PythonDataStream<>(env.readTextFile(path).map(new AdapterMap<String>()));
}
/**
 * A thin wrapper layer over {@link StreamExecutionEnvironment#socketTextStream(java.lang.String, int)}.
 *
 * @param host The host name to which a server socket binds.
 * @param port The port number to which a server socket binds. A port number of 0 means that the port number is automatically
 * allocated.
 * @return A python data stream containing the strings received from the socket
 */
public PythonDataStream socket_text_stream(String host, int port) {
	// AdapterMap converts the received String lines into Jython-compatible elements for the Python API.
	return new PythonDataStream<>(env.socketTextStream(host, port).map(new AdapterMap<String>()));
}
/**
 * Add a java source to the streaming topology. The source is expected to be a Java-based
 * implementation (e.g., a Kafka connector).
 *
 * @param src A native java source (e.g. PythonFlinkKafkaConsumer09)
 * @return Python data stream
 */
public PythonDataStream add_java_source(SourceFunction<Object> src) {
	// Elements produced by the Java source are adapted so the Python layer can process them.
	return new PythonDataStream<>(env.addSource(src).map(new AdapterMap<>()));
}
/**
 * A thin wrapper layer over {@link StreamExecutionEnvironment#fromCollection(java.util.Collection)}.
 *
 * <p>The input {@code Collection} is of type {@code Object}, because it is a collection
 * of Python elements. Their type is determined at runtime, by the Jython framework.</p>
 *
 * @param collection The collection of python elements to create the data stream from.
 * @return The data stream representing the given collection
 */
public PythonDataStream from_collection(Collection<Object> collection) {
	return new PythonDataStream<>(env.fromCollection(collection).map(new AdapterMap<>()));
}
/**
 * A thin wrapper layer over {@link StreamExecutionEnvironment#generateSequence(long, long)}.
 *
 * @param from The number to start at (inclusive)
 * @param to The number to stop at (inclusive)
 * @return A python data stream, containing all numbers in the [from, to] interval
 */
public PythonDataStream generate_sequence(long from, long to) {
	return new PythonDataStream<>(env.generateSequence(from, to).map(new AdapterMap<>()));
}
/**
 * Creates a python data stream from a Python {@link SourceFunction}, wrapping it in a
 * {@link PythonGeneratorFunction} so it can be executed by the Java runtime.
 *
 * @param src The Python source function to wrap and add to the topology
 * @return A python data stream produced by the given source
 * @throws Exception Thrown if wrapping or adding the source fails
 */
public PythonDataStream create_python_source(SourceFunction<Object> src) throws Exception {
	return new PythonDataStream<>(env.addSource(new PythonGeneratorFunction(src)).map(new AdapterMap<>()));
}
/**
 * Creates a python data stream from the given iterator.
 *
 * <p>Note that this operation will result in a non-parallel data stream source, i.e.,
 * a data stream source with a parallelism of one.</p>
 *
 * @param iter The iterator of elements to create the data stream from
 * @return The data stream representing the elements in the iterator
 * @throws Exception Thrown if wrapping the iterator or adding the source fails
 * @see StreamExecutionEnvironment#fromCollection(java.util.Iterator, org.apache.flink.api.common.typeinfo.TypeInformation)
 */
public PythonDataStream from_collection(Iterator<Object> iter) throws Exception {
	// Element type cannot be inferred from a Python iterator, so it is fixed to Object explicitly.
	return new PythonDataStream<>(env.addSource(new PythonIteratorFunction(iter), TypeExtractor.getForClass(Object.class))
		.map(new AdapterMap<>()));
}
/**
 * Verifies that executing a topology containing an iteration that was never closed
 * (no {@code closeWith} call) fails with an {@link IllegalStateException}.
 */
@Test(expected = IllegalStateException.class)
public void testExecutionWithEmptyIteration() throws Exception {
	StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();

	DataStream<Integer> input = environment.fromElements(1, 10).map(noOpIntMap);

	// Open an iteration but never close it with a feedback stream.
	IterativeStream<Integer> iteration = input.iterate();
	iteration.map(noOpIntMap).print();

	environment.execute();
}
@Test(expected = UnsupportedOperationException.class) public void testDifferingParallelism() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10) .map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); iter1.closeWith(iter1.map(noOpIntMap).setParallelism(parallelism / 2)); }
@Test(expected = UnsupportedOperationException.class) public void testClosingFromOutOfLoop() throws Exception { // this test verifies that we cannot close an iteration with a DataStream that does not // have the iteration in its predecessors StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); IterativeStream<Integer> iter2 = source.iterate(); iter2.closeWith(iter1.map(noOpIntMap)); }
@Test public void testDoubleClosing() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); iter1.closeWith(iter1.map(noOpIntMap)); iter1.closeWith(iter1.map(noOpIntMap)); }
@Test(expected = UnsupportedOperationException.class) public void testCoIterClosingFromOutOfLoop() throws Exception { // this test verifies that we cannot close an iteration with a DataStream that does not // have the iteration in its predecessors StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); IterativeStream<Integer> iter1 = source.iterate(); ConnectedIterativeStreams<Integer, Integer> coIter = source.iterate().withFeedbackType( Integer.class); coIter.closeWith(iter1.map(noOpIntMap)); }
@Test(expected = UnsupportedOperationException.class) public void testCoDifferingParallelism() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // introduce dummy mapper to get to correct parallelism DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap); ConnectedIterativeStreams<Integer, Integer> coIter = source.iterate().withFeedbackType( Integer.class); coIter.closeWith(coIter.map(noOpIntCoMap).setParallelism(parallelism / 2)); }
/**
 * If the expected values ever change, double-check that the change is not breaking the contract of
 * {@link StreamingRuntimeContext#getOperatorUniqueID()} being stable between job submissions.
 */
@Test
public void testGetOperatorUniqueID() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();

	// The hash literals are the operator unique IDs derived from the fixed uids below;
	// they must remain stable across job submissions (see class-level contract above).
	env.fromElements(1, 2, 3)
		.map(new VerifyOperatorIDMapFunction("6c4f323f22da8fb6e34f80c61be7a689")).uid("42")
		.map(new VerifyOperatorIDMapFunction("3e129e83691e7737fbf876b47452acbc")).uid("44");

	env.execute();
}
@Test(expected = UnsupportedOperationException.class) public void testForwardFailsHightToLowParallelism() throws Exception { StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(); // this does a rebalance that works DataStream<Integer> src = env.fromElements(1, 2, 3).map(new NoOpIntMap()); // this doesn't work because it goes from 3 to 1 src.forward().map(new NoOpIntMap()).setParallelism(1); env.execute(); }
/**
 * Runs a simple source -> map -> sink pipeline at parallelism 1 and asserts that the
 * source and the mapper observed different runtime contexts (captured into the
 * srcContext / mapContext fields by TestSource / TestMap).
 */
@Test
public void test() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	// Parallelism 1 keeps exactly one instance of each operator, so the two captured
	// contexts are directly comparable.
	env.setParallelism(1);

	env.addSource(new TestSource()).map(new TestMap()).addSink(new DiscardingSink<Integer>());
	env.execute();

	assertNotEquals(srcContext, mapContext);
}
/**
 * Tests that a changed operator name does not affect the generated vertex hash:
 * two otherwise identical topologies that differ only in a source name must
 * produce the same {@link JobVertexID}.
 */
@Test
public void testChangedOperatorName() throws Exception {
	StreamExecutionEnvironment environment = StreamExecutionEnvironment.createLocalEnvironment();
	environment.addSource(new NoOpSourceFunction(), "A").map(new NoOpMapFunction());
	JobVertexID expected = environment.getStreamGraph().getJobGraph().getVerticesAsArray()[0].getID();

	// Same topology, only the operator name differs.
	environment = StreamExecutionEnvironment.createLocalEnvironment();
	environment.addSource(new NoOpSourceFunction(), "B").map(new NoOpMapFunction());
	JobVertexID actual = environment.getStreamGraph().getJobGraph().getVerticesAsArray()[0].getID();

	assertEquals(expected, actual);
}
/**
 * Tests that a manual hash for an intermediate chain node is accepted.
 */
@Test
public void testManualHashAssignmentForIntermediateNodeInChain() throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();
	env.setParallelism(4);

	env.addSource(new NoOpSourceFunction())
		// Intermediate chained node with a manually assigned uid
		.map(new NoOpMapFunction()).uid("map")
		.addSink(new NoOpSinkFunction());

	// Generating the JobGraph must not throw despite the uid on a chained (non-head) node.
	env.getStreamGraph().getJobGraph();
}
/**
 * Verifies that a split applied to the union of an already-split stream is rejected
 * when the stream graph is built (consecutive splits are unsupported and deprecated).
 */
@Test
public void testUnionBetweenConsecutiveSplitRejection() {
	StreamExecutionEnvironment environment = StreamExecutionEnvironment.getExecutionEnvironment();
	DataStreamSource<Integer> input = environment.fromElements(0, 0);
	OutputSelector<Integer> selector = new DummyOutputSelector<>();

	// split -> select -> union -> split: the second split must be rejected.
	input.split(selector)
		.select("dummy")
		.union(input.map(x -> x))
		.split(selector)
		.addSink(new DiscardingSink<>());

	expectedException.expect(IllegalStateException.class);
	expectedException.expectMessage("Consecutive multiple splits are not supported. Splits are deprecated. Please use side-outputs.");

	// The rejection happens during stream graph construction, not execution.
	environment.getStreamGraph();
}
/**
 * Builds and executes a keyed source -> map -> sink pipeline at the given parallelism,
 * with object reuse, a small buffer timeout, and at-least-once checkpointing enabled.
 *
 * @param parallelism The parallelism to run the whole program with
 * @throws Exception Thrown if the job execution fails
 */
private static void runPartitioningProgram(int parallelism) throws Exception {
	StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
	env.setParallelism(parallelism);
	// Object reuse and a low buffer timeout keep per-record latency small for this program.
	env.getConfig().enableObjectReuse();
	env.setBufferTimeout(5L);
	env.enableCheckpointing(1000, CheckpointingMode.AT_LEAST_ONCE);

	env
		.addSource(new TimeStampingSource())
		.map(new IdMapper<Tuple2<Long, Long>>())
		.keyBy(0) // key by the first tuple field to force a hash partitioning step
		.addSink(new TimestampingSink());

	env.execute("Partitioning Program");
}