.flatMap(new FlatMapFunction<Long, Tuple2<String, String>>() {
    @Override
    public void flatMap(Long value, Collector<Tuple2<String, String>> out) {
public static void main(final String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final String outputPath = params.getRequired("outputPath");

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);
    env.enableCheckpointing(5000L);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, Time.of(10L, TimeUnit.SECONDS)));

    final StreamingFileSink<Tuple2<Integer, Integer>> sink = StreamingFileSink
        .forRowFormat(new Path(outputPath), (Encoder<Tuple2<Integer, Integer>>) (element, stream) -> {
            PrintStream out = new PrintStream(stream);
            out.println(element.f1);
        })
        .withBucketAssigner(new KeyBucketAssigner())
        .withRollingPolicy(OnCheckpointRollingPolicy.build())
        .build();

    // generate data, shuffle, sink
    env.addSource(new Generator(10, 10, 60))
        .keyBy(0)
        .addSink(sink);

    env.execute("StreamingFileSinkProgram");
}
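// A hedged sketch of the KeyBucketAssigner referenced above (the actual test
// class may differ): it buckets each record by its key field f0, so every key
// ends up in its own output directory.
public static final class KeyBucketAssigner implements BucketAssigner<Tuple2<Integer, Integer>, String> {

    @Override
    public String getBucketId(final Tuple2<Integer, Integer> element, final Context context) {
        // use the key (f0) as the bucket directory name
        return String.valueOf(element.f0);
    }

    @Override
    public SimpleVersionedSerializer<String> getSerializer() {
        return SimpleVersionedStringSerializer.INSTANCE;
    }
}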
/**
 * Adds a Java source to the streaming topology. The source is expected to be
 * a Java-based implementation (e.g. a Kafka connector).
 *
 * @param src A native Java source (e.g. PythonFlinkKafkaConsumer09)
 * @return Python data stream
 */
public PythonDataStream add_java_source(SourceFunction<Object> src) {
    return new PythonDataStream<>(env.addSource(src).map(new AdapterMap<>()));
}
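// A minimal usage sketch: pyEnv and MyJavaSource are hypothetical stand-ins
// for the enclosing Python environment wrapper and a concrete Java
// SourceFunction (e.g. a Kafka connector); neither name is confirmed API.
SourceFunction<Object> src = new MyJavaSource();  // hypothetical source class
PythonDataStream stream = pyEnv.add_java_source(src);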
public static void main(String[] args) throws Exception {
    final ParameterTool pt = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    setupEnvironment(env, pt);

    final int numStates =
        pt.getInt(NUM_LIST_STATES_PER_OP.key(), NUM_LIST_STATES_PER_OP.defaultValue());
    final int numPartitionsPerState =
        pt.getInt(NUM_PARTITIONS_PER_LIST_STATE.key(), NUM_PARTITIONS_PER_LIST_STATE.defaultValue());

    Preconditions.checkState(env.getCheckpointInterval() > 0L,
        "Checkpointing must be enabled for this test!");

    env.addSource(new SimpleEndlessSourceWithBloatedState(numStates, numPartitionsPerState))
        .setParallelism(env.getParallelism())
        .addSink(new DiscardingSink<>())
        .setParallelism(1);

    env.execute("HeavyDeploymentStressTestProgram");
}
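// A hedged sketch of the setupEnvironment helper assumed above: the
// Precondition requires a positive checkpoint interval, so the helper must at
// least enable checkpointing from the ParameterTool. The parameter key and
// default below are assumptions for illustration; the real helper likely
// configures more (state backend, restart strategy, and so on).
private static void setupEnvironment(StreamExecutionEnvironment env, ParameterTool pt) {
    long checkpointInterval = pt.getLong("checkpoint_interval", 1000L);  // assumed key/default
    env.enableCheckpointing(checkpointInterval);
}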
public static JobGraph stoppableJob(final StopJobSignal stopJobSignal) {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.addSource(new InfiniteSourceFunction(stopJobSignal))
        .setParallelism(2)
        .shuffle()
        .addSink(new DiscardingSink<>())
        .setParallelism(2);

    return env.getStreamGraph().getJobGraph();
}
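// A hedged sketch of the InfiniteSourceFunction used above (the real test
// class may differ): an endless source that stops emitting once the job is
// cancelled or the external stop signal fires. StopJobSignal#isSignaled() is
// an assumed accessor, not confirmed API.
private static class InfiniteSourceFunction implements SourceFunction<Long> {

    private volatile boolean running = true;
    private final StopJobSignal stopJobSignal;

    InfiniteSourceFunction(StopJobSignal stopJobSignal) {
        this.stopJobSignal = stopJobSignal;
    }

    @Override
    public void run(SourceContext<Long> ctx) throws Exception {
        long counter = 0L;
        while (running && !stopJobSignal.isSignaled()) {  // assumed accessor
            synchronized (ctx.getCheckpointLock()) {
                ctx.collect(counter++);
            }
        }
    }

    @Override
    public void cancel() {
        running = false;
    }
}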
@Test
@SuppressWarnings("unchecked")
public void testFromCollectionParallelism() {
    try {
        TypeInformation<Integer> typeInfo = BasicTypeInfo.INT_TYPE_INFO;
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        DataStreamSource<Integer> dataStream1 =
            env.fromCollection(new DummySplittableIterator<Integer>(), typeInfo);

        try {
            dataStream1.setParallelism(4);
            fail("should throw an exception");
        } catch (IllegalArgumentException e) {
            // expected
        }

        dataStream1.addSink(new DiscardingSink<Integer>());

        DataStreamSource<Integer> dataStream2 =
            env.fromParallelCollection(new DummySplittableIterator<Integer>(), typeInfo)
                .setParallelism(4);

        dataStream2.addSink(new DiscardingSink<Integer>());

        env.getExecutionPlan();

        assertEquals("Parallelism of collection source must be 1.", 1,
            env.getStreamGraph().getStreamNode(dataStream1.getId()).getParallelism());
        assertEquals("Parallelism of parallel collection source must be 4.", 4,
            env.getStreamGraph().getStreamNode(dataStream2.getId()).getParallelism());
    } catch (Exception e) {
        e.printStackTrace();
        fail(e.getMessage());
    }
}
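// A hedged sketch of the DummySplittableIterator helper assumed by the test
// (Flink's actual test utility may differ): a SplittableIterator that yields
// no elements, which is all the parallelism assertions above require.
private static class DummySplittableIterator<T> extends SplittableIterator<T> {

    @Override
    @SuppressWarnings("unchecked")
    public Iterator<T>[] split(int numPartitions) {
        return (Iterator<T>[]) new Iterator<?>[0];
    }

    @Override
    public int getMaximumNumberOfSplits() {
        return 0;
    }

    @Override
    public boolean hasNext() {
        return false;
    }

    @Override
    public T next() {
        throw new NoSuchElementException();
    }
}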
DataStream<Integer> map = src.map(mapFunction);
map.addSink(new DiscardingSink<Integer>());
assertEquals(mapFunction, getFunctionForDataStream(map));

DataStream<Integer> flatMap = src.flatMap(flatMapFunction);
flatMap.addSink(new DiscardingSink<Integer>());
assertEquals(flatMapFunction, getFunctionForDataStream(flatMap));
env.setParallelism(10);

SingleOutputStreamOperator<Long> map = src.map(new MapFunction<Tuple2<Long, Long>, Long>() {
    @Override
    public Long map(Tuple2<Long, Long> value) throws Exception {
        return null;
    }
});

assertEquals(1, env.getStreamGraph().getStreamNode(src.getId()).getParallelism());
assertEquals(10, env.getStreamGraph().getStreamNode(map.getId()).getParallelism());
assertEquals(1, env.getStreamGraph().getStreamNode(windowed.getId()).getParallelism());

try {
    src.setParallelism(3);
    fail();
} catch (IllegalArgumentException success) {
    // expected: a non-parallel source cannot have its parallelism raised
}

parallelSource.addSink(new DiscardingSink<Long>());
assertEquals(7, env.getStreamGraph().getStreamNode(parallelSource.getId()).getParallelism());

parallelSource.setParallelism(3);
assertEquals(3, env.getStreamGraph().getStreamNode(parallelSource.getId()).getParallelism());
.addSource(consumer).setParallelism(sourceParallelism)
.map(new ThrottledMapper<Tuple2<Integer, Integer>>(20)).setParallelism(sourceParallelism);
/**
 * NOTE: This method is for internal use only for defining a TableSource.
 * Do not use it in Table API programs.
 */
@Override
public DataStream<Row> getDataStream(StreamExecutionEnvironment env) {
    DeserializationSchema<Row> deserializationSchema = getDeserializationSchema();
    // Version-specific Kafka consumer
    FlinkKafkaConsumerBase<Row> kafkaConsumer =
        getKafkaConsumer(topic, properties, deserializationSchema);
    return env.addSource(kafkaConsumer).name(explainSource());
}
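// A hedged sketch of the version-specific hook assumed above: a Kafka 0.9
// table source, for example, would implement getKafkaConsumer roughly like
// this. Treat it as an illustration, not the connector's actual code.
@Override
FlinkKafkaConsumerBase<Row> getKafkaConsumer(
        String topic,
        Properties properties,
        DeserializationSchema<Row> deserializationSchema) {
    return new FlinkKafkaConsumer09<>(topic, deserializationSchema, properties);
}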
.addSource(latestReadingConsumer).setParallelism(parallelism)
.flatMap(new FlatMapFunction<Tuple2<Integer, Integer>, Object>() {
    @Override
    public void flatMap(Tuple2<Integer, Integer> value, Collector<Object> out) throws Exception {
private static JobGraph createJobGraphWithKeyedAndNonPartitionedOperatorState(
        int parallelism,
        int maxParallelism,
        int fixedParallelism,
        int numberKeys,
        int numberElements,
        boolean terminateAfterEmission,
        int checkpointingInterval) {

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(parallelism);
    env.getConfig().setMaxParallelism(maxParallelism);
    env.enableCheckpointing(checkpointingInterval);
    env.setRestartStrategy(RestartStrategies.noRestart());

    DataStream<Integer> input = env.addSource(new SubtaskIndexNonPartitionedStateSource(
            numberKeys, numberElements, terminateAfterEmission))
        .setParallelism(fixedParallelism)
        .keyBy(new KeySelector<Integer, Integer>() {
            private static final long serialVersionUID = -7952298871120320940L;

            @Override
            public Integer getKey(Integer value) throws Exception {
                return value;
            }
        });

    SubtaskIndexFlatMapper.workCompletedLatch = new CountDownLatch(numberKeys);

    DataStream<Tuple2<Integer, Integer>> result =
        input.flatMap(new SubtaskIndexFlatMapper(numberElements));

    result.addSink(new CollectionSink<Tuple2<Integer, Integer>>());

    return env.getStreamGraph().getJobGraph();
}
public DataStreamSource(
        StreamExecutionEnvironment environment,
        TypeInformation<T> outTypeInfo,
        StreamSource<T, ?> operator,
        boolean isParallel,
        String sourceName) {
    super(environment, new SourceTransformation<>(
        sourceName, operator, outTypeInfo, environment.getParallelism()));

    this.isParallel = isParallel;
    if (!isParallel) {
        setParallelism(1);
    }
}
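// Illustration of the contract enforced above: a source constructed with
// isParallel == false is pinned to parallelism 1, so raising it later fails
// (compare the fromCollection test earlier in this section). A minimal
// sketch, assuming a standard StreamExecutionEnvironment:
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStreamSource<Integer> source = env.fromElements(1, 2, 3);  // non-parallel source
source.setParallelism(4);  // throws IllegalArgumentException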
private static StreamExecutionEnvironment getSimpleJob() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.generateSequence(1, 10000000)
        .addSink(new SinkFunction<Long>() {
            @Override
            public void invoke(Long value) {
            }
        });

    return env;
}
.assignTimestampsAndWatermarks(new CustomWmEmitter<Long>() {

.assignTimestampsAndWatermarks(new CustomWmEmitter<String>() {
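// A hedged sketch of the CustomWmEmitter base class assumed by the two
// fragments above (the real test helper may differ): a punctuated watermark
// assigner that emits a watermark for every extracted timestamp, leaving
// timestamp extraction itself to the anonymous subclass.
private abstract static class CustomWmEmitter<T> implements AssignerWithPunctuatedWatermarks<T> {

    @Nullable
    @Override
    public Watermark checkAndGetNextWatermark(T lastElement, long extractedTimestamp) {
        // advance event time to each element's timestamp
        return new Watermark(extractedTimestamp);
    }
}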
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
DataStreamSink<Long> sink = env.generateSequence(1, 100).print();

assertTrue(env.getStreamGraph().getStreamNode(sink.getTransformation().getId())
    .getStatePartitioner1() == null);
assertTrue(env.getStreamGraph().getStreamNode(sink.getTransformation().getId())
    .getInEdges().get(0).getPartitioner() instanceof ForwardPartitioner);

DataStreamSink<Long> sink2 = env.generateSequence(1, 100).keyBy(key1).print();
DataStreamSink<Long> sink3 = env.generateSequence(1, 100).keyBy(key2).print();
@Test
public void testUnionBetweenConsecutiveSplitRejection() {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    DataStreamSource<Integer> src = env.fromElements(0, 0);
    OutputSelector<Integer> outputSelector = new DummyOutputSelector<>();

    src.split(outputSelector)
        .select("dummy")
        .union(src.map(x -> x))
        .split(outputSelector)
        .addSink(new DiscardingSink<>());

    expectedException.expect(IllegalStateException.class);
    expectedException.expectMessage(
        "Consecutive multiple splits are not supported. Splits are deprecated. Please use side-outputs.");

    env.getStreamGraph();
}
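// A minimal sketch of the side-output pattern that the exception message
// recommends in place of split/select; the OutputTag name and routing logic
// are illustrative only.
final OutputTag<Integer> dummyTag = new OutputTag<Integer>("dummy") {};

SingleOutputStreamOperator<Integer> mainStream = src.process(
    new ProcessFunction<Integer, Integer>() {
        @Override
        public void processElement(Integer value, Context ctx, Collector<Integer> out) {
            ctx.output(dummyTag, value);  // route a copy to the side output
            out.collect(value);           // keep the main stream flowing
        }
    });

mainStream.getSideOutput(dummyTag).addSink(new DiscardingSink<>());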
private static void runJob() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.fromElements(1, 2, 3)
        .print();

    env.execute();
}
try {
    env.addSource(new TestSource<Integer>()).print();
    fail();
} catch (Exception ignored) {}

env.addSource(new TestSource<Integer>()).returns(Integer.class);

source.map(new TestMap<Long, Long>()).returns(Long.class).print();
source.flatMap(new TestFlatMap<Long, Long>()).returns(new TypeHint<Long>(){}).print();
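// A hedged sketch of the generic TestSource helper assumed by the test (the
// real test class may differ): its type parameter cannot be reified, so
// Flink's type extraction fails until returns(...) supplies the output type.
private static class TestSource<T> implements SourceFunction<T> {

    @Override
    public void run(SourceContext<T> ctx) {
        // emits nothing; only the unresolvable output type matters here
    }

    @Override
    public void cancel() {}
}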