public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final Path inputFile = Paths.get(params.getRequired("inputFile"));
    final Path inputDir = Paths.get(params.getRequired("inputDir"));

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(1);

    env.registerCachedFile(inputFile.toString(), "test_data", false);
    env.registerCachedFile(inputDir.toString(), "test_dir", false);

    final Path containedFile;
    try (Stream<Path> files = Files.list(inputDir)) {
        containedFile = files.findAny()
            .orElseThrow(() -> new RuntimeException("Input directory must not be empty."));
    }

    env.fromElements(1)
        .map(new TestMapFunction(
            inputFile.toAbsolutePath().toString(),
            Files.size(inputFile),
            inputDir.toAbsolutePath().toString(),
            containedFile.getFileName().toString()))
        .writeAsText(params.getRequired("output"), FileSystem.WriteMode.OVERWRITE);

    env.execute("Distributed Cache Via Blob Test Program");
}
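TestMapFunction is referenced above but not shown in this result. A minimal sketch of what it might look like, assuming it verifies the cached copies through Flink's DistributedCache API (the exact checks are illustrative):

private static class TestMapFunction extends RichMapFunction<Integer, String> {

    private final String initialPath;
    private final long fileSize;
    private final String initialDirPath;
    private final String containedFileName;

    TestMapFunction(String initialPath, long fileSize, String initialDirPath, String containedFileName) {
        this.initialPath = initialPath;
        this.fileSize = fileSize;
        this.initialDirPath = initialDirPath;
        this.containedFileName = containedFileName;
    }

    @Override
    public String map(Integer value) throws Exception {
        // entries registered with registerCachedFile are retrieved by name
        File file = getRuntimeContext().getDistributedCache().getFile("test_data");
        File dir = getRuntimeContext().getDistributedCache().getFile("test_dir");

        if (file.getAbsolutePath().equals(initialPath)) {
            throw new RuntimeException("Cached file should be a local copy, not the original path.");
        }
        if (Files.size(file.toPath()) != fileSize) {
            throw new RuntimeException("Unexpected size for cached file.");
        }
        if (!new File(dir, containedFileName).exists()) {
            throw new RuntimeException("Cached directory is missing expected file.");
        }
        return "Success";
    }
}

registerCachedFile ships the artifacts through the blob server, and getDistributedCache().getFile(...) hands each task a local copy, which is why the sketch asserts that the resolved path differs from the original one.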
private static void runJob() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    env.fromElements(1, 2, 3)
        .print();

    env.execute();
}
public static void main(String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);

    double errorRate = params.getDouble("error-rate", 0.0);
    int sleep = params.getInt("sleep", 1);
    String kafkaTopic = params.get("kafka-topic");
    String brokers = params.get("brokers", "localhost:9092");

    System.out.printf("Generating events to Kafka with standalone source with error rate %f and sleep delay %s millis\n", errorRate, sleep);
    System.out.println();

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    env
        .addSource(new EventsGeneratorSource(errorRate, sleep))
        .addSink(new FlinkKafkaProducer010<>(brokers, kafkaTopic, new EventDeSerializer()));

    // trigger program execution
    env.execute("State machine example Kafka events generator job");
}
@Override
public void go() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    env.addSource(new SourceFunction<String>() {
        @Override
        public void run(SourceContext<String> ctx) throws Exception {
            sync.block();
        }

        @Override
        public void cancel() {
            sync.releaseBlocker();
        }
    }).addSink(new PrintSinkFunction());

    env.execute();
}
public static void main(String[] args) throws Exception {
    // parse the parameters
    final ParameterTool params = ParameterTool.fromArgs(args);
    final long windowSize = params.getLong("windowSize", 2000);
    final long rate = params.getLong("rate", 3L);

    System.out.println("Using windowSize=" + windowSize + ", data rate=" + rate);
    System.out.println("To customize example, use: WindowJoin [--windowSize <window-size-in-millis>] [--rate <elements-per-second>]");

    // obtain execution environment, run this example in "ingestion time"
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.IngestionTime);

    // make parameters available in the web interface
    env.getConfig().setGlobalJobParameters(params);

    // create the data sources for both grades and salaries
    DataStream<Tuple2<String, Integer>> grades = GradeSource.getSource(env, rate);
    DataStream<Tuple2<String, Integer>> salaries = SalarySource.getSource(env, rate);

    // run the actual window join program
    // for testability, this functionality is in a separate method (sketched below)
    DataStream<Tuple3<String, Integer, Integer>> joinedStream = runWindowJoin(grades, salaries, windowSize);

    // print the results with a single thread, rather than in parallel
    joinedStream.print().setParallelism(1);

    // execute program
    env.execute("Windowed Join Example");
}
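runWindowJoin is factored into a separate method so tests can drive the join without the sources and sink. A sketch of how it can be implemented with Flink's windowed join API; the NameKeySelector helper is an assumption, not shown in the snippet above:

public static DataStream<Tuple3<String, Integer, Integer>> runWindowJoin(
        DataStream<Tuple2<String, Integer>> grades,
        DataStream<Tuple2<String, Integer>> salaries,
        long windowSize) {

    return grades.join(salaries)
        // join the two streams on the name field (f0)
        .where(new NameKeySelector())
        .equalTo(new NameKeySelector())
        .window(TumblingEventTimeWindows.of(Time.milliseconds(windowSize)))
        .apply(new JoinFunction<Tuple2<String, Integer>, Tuple2<String, Integer>,
                Tuple3<String, Integer, Integer>>() {
            @Override
            public Tuple3<String, Integer, Integer> join(
                    Tuple2<String, Integer> first, Tuple2<String, Integer> second) {
                // emit (name, grade, salary)
                return new Tuple3<>(first.f0, first.f1, second.f1);
            }
        });
}

private static class NameKeySelector implements KeySelector<Tuple2<String, Integer>, String> {
    @Override
    public String getKey(Tuple2<String, Integer> value) {
        return value.f0;
    }
}

Because the job runs in ingestion time, the event-time tumbling window pairs up grade and salary records that arrived within the same windowSize interval.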
/**
 * Tests whether the Elasticsearch sink fails when there is no cluster to connect to.
 */
public void runInvalidElasticsearchClusterTest() throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStreamSource<Tuple2<Integer, String>> source = env.addSource(new SourceSinkDataTestKit.TestDataSourceFunction());

    Map<String, String> userConfig = new HashMap<>();
    userConfig.put(ElasticsearchSinkBase.CONFIG_KEY_BULK_FLUSH_MAX_ACTIONS, "1");
    userConfig.put("cluster.name", "invalid-cluster-name");

    source.addSink(createElasticsearchSinkForNode(
        1,
        "invalid-cluster-name",
        new SourceSinkDataTestKit.TestElasticsearchSinkFunction("test"),
        "123.123.123.123")); // incorrect ip address

    try {
        env.execute("Elasticsearch Sink Test");
    } catch (JobExecutionException expectedException) {
        // test passes
        return;
    }

    fail();
}
@Test(expected = UnsupportedOperationException.class)
public void testForwardFailsLowToHighParallelism() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> src = env.fromElements(1, 2, 3);

    // this doesn't work because it goes from 1 to 3
    src.forward().map(new NoOpIntMap());

    env.execute();
}
public static void main(String[] args) throws Exception {
    // set up execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tEnv = TableEnvironment.getTableEnvironment(env);

    DataStream<Order> orderA = env.fromCollection(Arrays.asList(
        new Order(1L, "beer", 3),
        new Order(1L, "diaper", 4),
        new Order(3L, "rubber", 2)));

    DataStream<Order> orderB = env.fromCollection(Arrays.asList(
        new Order(2L, "pen", 3),
        new Order(2L, "rubber", 3),
        new Order(4L, "beer", 1)));

    // convert DataStream to Table
    Table tableA = tEnv.fromDataStream(orderA, "user, product, amount");
    // register DataStream as Table
    tEnv.registerDataStream("OrderB", orderB, "user, product, amount");

    // union the two tables
    Table result = tEnv.sqlQuery("SELECT * FROM " + tableA + " WHERE amount > 2 UNION ALL " +
        "SELECT * FROM OrderB WHERE amount < 2");

    tEnv.toAppendStream(result, Order.class).print();

    env.execute();
}
public static void main(String[] args) throws Exception {
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);

    DataStream<Tuple2<Long, Long>> stream = env.addSource(new DataSource());

    stream
        .keyBy(0)
        .timeWindow(Time.of(2500, MILLISECONDS), Time.of(500, MILLISECONDS))
        .reduce(new SummingReducer())

        // alternative: use an apply function which does not pre-aggregate
        // .keyBy(new FirstFieldKeyExtractor<Tuple2<Long, Long>, Long>())
        // .window(Time.of(2500, MILLISECONDS), Time.of(500, MILLISECONDS))
        // .apply(new SummingWindowFunction())

        .addSink(new SinkFunction<Tuple2<Long, Long>>() {
            @Override
            public void invoke(Tuple2<Long, Long> value) {
            }
        });

    env.execute();
}
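SummingReducer is not shown above. A plausible implementation, assuming it keeps the key in f0 and sums the counts in f1:

private static class SummingReducer implements ReduceFunction<Tuple2<Long, Long>> {
    @Override
    public Tuple2<Long, Long> reduce(Tuple2<Long, Long> value1, Tuple2<Long, Long> value2) {
        // keep the key, add the per-key counts
        return new Tuple2<>(value1.f0, value1.f1 + value2.f1);
    }
}

With reduce, the sliding window pre-aggregates incrementally and stores one running tuple per key and window; the commented-out apply-based alternative would buffer all elements instead.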
@Test
public void testOperatorChainWithObjectReuseAndNoOutputOperators() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.getConfig().enableObjectReuse();

    DataStream<Integer> input = env.fromElements(1, 2, 3);
    input.flatMap(new FlatMapFunction<Integer, Integer>() {
        @Override
        public void flatMap(Integer value, Collector<Integer> out) throws Exception {
            out.collect(value << 1);
        }
    });

    env.execute();
}
public static void main(String[] args) throws Exception {
    final ParameterTool pt = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    setupEnvironment(env, pt);

    KeyedStream<Event, Integer> source = env.addSource(createEventSource(pt))
        .name("EventSource")
        .uid("EventSource")
        .assignTimestampsAndWatermarks(createTimestampExtractor(pt))
        .keyBy(Event::getKey);

    List<TypeSerializer<ComplexPayload>> stateSer =
        Collections.singletonList(new KryoSerializer<>(ComplexPayload.class, env.getConfig()));

    KeyedStream<Event, Integer> afterStatefulOperations = isOriginalJobVariant(pt)
        ? applyOriginalStatefulOperations(source, stateSer, Collections.emptyList())
        : applyUpgradedStatefulOperations(source, stateSer, Collections.emptyList());

    afterStatefulOperations
        .flatMap(createSemanticsCheckMapper(pt))
        .name("SemanticsCheckMapper")
        .addSink(new PrintSinkFunction<>());

    env.execute("General purpose test job");
}
@Test
public void testFilter() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
    StreamITCase.clear();

    DataStream<Tuple5<Integer, Long, Integer, String, Long>> ds = JavaStreamTestData.get5TupleDataStream(env);
    tableEnv.registerDataStream("MyTable", ds, "a, b, c, d, e");

    String sqlQuery = "SELECT a, b, e FROM MyTable WHERE c < 4";
    Table result = tableEnv.sqlQuery(sqlQuery);

    DataStream<Row> resultSet = tableEnv.toAppendStream(result, Row.class);
    resultSet.addSink(new StreamITCase.StringSink<Row>());
    env.execute();

    List<String> expected = new ArrayList<>();
    expected.add("1,1,1");
    expected.add("2,2,2");
    expected.add("2,3,1");
    expected.add("3,4,2");

    StreamITCase.compareWithList(expected);
}
@Test
public void testStreamingDistributedCache() throws Exception {
    String textPath = createTempFile("count.txt", DATA);

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.registerCachedFile(textPath, "cache_test");
    env.readTextFile(textPath).flatMap(new WordChecker());

    env.execute();
}
@Test(expected = IllegalStateException.class)
public void testExecutionWithEmptyIteration() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<Integer> source = env.fromElements(1, 10).map(noOpIntMap);

    IterativeStream<Integer> iter1 = source.iterate();

    // the iteration is never closed with closeWith(), so execution must fail
    iter1.map(noOpIntMap).print();

    env.execute();
}
public static void main(final String[] args) throws Exception {
    final ParameterTool params = ParameterTool.fromArgs(args);
    final String outputPath = params.getRequired("outputPath");

    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setParallelism(4);
    env.enableCheckpointing(5000L);
    env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, Time.of(10L, TimeUnit.SECONDS)));

    final StreamingFileSink<Tuple2<Integer, Integer>> sink = StreamingFileSink
        .forRowFormat(new Path(outputPath), (Encoder<Tuple2<Integer, Integer>>) (element, stream) -> {
            PrintStream out = new PrintStream(stream);
            out.println(element.f1);
        })
        .withBucketAssigner(new KeyBucketAssigner())
        .withRollingPolicy(OnCheckpointRollingPolicy.build())
        .build();

    // generate data, shuffle, sink
    env.addSource(new Generator(10, 10, 60))
        .keyBy(0)
        .addSink(sink);

    env.execute("StreamingFileSinkProgram");
}
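KeyBucketAssigner is referenced but not defined in the snippet. One way it could be written, assuming each record is bucketed by its key field so every key gets its own output directory:

public static final class KeyBucketAssigner implements BucketAssigner<Tuple2<Integer, Integer>, String> {

    private static final long serialVersionUID = 1L;

    @Override
    public String getBucketId(Tuple2<Integer, Integer> element, Context context) {
        // bucket name is the record's key
        return String.valueOf(element.f0);
    }

    @Override
    public SimpleVersionedSerializer<String> getSerializer() {
        // bucket ids are plain strings, so the built-in serializer suffices
        return SimpleVersionedStringSerializer.INSTANCE;
    }
}

Combined with OnCheckpointRollingPolicy, in-progress part files roll on every checkpoint, which keeps the output consistent across the restarts configured above.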
@Test
public void testSelect() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
    StreamITCase.clear();

    DataStream<Tuple3<Integer, Long, String>> ds = JavaStreamTestData.getSmall3TupleDataSet(env);
    Table in = tableEnv.fromDataStream(ds, "a,b,c");
    tableEnv.registerTable("MyTable", in);

    String sqlQuery = "SELECT * FROM MyTable";
    Table result = tableEnv.sqlQuery(sqlQuery);

    DataStream<Row> resultSet = tableEnv.toAppendStream(result, Row.class);
    resultSet.addSink(new StreamITCase.StringSink<Row>());
    env.execute();

    List<String> expected = new ArrayList<>();
    expected.add("1,1,Hi");
    expected.add("2,2,Hello");
    expected.add("3,2,Hello world");

    StreamITCase.compareWithList(expected);
}
@Test
public void testUnion() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    StreamTableEnvironment tableEnv = TableEnvironment.getTableEnvironment(env);
    StreamITCase.clear();

    // NOTE: the original snippet was truncated here; the body below is a
    // representative reconstruction of a UNION ALL test over the same helpers
    // used in the other results.
    DataStream<Tuple3<Integer, Long, String>> ds1 = JavaStreamTestData.getSmall3TupleDataSet(env);
    Table t1 = tableEnv.fromDataStream(ds1, "a,b,c");
    tableEnv.registerTable("T1", t1);

    DataStream<Tuple3<Integer, Long, String>> ds2 = JavaStreamTestData.getSmall3TupleDataSet(env);
    tableEnv.registerDataStream("T2", ds2, "a, b, c");

    String sqlQuery = "SELECT * FROM T1 UNION ALL SELECT * FROM T2";
    Table result = tableEnv.sqlQuery(sqlQuery);

    DataStream<Row> resultSet = tableEnv.toAppendStream(result, Row.class);
    resultSet.addSink(new StreamITCase.StringSink<Row>());
    env.execute();

    List<String> expected = new ArrayList<>();
    expected.add("1,1,Hi");
    expected.add("2,2,Hello");
    expected.add("3,2,Hello world");
    expected.add("1,1,Hi");
    expected.add("2,2,Hello");
    expected.add("3,2,Hello world");

    StreamITCase.compareWithList(expected);
}
@Test
public void testProgram() throws Exception {
    String resultPath = getTempDirPath("result");

    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    DataStream<String> text = env.fromElements(WordCountData.TEXT);

    DataStream<Tuple2<String, Integer>> counts = text
        .flatMap(new Tokenizer())
        .keyBy(0).sum(1);

    counts.writeAsCsv(resultPath);

    env.execute("WriteAsCsvTest");

    // strip the parentheses so the expected tuple text matches the CSV output
    compareResultsByLinesInMemory(WordCountData.STREAMING_COUNTS_AS_TUPLES
        .replaceAll("[\\(\\)]", ""), resultPath);
}
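Tokenizer is not shown here; presumably it is the standard WordCount tokenizer, which lower-cases each line, splits on non-word characters, and emits (word, 1) pairs:

public static final class Tokenizer implements FlatMapFunction<String, Tuple2<String, Integer>> {
    @Override
    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) {
        // normalize and split the line into words
        String[] tokens = value.toLowerCase().split("\\W+");

        for (String token : tokens) {
            if (token.length() > 0) {
                out.collect(new Tuple2<>(token, 1));
            }
        }
    }
}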
public static void main(String[] args) throws Exception {
    final ParameterTool pt = ParameterTool.fromArgs(args);
    final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    setupEnvironment(env, pt);

    final int numStates =
        pt.getInt(NUM_LIST_STATES_PER_OP.key(), NUM_LIST_STATES_PER_OP.defaultValue());
    final int numPartitionsPerState =
        pt.getInt(NUM_PARTITIONS_PER_LIST_STATE.key(), NUM_PARTITIONS_PER_LIST_STATE.defaultValue());

    Preconditions.checkState(env.getCheckpointInterval() > 0L,
        "Checkpointing must be enabled for this test!");

    env.addSource(new SimpleEndlessSourceWithBloatedState(numStates, numPartitionsPerState))
        .setParallelism(env.getParallelism())
        .addSink(new DiscardingSink<>())
        .setParallelism(1);

    env.execute("HeavyDeploymentStressTestProgram");
}
@Test(expected = UnsupportedOperationException.class)
public void testForwardFailsHighToLowParallelism() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // this does a rebalance that works
    DataStream<Integer> src = env.fromElements(1, 2, 3).map(new NoOpIntMap());

    // this doesn't work because it goes from 3 to 1
    src.forward().map(new NoOpIntMap()).setParallelism(1);

    env.execute();
}