/**
 * Reads the given file line-by-line and creates a data stream that contains a string with the
 * contents of each such line. The file will be read with the UTF-8 character set.
 *
 * <p><b>NOTES ON CHECKPOINTING: </b> The source monitors the path, creates the
 * {@link org.apache.flink.core.fs.FileInputSplit FileInputSplits} to be processed, forwards
 * them to the downstream {@link ContinuousFileReaderOperator readers} to read the actual data,
 * and exits, without waiting for the readers to finish reading. This implies that no more
 * checkpoint barriers are going to be forwarded after the source exits, thus having no
 * checkpoints after that point.
 *
 * @param filePath The path of the file, as a URI (e.g., "file:///some/local/file" or
 *     "hdfs://host:port/file/path").
 * @return The data stream that represents the data read from the given file as text lines
 */
public DataStreamSource<String> readTextFile(String filePath) {
    return readTextFile(filePath, "UTF-8");
}
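// Usage sketch for the checkpointing note above (paths, interval, and job name are
// placeholders, not taken from the source): readTextFile reads the file once and the
// monitoring source then exits, so no checkpoint barriers flow afterwards. The
// readFile variant can instead monitor the path continuously, keeping the source
// (and checkpointing) alive.
public static void main(String[] args) throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

    // One-shot read: each line becomes one String element, decoded as UTF-8.
    DataStream<String> once = env.readTextFile("file:///tmp/input.txt");
    once.print();

    // Continuous monitoring: re-scan the path for new data every 10 seconds.
    TextInputFormat format = new TextInputFormat(new Path("file:///tmp/input.txt"));
    DataStream<String> continuous = env.readFile(
            format, "file:///tmp/input.txt", FileProcessingMode.PROCESS_CONTINUOUSLY, 10_000L);
    continuous.print();

    env.execute("readTextFile usage sketch");
}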
/**
 * A thin wrapper layer over {@link StreamExecutionEnvironment#readTextFile(java.lang.String)}.
 *
 * @param path The path of the file, as a URI (e.g., "file:///some/local/file" or
 *     "hdfs://host:port/file/path").
 * @return The data stream that represents the data read from the given file as text lines
 * @throws IOException if the file cannot be read
 */
public PythonDataStream read_text_file(String path) throws IOException {
    return new PythonDataStream<>(env.readTextFile(path).map(new AdapterMap<String>()));
}
if (params.has("input")) { text = env.readTextFile(params.get("input")); } else { System.out.println("Executing WordCount example with default input data set.");
if (params.has("input")) { text = env.readTextFile(params.get("input")); } else { System.out.println("Executing WindowWordCount example with default input data set.");
@Test
public void testStreamingDistributedCache() throws Exception {
    String textPath = createTempFile("count.txt", DATA);
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.registerCachedFile(textPath, "cache_test");
    env.readTextFile(textPath).flatMap(new WordChecker());
    env.execute();
}
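// Hypothetical sketch of a checker like WordChecker in the test above (name and
// logic are assumptions, not taken from the source): a rich function can fetch the
// file registered under "cache_test" from the distributed cache in open().
private static class CachedFileChecker extends RichFlatMapFunction<String, String> {
    private Set<String> cachedLines;

    @Override
    public void open(Configuration parameters) throws Exception {
        // Resolve the local copy of the file that was registered on the environment.
        File cached = getRuntimeContext().getDistributedCache().getFile("cache_test");
        cachedLines = new HashSet<>(Files.readAllLines(cached.toPath()));
    }

    @Override
    public void flatMap(String line, Collector<String> out) {
        // Forward only lines that also appear in the cached copy of the file.
        if (cachedLines.contains(line)) {
            out.collect(line);
        }
    }
}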
if (params.has("input")) { text = env.readTextFile(params.get("input")); } else { System.out.println("Executing WordCount example with default input data set.");
if (params.has("input")) {
    inputStream = env.readTextFile(params.get("input")).map(new FibonacciInputMap());
} else {
    System.out.println("Executing Iterate example with default input data set.");
    // Fall back to the example's generated default input stream.
    inputStream = env.addSource(new RandomFibonacciSource());
}
DataStream<Tuple4<Integer, Integer, Double, Long>> carData;
if (params.has("input")) {
    carData = env.readTextFile(params.get("input")).map(new ParseCarData());
} else {
    System.out.println("Executing TopSpeedWindowing example with default input data set.");
    // Fall back to the example's generated car data source.
    carData = env.addSource(CarSource.create(2));
}
@SuppressWarnings("serial") private static DataStream<Edge<Long, NullValue>> getEdgesDataSet(StreamExecutionEnvironment env) { if (fileOutput) { return env.readTextFile(edgeInputPath) .map(new MapFunction<String, Edge<Long, NullValue>>() { @Override public Edge<Long, NullValue> map(String s) throws Exception { String[] fields = s.split("\\t"); long src = Long.parseLong(fields[0]); long trg = Long.parseLong(fields[1]); return new Edge<>(src, trg, NullValue.getInstance()); } }); } return env.generateSequence(0, 999).flatMap( new FlatMapFunction<Long, Edge<Long, NullValue>>() { @Override public void flatMap(Long key, Collector<Edge<Long, NullValue>> out) throws Exception { out.collect(new Edge<>(key, (key + 2) % 1000, NullValue.getInstance())); out.collect(new Edge<>(key, (key + 4) % 1000, NullValue.getInstance())); } }); }
@SuppressWarnings("serial") private static DataStream<Edge<Long, NullValue>> getEdgesDataSet(StreamExecutionEnvironment env) { if (fileOutput) { return env.readTextFile(edgeInputPath) .map(new MapFunction<String, Edge<Long, NullValue>>() { @Override public Edge<Long, NullValue> map(String s) throws Exception { String[] fields = s.split("\\t"); long src = Long.parseLong(fields[0]); long trg = Long.parseLong(fields[1]); return new Edge<>(src, trg, NullValue.getInstance()); } }); } return env.generateSequence(0, 999).flatMap( new FlatMapFunction<Long, Edge<Long, NullValue>>() { @Override public void flatMap(Long key, Collector<Edge<Long, NullValue>> out) throws Exception { out.collect(new Edge<>(key, (key + 2) % 1000, NullValue.getInstance())); out.collect(new Edge<>(key, (key + 4) % 1000, NullValue.getInstance())); } }); }
@SuppressWarnings("serial") private static DataStream<Tuple2<Long, Long>> getEdgesDataSet(StreamExecutionEnvironment env) { if (fileOutput) { return env.readTextFile(edgeInputPath) .map(new MapFunction<String, Tuple2<Long, Long>>() { @Override public Tuple2<Long, Long> map(String s) { String[] fields = s.split("\\t"); long src = Long.parseLong(fields[0]); long trg = Long.parseLong(fields[1]); return new Tuple2<>(src, trg); } }); } return env.generateSequence(1, 10).flatMap( new FlatMapFunction<Long, Tuple2<Long, Long>>() { @Override public void flatMap(Long key, Collector<Tuple2<Long, Long>> out) throws Exception { for (int i = 1; i < 3; i++) { long target = key + i; out.collect(new Tuple2<>(key, target)); } } }); }
public CentralizedWeightedMatching() throws Exception {
    StreamExecutionEnvironment env = StreamExecutionEnvironment.createLocalEnvironment();

    // Source: http://grouplens.org/datasets/movielens/
    @SuppressWarnings("serial")
    DataStream<Edge<Long, Long>> edges = env
            .readTextFile("movielens_10k_sorted.txt")
            .map(new MapFunction<String, Edge<Long, Long>>() {
                @Override
                public Edge<Long, Long> map(String s) throws Exception {
                    String[] args = s.split("\t");
                    long src = Long.parseLong(args[0]);
                    long trg = Long.parseLong(args[1]) + 1000000;
                    long val = Long.parseLong(args[2]) * 10;
                    return new Edge<>(src, trg, val);
                }
            });

    GraphStream<Long, NullValue, Long> graph = new SimpleEdgeStream<>(edges, env);

    graph.getEdges()
            .flatMap(new WeightedMatchingFlatMapper()).setParallelism(1)
            .print().setParallelism(1);

    JobExecutionResult res = env.execute("Distributed Merge Tree Sandbox");
    long runtime = res.getNetRuntime();
    System.out.println("Runtime: " + runtime + " ms");
}
@SuppressWarnings("serial") private static DataStream<Tuple3<Integer, Integer, EventType>> getGraphStream(StreamExecutionEnvironment env) { if (fileOutput) { return env.readTextFile(edgeInputPath) .map(new MapFunction<String, Tuple3<Integer, Integer, EventType>>() { @Override public Tuple3<Integer, Integer, EventType> map(String s) { String[] fields = s.split("\\s"); int src = Integer.parseInt(fields[0]); int trg = Integer.parseInt(fields[1]); EventType t = fields[2].equals("+") ? EventType.EDGE_ADDITION : EventType.EDGE_DELETION; return new Tuple3<>(src, trg, t); } }); } return env.fromElements( new Tuple3<>(1, 2, EventType.EDGE_ADDITION), new Tuple3<>(2, 3, EventType.EDGE_ADDITION), new Tuple3<>(1, 4, EventType.EDGE_ADDITION), new Tuple3<>(2, 3, EventType.EDGE_DELETION), new Tuple3<>(3, 4, EventType.EDGE_ADDITION), new Tuple3<>(1, 2, EventType.EDGE_DELETION)); } }
public static void main(String[] args) throws Exception {
    // read parameters
    ParameterTool params = ParameterTool.fromArgs(args);
    String input = params.getRequired("input");

    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
    env.setParallelism(1);

    // connect to the data file
    DataStream<String> carData = env.readTextFile(input);

    // map to events
    DataStream<ConnectedCarEvent> events = carData
            .map((String line) -> ConnectedCarEvent.fromString(line))
            .assignTimestampsAndWatermarks(new ConnectedCarAssigner());

    // sort events
    events.keyBy((ConnectedCarEvent event) -> event.carId)
            .process(new SortFunction())
            .print();

    env.execute("Sort Connected Car Events");
}
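// Hypothetical sketch of what a sorter like SortFunction above might do (names,
// field access, and state layout are assumptions, not taken from the source):
// buffer events in keyed state, register one event-time timer per pending
// timestamp, and flush events in order once the watermark has passed them.
public static class SortSketch extends KeyedProcessFunction<String, ConnectedCarEvent, ConnectedCarEvent> {
    private transient ListState<ConnectedCarEvent> buffer;

    @Override
    public void open(Configuration config) {
        buffer = getRuntimeContext().getListState(
                new ListStateDescriptor<>("buffer", ConnectedCarEvent.class));
    }

    @Override
    public void processElement(ConnectedCarEvent event, Context ctx, Collector<ConnectedCarEvent> out) throws Exception {
        // Buffer the event and ask to be called back once the watermark
        // guarantees that no earlier event can still arrive.
        buffer.add(event);
        ctx.timerService().registerEventTimeTimer(event.timestamp);
    }

    @Override
    public void onTimer(long timestamp, OnTimerContext ctx, Collector<ConnectedCarEvent> out) throws Exception {
        // Emit every buffered event at or before the current watermark, in
        // timestamp order; keep the rest buffered for a later timer.
        List<ConnectedCarEvent> pending = new ArrayList<>();
        for (ConnectedCarEvent e : buffer.get()) {
            pending.add(e);
        }
        pending.sort(Comparator.comparingLong(e -> e.timestamp));

        long watermark = ctx.timerService().currentWatermark();
        buffer.clear();
        for (ConnectedCarEvent e : pending) {
            if (e.timestamp <= watermark) {
                out.collect(e);
            } else {
                buffer.add(e);
            }
        }
    }
}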
public static void main(String[] args) throws Exception {
    // read parameters
    ParameterTool params = ParameterTool.fromArgs(args);
    String input = params.getRequired("input");

    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    // connect to the data file
    DataStream<String> carData = env.readTextFile(input);

    // map to events
    DataStream<ConnectedCarEvent> events = carData
            .map((String line) -> ConnectedCarEvent.fromString(line))
            .assignTimestampsAndWatermarks(new ConnectedCarAssigner());

    // find driving sessions: a session ends after a 15-second gap without events
    events.keyBy("carId")
            .window(EventTimeSessionWindows.withGap(Time.seconds(15)))
            .apply(new CreateGapSegment())
            .print();

    env.execute("Driving Sessions");
}
public static void main(String[] args) throws Exception {
    // read parameters
    ParameterTool params = ParameterTool.fromArgs(args);
    String input = params.getRequired("input");

    // set up streaming execution environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    // connect to the data file
    DataStream<String> carData = env.readTextFile(input);

    // map to events
    DataStream<ConnectedCarEvent> events = carData
            .map((String line) -> ConnectedCarEvent.fromString(line))
            .assignTimestampsAndWatermarks(new ConnectedCarAssigner());

    // find segments
    events.keyBy("carId")
            .window(GlobalWindows.create())
            .trigger(new SegmentingOutOfOrderTrigger())
            .evictor(new SegmentingEvictor())
            .apply(new CreateStoppedSegment())
            .print();

    env.execute("Driving Segments");
}