/**
 * Creates a data stream from the contents of files that are created while the system watches
 * the given path. Files are read with the system's default character set.
 *
 * <p>The stream is assembled by registering a {@code FileMonitoringFunction} as a source named
 * "Read File Stream source" and flat-mapping its output through a {@code FileReadFunction}.
 *
 * @param filePath
 *     The path to watch, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path/")
 * @param intervalMillis
 *     The interval of file watching, in milliseconds
 * @param watchType
 *     The watch type of the file stream:
 *     {@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#ONLY_NEW_FILES}
 *     processes only new files,
 *     {@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#REPROCESS_WITH_APPENDED}
 *     re-processes the whole content of a file that has been appended to, and
 *     {@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#PROCESS_ONLY_APPENDED}
 *     processes only the appended content of files.
 * @return The data stream of strings produced from the watched files
 *
 * @deprecated Use {@link #readFile(FileInputFormat, String, FileProcessingMode, long)} instead.
 */
@Deprecated
@SuppressWarnings("deprecation")
public DataStream<String> readFileStream(String filePath, long intervalMillis, FileMonitoringFunction.WatchType watchType) {
    // The monitor emits (file path, start offset, end offset) triples.
    final FileMonitoringFunction monitor = new FileMonitoringFunction(filePath, intervalMillis, watchType);
    final DataStream<Tuple3<String, Long, Long>> fileEvents = addSource(monitor, "Read File Stream source");

    // The read function turns each triple into the strings of the resulting stream.
    return fileEvents.flatMap(new FileReadFunction());
}
/**
 * Lists the entries found under {@code path} and returns those that {@code isFiltered} does
 * not reject.
 *
 * <p>For every accepted entry the modification time is recorded in {@code modificationTimes},
 * keyed by the entry's file name (not its full path).
 *
 * @param fileSystem the file system used to list {@code path}
 * @return the full paths (as strings) of the accepted entries; empty if the listing is
 *     {@code null}, in which case a warning is logged
 * @throws IOException if listing the path fails
 */
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
    final List<String> accepted = new ArrayList<String>();
    final FileStatus[] entries = fileSystem.listStatus(new Path(path));

    // A null listing is treated as "path does not exist": warn and report nothing new.
    if (entries == null) {
        LOG.warn("Path does not exist: {}", path);
        return accepted;
    }

    for (FileStatus entry : entries) {
        final Path entryPath = entry.getPath();
        final String name = entryPath.getName();
        final long modifiedAt = entry.getModificationTime();

        if (isFiltered(name, modifiedAt)) {
            continue;
        }

        accepted.add(entryPath.toString());
        modificationTimes.put(name, modifiedAt);
    }

    return accepted;
}
/**
 * Repeatedly scans {@code path} while {@code isRunning} is set, emitting one
 * (file path, start offset, end offset) tuple for every file returned by
 * {@code listNewFiles}, and sleeping for {@code interval} milliseconds between scans.
 *
 * <ul>
 *   <li>{@code ONLY_NEW_FILES} / {@code REPROCESS_WITH_APPENDED}: emits
 *       {@code (file, 0, -1)} and records {@code -1} as the file's offset.</li>
 *   <li>{@code PROCESS_ONLY_APPENDED}: emits {@code (file, previously recorded offset or 0,
 *       current file length)} and records the current length as the new offset.</li>
 * </ul>
 *
 * @param ctx the context the tuples are emitted to
 * @throws Exception if the file system cannot be obtained, a scan fails, or the sleep is
 *     interrupted
 */
@Override
public void run(SourceContext<Tuple3<String, Long, Long>> ctx) throws Exception {
    final FileSystem fs = FileSystem.get(new URI(path));

    while (isRunning) {
        for (String newFile : listNewFiles(fs)) {
            final boolean wholeFile = watchType == WatchType.ONLY_NEW_FILES
                    || watchType == WatchType.REPROCESS_WITH_APPENDED;

            if (wholeFile) {
                // NOTE(review): -1 as end offset presumably means "read to end of file" downstream - confirm.
                ctx.collect(new Tuple3<String, Long, Long>(newFile, 0L, -1L));
                offsetOfFiles.put(newFile, -1L);
                continue;
            }

            if (watchType != WatchType.PROCESS_ONLY_APPENDED) {
                continue;
            }

            // Emit only the range between the last recorded offset and the current length.
            final long currentLength = fs.getFileStatus(new Path(newFile)).getLen();
            final long startOffset = offsetOfFiles.containsKey(newFile) ? offsetOfFiles.get(newFile) : 0L;

            ctx.collect(new Tuple3<String, Long, Long>(newFile, startOffset, currentLength));
            offsetOfFiles.put(newFile, currentLength);

            LOG.info("File processed: {}, {}, {}", newFile, startOffset, currentLength);
        }

        Thread.sleep(interval);
    }
}
@Test public void testForEmptyLocation() throws Exception { final FileMonitoringFunction fileMonitoringFunction = new FileMonitoringFunction("?non-existing-path", 1L, FileMonitoringFunction.WatchType.ONLY_NEW_FILES); fileMonitoringFunction.run( new SourceFunction.SourceContext<Tuple3<String, Long, Long>>() {
// Body of a helper object that is started via the trailing start() call (presumably a Thread;
// its declaration is not visible here). It sleeps for one second and then calls cancel() on the
// monitoring function. If the sleep is interrupted, the stack trace is printed and cancel() is
// still invoked.
@Override public void run() { try { Thread.sleep(1000L); } catch (InterruptedException e) { e.printStackTrace(); } fileMonitoringFunction.cancel(); } }.start();
/**
 * Creates a data stream from the contents of files that are created while the system watches
 * the given path. Files are read with the system's default character set.
 *
 * <p>The stream is assembled by registering a {@code FileMonitoringFunction} as a source named
 * "Read File Stream source" and flat-mapping its output through a {@code FileReadFunction}.
 *
 * @param filePath
 *     The path to watch, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path/")
 * @param intervalMillis
 *     The interval of file watching, in milliseconds
 * @param watchType
 *     The watch type of the file stream:
 *     {@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#ONLY_NEW_FILES}
 *     processes only new files,
 *     {@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#REPROCESS_WITH_APPENDED}
 *     re-processes the whole content of a file that has been appended to, and
 *     {@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#PROCESS_ONLY_APPENDED}
 *     processes only the appended content of files.
 * @return The data stream of strings produced from the watched files
 *
 * @deprecated Use {@link #readFile(FileInputFormat, String, FileProcessingMode, long)} instead.
 */
@Deprecated
@SuppressWarnings("deprecation")
public DataStream<String> readFileStream(String filePath, long intervalMillis, FileMonitoringFunction.WatchType watchType) {
    // The monitor emits (file path, start offset, end offset) triples.
    final FileMonitoringFunction monitor = new FileMonitoringFunction(filePath, intervalMillis, watchType);
    final DataStream<Tuple3<String, Long, Long>> fileEvents = addSource(monitor, "Read File Stream source");

    // The read function turns each triple into the strings of the resulting stream.
    return fileEvents.flatMap(new FileReadFunction());
}
/**
 * Lists the entries found under {@code path} and returns those that {@code isFiltered} does
 * not reject.
 *
 * <p>For every accepted entry the modification time is recorded in {@code modificationTimes},
 * keyed by the entry's file name (not its full path).
 *
 * @param fileSystem the file system used to list {@code path}
 * @return the full paths (as strings) of the accepted entries; empty if the listing is
 *     {@code null}, in which case a warning is logged
 * @throws IOException if listing the path fails
 */
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
    final List<String> accepted = new ArrayList<String>();
    final FileStatus[] entries = fileSystem.listStatus(new Path(path));

    // A null listing is treated as "path does not exist": warn and report nothing new.
    if (entries == null) {
        LOG.warn("Path does not exist: {}", path);
        return accepted;
    }

    for (FileStatus entry : entries) {
        final Path entryPath = entry.getPath();
        final String name = entryPath.getName();
        final long modifiedAt = entry.getModificationTime();

        if (isFiltered(name, modifiedAt)) {
            continue;
        }

        accepted.add(entryPath.toString());
        modificationTimes.put(name, modifiedAt);
    }

    return accepted;
}
/**
 * Repeatedly scans {@code path} while {@code isRunning} is set, emitting one
 * (file path, start offset, end offset) tuple for every file returned by
 * {@code listNewFiles}, and sleeping for {@code interval} milliseconds between scans.
 *
 * <ul>
 *   <li>{@code ONLY_NEW_FILES} / {@code REPROCESS_WITH_APPENDED}: emits
 *       {@code (file, 0, -1)} and records {@code -1} as the file's offset.</li>
 *   <li>{@code PROCESS_ONLY_APPENDED}: emits {@code (file, previously recorded offset or 0,
 *       current file length)} and records the current length as the new offset.</li>
 * </ul>
 *
 * @param ctx the context the tuples are emitted to
 * @throws Exception if the file system cannot be obtained, a scan fails, or the sleep is
 *     interrupted
 */
@Override
public void run(SourceContext<Tuple3<String, Long, Long>> ctx) throws Exception {
    final FileSystem fs = FileSystem.get(new URI(path));

    while (isRunning) {
        for (String newFile : listNewFiles(fs)) {
            final boolean wholeFile = watchType == WatchType.ONLY_NEW_FILES
                    || watchType == WatchType.REPROCESS_WITH_APPENDED;

            if (wholeFile) {
                // NOTE(review): -1 as end offset presumably means "read to end of file" downstream - confirm.
                ctx.collect(new Tuple3<String, Long, Long>(newFile, 0L, -1L));
                offsetOfFiles.put(newFile, -1L);
                continue;
            }

            if (watchType != WatchType.PROCESS_ONLY_APPENDED) {
                continue;
            }

            // Emit only the range between the last recorded offset and the current length.
            final long currentLength = fs.getFileStatus(new Path(newFile)).getLen();
            final long startOffset = offsetOfFiles.containsKey(newFile) ? offsetOfFiles.get(newFile) : 0L;

            ctx.collect(new Tuple3<String, Long, Long>(newFile, startOffset, currentLength));
            offsetOfFiles.put(newFile, currentLength);

            LOG.info("File processed: {}, {}, {}", newFile, startOffset, currentLength);
        }

        Thread.sleep(interval);
    }
}
/**
 * Creates a data stream from the contents of files that are created while the system watches
 * the given path. Files are read with the system's default character set.
 *
 * <p>The stream is assembled by registering a {@code FileMonitoringFunction} as a source named
 * "Read File Stream source" and flat-mapping its output through a {@code FileReadFunction}.
 *
 * @param filePath
 *     The path to watch, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path/")
 * @param intervalMillis
 *     The interval of file watching, in milliseconds
 * @param watchType
 *     The watch type of the file stream:
 *     {@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#ONLY_NEW_FILES}
 *     processes only new files,
 *     {@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#REPROCESS_WITH_APPENDED}
 *     re-processes the whole content of a file that has been appended to, and
 *     {@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#PROCESS_ONLY_APPENDED}
 *     processes only the appended content of files.
 * @return The data stream of strings produced from the watched files
 *
 * @deprecated Use {@link #readFile(FileInputFormat, String, FileProcessingMode, long)} instead.
 */
@Deprecated
@SuppressWarnings("deprecation")
public DataStream<String> readFileStream(String filePath, long intervalMillis, FileMonitoringFunction.WatchType watchType) {
    // The monitor emits (file path, start offset, end offset) triples.
    final FileMonitoringFunction monitor = new FileMonitoringFunction(filePath, intervalMillis, watchType);
    final DataStream<Tuple3<String, Long, Long>> fileEvents = addSource(monitor, "Read File Stream source");

    // The read function turns each triple into the strings of the resulting stream.
    return fileEvents.flatMap(new FileReadFunction());
}
/**
 * Lists the entries found under {@code path} and returns those that {@code isFiltered} does
 * not reject.
 *
 * <p>For every accepted entry the modification time is recorded in {@code modificationTimes},
 * keyed by the entry's file name (not its full path).
 *
 * @param fileSystem the file system used to list {@code path}
 * @return the full paths (as strings) of the accepted entries; empty if the listing is
 *     {@code null}, in which case a warning is logged
 * @throws IOException if listing the path fails
 */
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
    final List<String> accepted = new ArrayList<String>();
    final FileStatus[] entries = fileSystem.listStatus(new Path(path));

    // A null listing is treated as "path does not exist": warn and report nothing new.
    if (entries == null) {
        LOG.warn("Path does not exist: {}", path);
        return accepted;
    }

    for (FileStatus entry : entries) {
        final Path entryPath = entry.getPath();
        final String name = entryPath.getName();
        final long modifiedAt = entry.getModificationTime();

        if (isFiltered(name, modifiedAt)) {
            continue;
        }

        accepted.add(entryPath.toString());
        modificationTimes.put(name, modifiedAt);
    }

    return accepted;
}
/**
 * Repeatedly scans {@code path} while {@code isRunning} is set, emitting one
 * (file path, start offset, end offset) tuple for every file returned by
 * {@code listNewFiles}, and sleeping for {@code interval} milliseconds between scans.
 *
 * <ul>
 *   <li>{@code ONLY_NEW_FILES} / {@code REPROCESS_WITH_APPENDED}: emits
 *       {@code (file, 0, -1)} and records {@code -1} as the file's offset.</li>
 *   <li>{@code PROCESS_ONLY_APPENDED}: emits {@code (file, previously recorded offset or 0,
 *       current file length)} and records the current length as the new offset.</li>
 * </ul>
 *
 * @param ctx the context the tuples are emitted to
 * @throws Exception if the file system cannot be obtained, a scan fails, or the sleep is
 *     interrupted
 */
@Override
public void run(SourceContext<Tuple3<String, Long, Long>> ctx) throws Exception {
    final FileSystem fs = FileSystem.get(new URI(path));

    while (isRunning) {
        for (String newFile : listNewFiles(fs)) {
            final boolean wholeFile = watchType == WatchType.ONLY_NEW_FILES
                    || watchType == WatchType.REPROCESS_WITH_APPENDED;

            if (wholeFile) {
                // NOTE(review): -1 as end offset presumably means "read to end of file" downstream - confirm.
                ctx.collect(new Tuple3<String, Long, Long>(newFile, 0L, -1L));
                offsetOfFiles.put(newFile, -1L);
                continue;
            }

            if (watchType != WatchType.PROCESS_ONLY_APPENDED) {
                continue;
            }

            // Emit only the range between the last recorded offset and the current length.
            final long currentLength = fs.getFileStatus(new Path(newFile)).getLen();
            final long startOffset = offsetOfFiles.containsKey(newFile) ? offsetOfFiles.get(newFile) : 0L;

            ctx.collect(new Tuple3<String, Long, Long>(newFile, startOffset, currentLength));
            offsetOfFiles.put(newFile, currentLength);

            LOG.info("File processed: {}, {}, {}", newFile, startOffset, currentLength);
        }

        Thread.sleep(interval);
    }
}
/**
 * Creates a data stream from the contents of files that are created while the system watches
 * the given path. Files are read with the system's default character set.
 *
 * <p>The stream is assembled by registering a {@code FileMonitoringFunction} as a source named
 * "Read File Stream source" and flat-mapping its output through a {@code FileReadFunction}.
 *
 * @param filePath
 *     The path to watch, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path/")
 * @param intervalMillis
 *     The interval of file watching, in milliseconds
 * @param watchType
 *     The watch type of the file stream:
 *     {@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#ONLY_NEW_FILES}
 *     processes only new files,
 *     {@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#REPROCESS_WITH_APPENDED}
 *     re-processes the whole content of a file that has been appended to, and
 *     {@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#PROCESS_ONLY_APPENDED}
 *     processes only the appended content of files.
 * @return The data stream of strings produced from the watched files
 *
 * @deprecated Use {@link #readFile(FileInputFormat, String, FileProcessingMode, long)} instead.
 */
@Deprecated
@SuppressWarnings("deprecation")
public DataStream<String> readFileStream(String filePath, long intervalMillis, FileMonitoringFunction.WatchType watchType) {
    // The monitor emits (file path, start offset, end offset) triples.
    final FileMonitoringFunction monitor = new FileMonitoringFunction(filePath, intervalMillis, watchType);
    final DataStream<Tuple3<String, Long, Long>> fileEvents = addSource(monitor, "Read File Stream source");

    // The read function turns each triple into the strings of the resulting stream.
    return fileEvents.flatMap(new FileReadFunction());
}
/**
 * Lists the entries found under {@code path} and returns those that {@code isFiltered} does
 * not reject.
 *
 * <p>For every accepted entry the modification time is recorded in {@code modificationTimes},
 * keyed by the entry's file name (not its full path).
 *
 * @param fileSystem the file system used to list {@code path}
 * @return the full paths (as strings) of the accepted entries; empty if the listing is
 *     {@code null}, in which case a warning is logged
 * @throws IOException if listing the path fails
 */
private List<String> listNewFiles(FileSystem fileSystem) throws IOException {
    final List<String> accepted = new ArrayList<String>();
    final FileStatus[] entries = fileSystem.listStatus(new Path(path));

    // A null listing is treated as "path does not exist": warn and report nothing new.
    if (entries == null) {
        LOG.warn("Path does not exist: {}", path);
        return accepted;
    }

    for (FileStatus entry : entries) {
        final Path entryPath = entry.getPath();
        final String name = entryPath.getName();
        final long modifiedAt = entry.getModificationTime();

        if (isFiltered(name, modifiedAt)) {
            continue;
        }

        accepted.add(entryPath.toString());
        modificationTimes.put(name, modifiedAt);
    }

    return accepted;
}
/**
 * Repeatedly scans {@code path} while {@code isRunning} is set, emitting one
 * (file path, start offset, end offset) tuple for every file returned by
 * {@code listNewFiles}, and sleeping for {@code interval} milliseconds between scans.
 *
 * <ul>
 *   <li>{@code ONLY_NEW_FILES} / {@code REPROCESS_WITH_APPENDED}: emits
 *       {@code (file, 0, -1)} and records {@code -1} as the file's offset.</li>
 *   <li>{@code PROCESS_ONLY_APPENDED}: emits {@code (file, previously recorded offset or 0,
 *       current file length)} and records the current length as the new offset.</li>
 * </ul>
 *
 * @param ctx the context the tuples are emitted to
 * @throws Exception if the file system cannot be obtained, a scan fails, or the sleep is
 *     interrupted
 */
@Override
public void run(SourceContext<Tuple3<String, Long, Long>> ctx) throws Exception {
    final FileSystem fs = FileSystem.get(new URI(path));

    while (isRunning) {
        for (String newFile : listNewFiles(fs)) {
            final boolean wholeFile = watchType == WatchType.ONLY_NEW_FILES
                    || watchType == WatchType.REPROCESS_WITH_APPENDED;

            if (wholeFile) {
                // NOTE(review): -1 as end offset presumably means "read to end of file" downstream - confirm.
                ctx.collect(new Tuple3<String, Long, Long>(newFile, 0L, -1L));
                offsetOfFiles.put(newFile, -1L);
                continue;
            }

            if (watchType != WatchType.PROCESS_ONLY_APPENDED) {
                continue;
            }

            // Emit only the range between the last recorded offset and the current length.
            final long currentLength = fs.getFileStatus(new Path(newFile)).getLen();
            final long startOffset = offsetOfFiles.containsKey(newFile) ? offsetOfFiles.get(newFile) : 0L;

            ctx.collect(new Tuple3<String, Long, Long>(newFile, startOffset, currentLength));
            offsetOfFiles.put(newFile, currentLength);

            LOG.info("File processed: {}, {}, {}", newFile, startOffset, currentLength);
        }

        Thread.sleep(interval);
    }
}
/**
 * Creates a data stream from the contents of files that are created while the system watches
 * the given path. Files are read with the system's default character set.
 *
 * <p>The stream is assembled by registering a {@code FileMonitoringFunction} as a source named
 * "Read File Stream source" and flat-mapping its output through a {@code FileReadFunction}.
 *
 * @param filePath
 *     The path to watch, as a URI (e.g., "file:///some/local/file" or "hdfs://host:port/file/path/")
 * @param intervalMillis
 *     The interval of file watching, in milliseconds
 * @param watchType
 *     The watch type of the file stream:
 *     {@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#ONLY_NEW_FILES}
 *     processes only new files,
 *     {@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#REPROCESS_WITH_APPENDED}
 *     re-processes the whole content of a file that has been appended to, and
 *     {@link org.apache.flink.streaming.api.functions.source.FileMonitoringFunction.WatchType#PROCESS_ONLY_APPENDED}
 *     processes only the appended content of files.
 * @return The data stream of strings produced from the watched files
 *
 * @deprecated Use {@link #readFile(FileInputFormat, String, FileProcessingMode, long)} instead.
 */
@Deprecated
@SuppressWarnings("deprecation")
public DataStream<String> readFileStream(String filePath, long intervalMillis, FileMonitoringFunction.WatchType watchType) {
    // The monitor emits (file path, start offset, end offset) triples.
    final FileMonitoringFunction monitor = new FileMonitoringFunction(filePath, intervalMillis, watchType);
    final DataStream<Tuple3<String, Long, Long>> fileEvents = addSource(monitor, "Read File Stream source");

    // The read function turns each triple into the strings of the resulting stream.
    return fileEvents.flatMap(new FileReadFunction());
}