/**
 * Assembles a file-reading source out of two chained pieces: a
 * {@link ContinuousFileMonitoringFunction} registered via {@code addSource} under
 * {@code sourceName}, followed by a {@link ContinuousFileReaderOperator} attached via
 * {@code transform} under the name {@code "Split Reader: " + sourceName}.
 *
 * <p>All reference arguments are validated with {@code Preconditions.checkNotNull}. The
 * monitoring interval is validated with {@code Preconditions.checkArgument} against
 * {@code ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL}, except when the mode
 * equals {@code FileProcessingMode.PROCESS_ONCE}, in which case the interval is not checked.
 *
 * @param inputFormat    the input format handed to both the monitoring function and the reader
 * @param typeInfo       the output type information passed to {@code transform}
 * @param sourceName     the name given to the source and embedded in the reader's name
 * @param monitoringMode the processing mode handed to the monitoring function
 * @param interval       the monitoring interval (reported in ms by the validation message)
 * @param <OUT>          the element type produced by the resulting stream
 * @return a {@link DataStreamSource} wrapping the transformed stream
 */
private <OUT> DataStreamSource<OUT> createFileInput(FileInputFormat<OUT> inputFormat,
        TypeInformation<OUT> typeInfo,
        String sourceName,
        FileProcessingMode monitoringMode,
        long interval) {

    // Reject missing arguments before doing anything else.
    Preconditions.checkNotNull(inputFormat, "Unspecified file input format.");
    Preconditions.checkNotNull(typeInfo, "Unspecified output type information.");
    Preconditions.checkNotNull(sourceName, "Unspecified name for the source.");
    Preconditions.checkNotNull(monitoringMode, "Unspecified monitoring mode.");

    // The lower bound on the interval is skipped when the mode is PROCESS_ONCE.
    final boolean processOnce = monitoringMode.equals(FileProcessingMode.PROCESS_ONCE);
    final boolean intervalAccepted =
            processOnce || interval >= ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL;
    final String intervalError = "The path monitoring interval cannot be less than "
            + ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL + " ms.";
    Preconditions.checkArgument(intervalAccepted, intervalError);

    // The monitoring function is created with the environment's parallelism.
    final ContinuousFileMonitoringFunction<OUT> splitMonitor =
            new ContinuousFileMonitoringFunction<>(inputFormat, monitoringMode, getParallelism(), interval);
    final ContinuousFileReaderOperator<OUT> splitReader =
            new ContinuousFileReaderOperator<>(inputFormat);

    final String readerName = "Split Reader: " + sourceName;
    final SingleOutputStreamOperator<OUT> readerStream =
            addSource(splitMonitor, sourceName).transform(readerName, typeInfo, splitReader);

    return new DataStreamSource<>(readerStream);
}
private void monitorDirAndForwardSplits(FileSystem fs, SourceContext<TimestampedFileInputSplit> context) throws IOException { assert (Thread.holdsLock(checkpointLock)); Map<Path, FileStatus> eligibleFiles = listEligibleFiles(fs, new Path(path)); Map<Long, List<TimestampedFileInputSplit>> splitsSortedByModTime = getInputSplitsSortedByModTime(eligibleFiles); for (Map.Entry<Long, List<TimestampedFileInputSplit>> splits: splitsSortedByModTime.entrySet()) { long modificationTime = splits.getKey(); for (TimestampedFileInputSplit split: splits.getValue()) { LOG.info("Forwarding split: " + split); context.collect(split); } // update the global modification time globalModificationTime = Math.max(globalModificationTime, modificationTime); } }
/**
 * Opens this function: delegates to {@code super.open}, configures {@code format} with the
 * given parameters, and, when debug logging is enabled, logs the class name, the subtask
 * index and the monitored path.
 *
 * @param parameters the configuration passed to the superclass and to {@code format.configure}
 * @throws Exception propagated from the superclass or from configuring the format
 */
@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);
    format.configure(parameters);

    // Nothing more to do unless debug output is wanted.
    if (!LOG.isDebugEnabled()) {
        return;
    }

    final String functionName = getClass().getSimpleName();
    LOG.debug("Opened {} (taskIdx= {}) for path: {}",
            functionName, getRuntimeContext().getIndexOfThisSubtask(), path);
}
Path filePath = status.getPath(); long modificationTime = status.getModificationTime(); if (!shouldIgnore(filePath, modificationTime)) { files.put(filePath, status); files.putAll(listEligibleFiles(fileSystem, status.getPath()));
while (isRunning) { synchronized (checkpointLock) { monitorDirAndForwardSplits(fileSystem, context); monitorDirAndForwardSplits(fileSystem, context); globalModificationTime = Long.MAX_VALUE;
Path filePath = status.getPath(); long modificationTime = status.getModificationTime(); if (!shouldIgnore(filePath, modificationTime)) { files.put(filePath, status); files.putAll(listEligibleFiles(fileSystem, status.getPath()));
while (isRunning) { synchronized (checkpointLock) { monitorDirAndForwardSplits(fileSystem, context); monitorDirAndForwardSplits(fileSystem, context); globalModificationTime = Long.MAX_VALUE;
private void monitorDirAndForwardSplits(FileSystem fs, SourceContext<TimestampedFileInputSplit> context) throws IOException { assert (Thread.holdsLock(checkpointLock)); Map<Path, FileStatus> eligibleFiles = listEligibleFiles(fs, new Path(path)); Map<Long, List<TimestampedFileInputSplit>> splitsSortedByModTime = getInputSplitsSortedByModTime(eligibleFiles); for (Map.Entry<Long, List<TimestampedFileInputSplit>> splits: splitsSortedByModTime.entrySet()) { long modificationTime = splits.getKey(); for (TimestampedFileInputSplit split: splits.getValue()) { LOG.info("Forwarding split: " + split); context.collect(split); } // update the global modification time globalModificationTime = Math.max(globalModificationTime, modificationTime); } }
Path filePath = status.getPath(); long modificationTime = status.getModificationTime(); if (!shouldIgnore(filePath, modificationTime)) { files.put(filePath, status); files.putAll(listEligibleFiles(fileSystem, status.getPath()));
/**
 * Assembles a file-reading source out of two chained pieces: a
 * {@link ContinuousFileMonitoringFunction} registered via {@code addSource} under
 * {@code sourceName}, followed by a {@link ContinuousFileReaderOperator} attached via
 * {@code transform} under the name {@code "Split Reader: " + sourceName}.
 *
 * <p>All reference arguments are validated with {@code Preconditions.checkNotNull}. The
 * monitoring interval is validated with {@code Preconditions.checkArgument} against
 * {@code ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL}, except when the mode
 * equals {@code FileProcessingMode.PROCESS_ONCE}, in which case the interval is not checked.
 *
 * @param inputFormat    the input format handed to both the monitoring function and the reader
 * @param typeInfo       the output type information passed to {@code transform}
 * @param sourceName     the name given to the source and embedded in the reader's name
 * @param monitoringMode the processing mode handed to the monitoring function
 * @param interval       the monitoring interval (reported in ms by the validation message)
 * @param <OUT>          the element type produced by the resulting stream
 * @return a {@link DataStreamSource} wrapping the transformed stream
 */
private <OUT> DataStreamSource<OUT> createFileInput(FileInputFormat<OUT> inputFormat,
        TypeInformation<OUT> typeInfo,
        String sourceName,
        FileProcessingMode monitoringMode,
        long interval) {

    // Reject missing arguments before doing anything else.
    Preconditions.checkNotNull(inputFormat, "Unspecified file input format.");
    Preconditions.checkNotNull(typeInfo, "Unspecified output type information.");
    Preconditions.checkNotNull(sourceName, "Unspecified name for the source.");
    Preconditions.checkNotNull(monitoringMode, "Unspecified monitoring mode.");

    // The lower bound on the interval is skipped when the mode is PROCESS_ONCE.
    final boolean processOnce = monitoringMode.equals(FileProcessingMode.PROCESS_ONCE);
    final boolean intervalAccepted =
            processOnce || interval >= ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL;
    final String intervalError = "The path monitoring interval cannot be less than "
            + ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL + " ms.";
    Preconditions.checkArgument(intervalAccepted, intervalError);

    // The monitoring function is created with the environment's parallelism.
    final ContinuousFileMonitoringFunction<OUT> splitMonitor =
            new ContinuousFileMonitoringFunction<>(inputFormat, monitoringMode, getParallelism(), interval);
    final ContinuousFileReaderOperator<OUT> splitReader =
            new ContinuousFileReaderOperator<>(inputFormat);

    final String readerName = "Split Reader: " + sourceName;
    final SingleOutputStreamOperator<OUT> readerStream =
            addSource(splitMonitor, sourceName).transform(readerName, typeInfo, splitReader);

    return new DataStreamSource<>(readerStream);
}
/**
 * Restores {@code globalModificationTime} from the given state value and logs the class
 * name, the subtask index and the restored value at INFO level.
 *
 * @param state the modification time to restore; it is unboxed on assignment
 * @throws Exception declared by the overridden method
 */
@Override
public void restoreState(Long state) throws Exception {
    this.globalModificationTime = state;

    final String functionName = getClass().getSimpleName();
    LOG.info("{} (taskIdx={}) restored global modification time from an older Flink version: {}",
            functionName, getRuntimeContext().getIndexOfThisSubtask(), globalModificationTime);
}
}
while (isRunning) { synchronized (checkpointLock) { monitorDirAndForwardSplits(fileSystem, context); monitorDirAndForwardSplits(fileSystem, context); globalModificationTime = Long.MAX_VALUE;
private void monitorDirAndForwardSplits(FileSystem fs, SourceContext<TimestampedFileInputSplit> context) throws IOException { assert (Thread.holdsLock(checkpointLock)); Map<Path, FileStatus> eligibleFiles = listEligibleFiles(fs, new Path(path)); Map<Long, List<TimestampedFileInputSplit>> splitsSortedByModTime = getInputSplitsSortedByModTime(eligibleFiles); for (Map.Entry<Long, List<TimestampedFileInputSplit>> splits: splitsSortedByModTime.entrySet()) { long modificationTime = splits.getKey(); for (TimestampedFileInputSplit split: splits.getValue()) { LOG.info("Forwarding split: " + split); context.collect(split); } // update the global modification time globalModificationTime = Math.max(globalModificationTime, modificationTime); } }
Path filePath = status.getPath(); long modificationTime = status.getModificationTime(); if (!shouldIgnore(filePath, modificationTime)) { files.put(filePath, status); files.putAll(listEligibleFiles(fileSystem, status.getPath()));
/**
 * Assembles a file-reading source out of two chained pieces: a
 * {@link ContinuousFileMonitoringFunction} registered via {@code addSource} under
 * {@code sourceName}, followed by a {@link ContinuousFileReaderOperator} attached via
 * {@code transform} under the name {@code "Split Reader: " + sourceName}.
 *
 * <p>All reference arguments are validated with {@code Preconditions.checkNotNull}. The
 * monitoring interval is validated with {@code Preconditions.checkArgument} against
 * {@code ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL}, except when the mode
 * equals {@code FileProcessingMode.PROCESS_ONCE}, in which case the interval is not checked.
 *
 * @param inputFormat    the input format handed to both the monitoring function and the reader
 * @param typeInfo       the output type information passed to {@code transform}
 * @param sourceName     the name given to the source and embedded in the reader's name
 * @param monitoringMode the processing mode handed to the monitoring function
 * @param interval       the monitoring interval (reported in ms by the validation message)
 * @param <OUT>          the element type produced by the resulting stream
 * @return a {@link DataStreamSource} wrapping the transformed stream
 */
private <OUT> DataStreamSource<OUT> createFileInput(FileInputFormat<OUT> inputFormat,
        TypeInformation<OUT> typeInfo,
        String sourceName,
        FileProcessingMode monitoringMode,
        long interval) {

    // Reject missing arguments before doing anything else.
    Preconditions.checkNotNull(inputFormat, "Unspecified file input format.");
    Preconditions.checkNotNull(typeInfo, "Unspecified output type information.");
    Preconditions.checkNotNull(sourceName, "Unspecified name for the source.");
    Preconditions.checkNotNull(monitoringMode, "Unspecified monitoring mode.");

    // The lower bound on the interval is skipped when the mode is PROCESS_ONCE.
    final boolean processOnce = monitoringMode.equals(FileProcessingMode.PROCESS_ONCE);
    final boolean intervalAccepted =
            processOnce || interval >= ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL;
    final String intervalError = "The path monitoring interval cannot be less than "
            + ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL + " ms.";
    Preconditions.checkArgument(intervalAccepted, intervalError);

    // The monitoring function is created with the environment's parallelism.
    final ContinuousFileMonitoringFunction<OUT> splitMonitor =
            new ContinuousFileMonitoringFunction<>(inputFormat, monitoringMode, getParallelism(), interval);
    final ContinuousFileReaderOperator<OUT> splitReader =
            new ContinuousFileReaderOperator<>(inputFormat);

    final String readerName = "Split Reader: " + sourceName;
    final SingleOutputStreamOperator<OUT> readerStream =
            addSource(splitMonitor, sourceName).transform(readerName, typeInfo, splitReader);

    return new DataStreamSource<>(readerStream);
}
/**
 * Opens this function: delegates to {@code super.open}, configures {@code format} with the
 * given parameters, and, when debug logging is enabled, logs the class name, the subtask
 * index and the monitored path.
 *
 * @param parameters the configuration passed to the superclass and to {@code format.configure}
 * @throws Exception propagated from the superclass or from configuring the format
 */
@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);
    format.configure(parameters);

    // Nothing more to do unless debug output is wanted.
    if (!LOG.isDebugEnabled()) {
        return;
    }

    final String functionName = getClass().getSimpleName();
    LOG.debug("Opened {} (taskIdx= {}) for path: {}",
            functionName, getRuntimeContext().getIndexOfThisSubtask(), path);
}
while (isRunning) { synchronized (checkpointLock) { monitorDirAndForwardSplits(fileSystem, context); monitorDirAndForwardSplits(fileSystem, context); globalModificationTime = Long.MAX_VALUE;
private void monitorDirAndForwardSplits(FileSystem fs, SourceContext<TimestampedFileInputSplit> context) throws IOException { assert (Thread.holdsLock(checkpointLock)); Map<Path, FileStatus> eligibleFiles = listEligibleFiles(fs, new Path(path)); Map<Long, List<TimestampedFileInputSplit>> splitsSortedByModTime = getInputSplitsSortedByModTime(eligibleFiles); for (Map.Entry<Long, List<TimestampedFileInputSplit>> splits: splitsSortedByModTime.entrySet()) { long modificationTime = splits.getKey(); for (TimestampedFileInputSplit split: splits.getValue()) { LOG.info("Forwarding split: " + split); context.collect(split); } // update the global modification time globalModificationTime = Math.max(globalModificationTime, modificationTime); } }
/**
 * Assembles a file-reading source out of two chained pieces: a
 * {@link ContinuousFileMonitoringFunction} registered via {@code addSource} under
 * {@code sourceName}, followed by a {@link ContinuousFileReaderOperator} attached via
 * {@code transform} under the name {@code "Split Reader: " + sourceName}.
 *
 * <p>All reference arguments are validated with {@code Preconditions.checkNotNull}. The
 * monitoring interval is validated with {@code Preconditions.checkArgument} against
 * {@code ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL}, except when the mode
 * equals {@code FileProcessingMode.PROCESS_ONCE}, in which case the interval is not checked.
 *
 * @param inputFormat    the input format handed to both the monitoring function and the reader
 * @param typeInfo       the output type information passed to {@code transform}
 * @param sourceName     the name given to the source and embedded in the reader's name
 * @param monitoringMode the processing mode handed to the monitoring function
 * @param interval       the monitoring interval (reported in ms by the validation message)
 * @param <OUT>          the element type produced by the resulting stream
 * @return a {@link DataStreamSource} wrapping the transformed stream
 */
private <OUT> DataStreamSource<OUT> createFileInput(FileInputFormat<OUT> inputFormat,
        TypeInformation<OUT> typeInfo,
        String sourceName,
        FileProcessingMode monitoringMode,
        long interval) {

    // Reject missing arguments before doing anything else.
    Preconditions.checkNotNull(inputFormat, "Unspecified file input format.");
    Preconditions.checkNotNull(typeInfo, "Unspecified output type information.");
    Preconditions.checkNotNull(sourceName, "Unspecified name for the source.");
    Preconditions.checkNotNull(monitoringMode, "Unspecified monitoring mode.");

    // The lower bound on the interval is skipped when the mode is PROCESS_ONCE.
    final boolean processOnce = monitoringMode.equals(FileProcessingMode.PROCESS_ONCE);
    final boolean intervalAccepted =
            processOnce || interval >= ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL;
    final String intervalError = "The path monitoring interval cannot be less than "
            + ContinuousFileMonitoringFunction.MIN_MONITORING_INTERVAL + " ms.";
    Preconditions.checkArgument(intervalAccepted, intervalError);

    // The monitoring function is created with the environment's parallelism.
    final ContinuousFileMonitoringFunction<OUT> splitMonitor =
            new ContinuousFileMonitoringFunction<>(inputFormat, monitoringMode, getParallelism(), interval);
    final ContinuousFileReaderOperator<OUT> splitReader =
            new ContinuousFileReaderOperator<>(inputFormat);

    final String readerName = "Split Reader: " + sourceName;
    final SingleOutputStreamOperator<OUT> readerStream =
            addSource(splitMonitor, sourceName).transform(readerName, typeInfo, splitReader);

    return new DataStreamSource<>(readerStream);
}
/**
 * Opens this function: delegates to {@code super.open}, configures {@code format} with the
 * given parameters, and, when debug logging is enabled, logs the class name, the subtask
 * index and the monitored path.
 *
 * @param parameters the configuration passed to the superclass and to {@code format.configure}
 * @throws Exception propagated from the superclass or from configuring the format
 */
@Override
public void open(Configuration parameters) throws Exception {
    super.open(parameters);
    format.configure(parameters);

    // Nothing more to do unless debug output is wanted.
    if (!LOG.isDebugEnabled()) {
        return;
    }

    final String functionName = getClass().getSimpleName();
    LOG.debug("Opened {} (taskIdx= {}) for path: {}",
            functionName, getRuntimeContext().getIndexOfThisSubtask(), path);
}