/** * Creates the input splits to be forwarded to the downstream tasks of the * {@link ContinuousFileReaderOperator}. Splits are sorted <b>by modification time</b> before * being forwarded and only splits belonging to files in the {@code eligibleFiles} * list will be processed. * @param eligibleFiles The files to process. */ private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime( Map<Path, FileStatus> eligibleFiles) throws IOException { Map<Long, List<TimestampedFileInputSplit>> splitsByModTime = new TreeMap<>(); if (eligibleFiles.isEmpty()) { return splitsByModTime; } for (FileInputSplit split: format.createInputSplits(readerParallelism)) { FileStatus fileStatus = eligibleFiles.get(split.getPath()); if (fileStatus != null) { Long modTime = fileStatus.getModificationTime(); List<TimestampedFileInputSplit> splitsToForward = splitsByModTime.get(modTime); if (splitsToForward == null) { splitsToForward = new ArrayList<>(); splitsByModTime.put(modTime, splitsToForward); } splitsToForward.add(new TimestampedFileInputSplit( modTime, split.getSplitNumber(), split.getPath(), split.getStart(), split.getLength(), split.getHostnames())); } } return splitsByModTime; }
@Test public void testIllegalArgument() { try { new TimestampedFileInputSplit(-10, 2, new Path("test"), 0, 100, null); // invalid modification time } catch (Exception e) { if (!(e instanceof IllegalArgumentException)) { Assert.fail(e.getMessage()); } } }
@Test public void testPriorityQ() { TimestampedFileInputSplit richFirstSplit = new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null); new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 0, 100, null); new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null); new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null); new TimestampedFileInputSplit(11, 1, new Path("test/test3"), 0, 100, null);
@Test public void testSplitEquality() { TimestampedFileInputSplit richFirstSplit = new TimestampedFileInputSplit(10, 2, new Path("test"), 0, 100, null); TimestampedFileInputSplit richSecondSplit = new TimestampedFileInputSplit(10, 2, new Path("test"), 0, 100, null); Assert.assertEquals(richFirstSplit, richSecondSplit); TimestampedFileInputSplit richModSecondSplit = new TimestampedFileInputSplit(11, 2, new Path("test"), 0, 100, null); Assert.assertNotEquals(richSecondSplit, richModSecondSplit); TimestampedFileInputSplit richThirdSplit = new TimestampedFileInputSplit(10, 2, new Path("test/test1"), 0, 100, null); Assert.assertEquals(richThirdSplit.getModificationTime(), 10); Assert.assertNotEquals(richFirstSplit, richThirdSplit); TimestampedFileInputSplit richThirdSplitCopy = new TimestampedFileInputSplit(10, 2, new Path("test/test1"), 0, 100, null); Assert.assertEquals(richThirdSplitCopy, richThirdSplit); }
@Test public void testSplitComparison() { TimestampedFileInputSplit richFirstSplit = new TimestampedFileInputSplit(0, 3, new Path("test/test1"), 0, 100, null); TimestampedFileInputSplit richSecondSplit = new TimestampedFileInputSplit(10, 2, new Path("test/test2"), 0, 100, null); TimestampedFileInputSplit richThirdSplit = new TimestampedFileInputSplit(10, 1, new Path("test/test2"), 0, 100, null); TimestampedFileInputSplit richForthSplit = new TimestampedFileInputSplit(11, 0, new Path("test/test3"), 0, 100, null); TimestampedFileInputSplit richFifthSplit = new TimestampedFileInputSplit(11, 1, new Path("test/test3"), 0, 100, null); // smaller mod time Assert.assertTrue(richFirstSplit.compareTo(richSecondSplit) < 0); // lexicographically on the path Assert.assertTrue(richThirdSplit.compareTo(richFifthSplit) < 0); // same mod time, same file so smaller split number first Assert.assertTrue(richThirdSplit.compareTo(richSecondSplit) < 0); // smaller modification time first Assert.assertTrue(richThirdSplit.compareTo(richForthSplit) < 0); }
private TimestampedFileInputSplit getTimestampedSplit(long modTime, FileInputSplit split) { Preconditions.checkNotNull(split); return new TimestampedFileInputSplit( modTime, split.getSplitNumber(), split.getPath(), split.getStart(), split.getLength(), split.getHostnames()); }
/** * Creates the input splits to be forwarded to the downstream tasks of the * {@link ContinuousFileReaderOperator}. Splits are sorted <b>by modification time</b> before * being forwarded and only splits belonging to files in the {@code eligibleFiles} * list will be processed. * @param eligibleFiles The files to process. */ private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime( Map<Path, FileStatus> eligibleFiles) throws IOException { Map<Long, List<TimestampedFileInputSplit>> splitsByModTime = new TreeMap<>(); if (eligibleFiles.isEmpty()) { return splitsByModTime; } for (FileInputSplit split: format.createInputSplits(readerParallelism)) { FileStatus fileStatus = eligibleFiles.get(split.getPath()); if (fileStatus != null) { Long modTime = fileStatus.getModificationTime(); List<TimestampedFileInputSplit> splitsToForward = splitsByModTime.get(modTime); if (splitsToForward == null) { splitsToForward = new ArrayList<>(); splitsByModTime.put(modTime, splitsToForward); } splitsToForward.add(new TimestampedFileInputSplit( modTime, split.getSplitNumber(), split.getPath(), split.getStart(), split.getLength(), split.getHostnames())); } } return splitsByModTime; }
/** * Creates the input splits to be forwarded to the downstream tasks of the * {@link ContinuousFileReaderOperator}. Splits are sorted <b>by modification time</b> before * being forwarded and only splits belonging to files in the {@code eligibleFiles} * list will be processed. * @param eligibleFiles The files to process. */ private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime( Map<Path, FileStatus> eligibleFiles) throws IOException { Map<Long, List<TimestampedFileInputSplit>> splitsByModTime = new TreeMap<>(); if (eligibleFiles.isEmpty()) { return splitsByModTime; } for (FileInputSplit split: format.createInputSplits(readerParallelism)) { FileStatus fileStatus = eligibleFiles.get(split.getPath()); if (fileStatus != null) { Long modTime = fileStatus.getModificationTime(); List<TimestampedFileInputSplit> splitsToForward = splitsByModTime.get(modTime); if (splitsToForward == null) { splitsToForward = new ArrayList<>(); splitsByModTime.put(modTime, splitsToForward); } splitsToForward.add(new TimestampedFileInputSplit( modTime, split.getSplitNumber(), split.getPath(), split.getStart(), split.getLength(), split.getHostnames())); } } return splitsByModTime; }
/** * Creates the input splits to be forwarded to the downstream tasks of the * {@link ContinuousFileReaderOperator}. Splits are sorted <b>by modification time</b> before * being forwarded and only splits belonging to files in the {@code eligibleFiles} * list will be processed. * @param eligibleFiles The files to process. */ private Map<Long, List<TimestampedFileInputSplit>> getInputSplitsSortedByModTime( Map<Path, FileStatus> eligibleFiles) throws IOException { Map<Long, List<TimestampedFileInputSplit>> splitsByModTime = new TreeMap<>(); if (eligibleFiles.isEmpty()) { return splitsByModTime; } for (FileInputSplit split: format.createInputSplits(readerParallelism)) { FileStatus fileStatus = eligibleFiles.get(split.getPath()); if (fileStatus != null) { Long modTime = fileStatus.getModificationTime(); List<TimestampedFileInputSplit> splitsToForward = splitsByModTime.get(modTime); if (splitsToForward == null) { splitsToForward = new ArrayList<>(); splitsByModTime.put(modTime, splitsToForward); } splitsToForward.add(new TimestampedFileInputSplit( modTime, split.getSplitNumber(), split.getPath(), split.getStart(), split.getLength(), split.getHostnames())); } } return splitsByModTime; }
private TimestampedFileInputSplit createTimestampedFileSplit(FileInputSplit split, long modificationTime, Serializable state) { TimestampedFileInputSplit timestampedSplit = new TimestampedFileInputSplit( modificationTime, split.getSplitNumber(), split.getPath(), split.getStart(), split.getLength(), split.getHostnames()); if (state != null) { timestampedSplit.setSplitState(state); } return timestampedSplit; } }