/** * Gets the latest slice - this can contain either * <p> * - just the log files without data file - (or) data file with 0 or more log files */ public Optional<FileSlice> getLatestFileSlice() { // there should always be one return getAllFileSlices().findFirst(); }
/**
 * Streams the newest data file of every file group, excluding data files that
 * exist only because of a pending compaction. Groups with no eligible data file
 * are omitted.
 */
@Override
public Stream<HoodieDataFile> getLatestDataFiles() {
  Stream<Optional<HoodieDataFile>> newestPerGroup = fileGroupMap.values().stream()
      .map(fileGroup -> fileGroup.getAllDataFiles()
          .filter(dataFile -> !isDataFileDueToPendingCompaction(dataFile))
          .findFirst());
  return newestPerGroup.filter(Optional::isPresent).map(Optional::get);
}
/**
 * Returns the latest file slice, considering in-flight (uncommitted) slices
 * in addition to committed ones.
 *
 * @return the newest slice including in-flight ones, if any
 */
public Optional<FileSlice> getLatestFileSlicesIncludingInflight() {
  // The underlying stream is ordered latest-first; take its head.
  Stream<FileSlice> allSlices = getAllFileSlicesIncludingInflight();
  return allSlices.findFirst();
}
@Override public Stream<FileSlice> getLatestUnCompactedFileSlices(String partitionPath) { return getAllFileGroups(partitionPath) .map(fileGroup -> { FileSlice fileSlice = fileGroup.getLatestFileSlice().get(); // if the file-group is under compaction, pick the latest before compaction instant time. if (isFileSliceAfterPendingCompaction(fileSlice)) { String compactionInstantTime = fileIdToPendingCompaction.get(fileSlice.getFileId()).getLeft(); return fileGroup.getLatestFileSliceBefore(compactionInstantTime); } return Optional.of(fileSlice); }) .map(Optional::get); }
// NOTE(review): fragment — the enclosing method's signature and the end of this
// lambda lie outside the visible span.
includeInflight, excludeCompaction);
// Accumulates one display row per file slice.
List<Comparable[]> rows = new ArrayList<>();
fsView.getAllFileGroups().forEach(fg -> fg.getAllFileSlices().forEach(fs -> {
  int idx = 0;
  // NOTE(review): 'row' is not declared in the visible span and 'rows' is never
  // populated here — presumably 'Comparable[] row = new Comparable[...]' and
  // 'rows.add(row)' appear outside this fragment; verify against the full file.
  row[idx++] = fg.getPartitionPath();
  row[idx++] = fg.getId();
  row[idx++] = fs.getBaseInstantTime();
  // Empty string when the slice has log files only (no base data file).
  row[idx++] = fs.getDataFile().isPresent() ? fs.getDataFile().get().getPath() : "";
// Build one HoodieFileGroup per (partitionPath, fileId) pair and register it in
// both the global map and the per-partition index.
fileIdSet.forEach(pair -> {
  String fileId = pair.getValue();
  HoodieFileGroup group = new HoodieFileGroup(pair.getKey(), fileId, visibleActiveTimeline);
  if (dataFiles.containsKey(pair)) {
    // Attach every known data file for this file group.
    dataFiles.get(pair).forEach(group::addDataFile);
    // NOTE(review): fragment — brace structure is incomplete in the visible span,
    // so the statements below may actually belong to different branches; verify.
    // NOTE(review): fileIdToPendingCompaction.get(fileId) would NPE for a file-id
    // with no pending compaction — presumably guarded outside this fragment; confirm.
    group.addNewFileSliceAtInstant(fileIdToPendingCompaction.get(fileId).getKey());
    fileGroupMap.put(group.getId(), group);
    // Lazily create the per-partition list before appending the group.
    if (!partitionToFileGroupsMap.containsKey(group.getPartitionPath())) {
      partitionToFileGroupsMap.put(group.getPartitionPath(), new ArrayList<>());
      partitionToFileGroupsMap.get(group.getPartitionPath()).add(group);
});
// Exactly three file groups are expected for this partition.
assertEquals(3, fileGroups.size());
for (HoodieFileGroup fileGroup : fileGroups) {
  // Slices are expected latest-first (reverse base-instant order), which the
  // index-based assertions below rely on.
  List<FileSlice> slices = fileGroup.getAllFileSlices().collect(Collectors.toList());
  if (fileGroup.getId().equals(fileId1)) {
    assertEquals(2, slices.size());
    assertEquals(commitTime4, slices.get(0).getBaseInstantTime());
    assertEquals(commitTime1, slices.get(1).getBaseInstantTime());
  } else if (fileGroup.getId().equals(fileId2)) {
    assertEquals(3, slices.size());
    assertEquals(commitTime3, slices.get(0).getBaseInstantTime());
    assertEquals(commitTime2, slices.get(1).getBaseInstantTime());
    assertEquals(commitTime1, slices.get(2).getBaseInstantTime());
  } else if (fileGroup.getId().equals(fileId3)) {
    assertEquals(2, slices.size());
    assertEquals(commitTime4, slices.get(0).getBaseInstantTime());
// Select the first file group of the partition as a compaction candidate and
// remember its latest file slice (if it has one).
Optional<Boolean> added = fsView.getAllFileGroups(partitionPath).findFirst()
    .map(fg -> {
      selectedFileIdForCompaction.put(fg.getId(), partitionPath);
      fg.getLatestFileSlice().map(fs -> compactionFileIdToLatestFileSlice.put(fg.getId(), fs));
      return true;
    });
// NOTE(review): fragment — 'fileGroup' below is not defined by the lines above,
// so this part likely belongs to a different scope; verify against the full file.
if (selectedFileIdForCompaction.containsKey(fileGroup.getId())) {
  // Match data files whose commit equals the remembered slice's base instant.
  // NOTE(review): findAny()'s result is discarded here — confirm intentional.
  fileGroup.getAllDataFiles().filter(df -> {
    return compactionFileIdToLatestFileSlice.get(fileGroup.getId())
        .getBaseInstantTime().equals(df.getCommitTime());
  }).findAny();
} else {
  String fileId = fileGroup.getId();
  List<HoodieDataFile> dataFiles = fileGroup.getAllDataFiles().collect(Collectors.toList());
String fileId = fileGroup.getId();
// Collect data-file names while asserting group membership.
Set<String> filenames = Sets.newHashSet();
fileGroup.getAllDataFiles().forEach(dataFile -> {
  // Every data file within a group must carry the group's file-id.
  assertEquals("All same fileId should be grouped", fileId, dataFile.getFileId());
  filenames.add(dataFile.getFileName());
/**
 * Returns the newest file slice whose base instant is one of the given commits.
 *
 * @param commitRange commit timestamps to consider
 * @return the newest matching slice, if any
 */
public Optional<FileSlice> getLatestFileSliceInRange(List<String> commitRange) {
  // Slices stream latest-first, so the first match is the newest in range.
  return getAllFileSlices()
      .filter(slice -> {
        String baseInstant = slice.getBaseInstantTime();
        return commitRange.contains(baseInstant);
      })
      .findFirst();
}
/**
 * Streams the newest data file of every file group whose commit time is one of
 * the given commits, skipping data files created by pending compactions. Groups
 * with no eligible data file are omitted.
 *
 * @param commitsToReturn commit timestamps to consider
 */
@Override
public Stream<HoodieDataFile> getLatestDataFilesInRange(List<String> commitsToReturn) {
  return fileGroupMap.values().stream()
      .map(fileGroup -> fileGroup.getAllDataFiles()
          .filter(dataFile -> {
            boolean inRange = commitsToReturn.contains(dataFile.getCommitTime());
            return inRange && !isDataFileDueToPendingCompaction(dataFile);
          })
          .findFirst())
      .filter(Optional::isPresent)
      .map(Optional::get);
}
/**
 * Returns all file slices in the partition, including uncommitted (in-flight) ones.
 *
 * @param partitionPath partition to scan
 * @return flattened stream of every raw file slice across all file groups
 */
private Stream<FileSlice> getAllRawFileSlices(String partitionPath) {
  // Flatten each group's slice stream directly instead of map-then-flatMap.
  return fsView.getAllFileGroups(partitionPath)
      .flatMap(group -> group.getAllFileSlicesIncludingInflight());
}
/**
 * Streams the committed data files of this file group, sorted by reverse commit
 * time. Slices that have only log files (no data file) are skipped.
 */
public Stream<HoodieDataFile> getAllDataFiles() {
  // Map to the (optional) data file first, then drop the empty ones.
  return getAllFileSlices()
      .map(FileSlice::getDataFile)
      .filter(Optional::isPresent)
      .map(Optional::get);
}
/**
 * Streams the newest data file of every file group in the given partition,
 * excluding data files that exist only because of a pending compaction. Groups
 * with no eligible data file are omitted.
 *
 * @param partitionPath partition to scan
 */
@Override
public Stream<HoodieDataFile> getLatestDataFiles(final String partitionPath) {
  return getAllFileGroups(partitionPath)
      .map(fileGroup -> fileGroup.getAllDataFiles()
          .filter(df -> !isDataFileDueToPendingCompaction(df))
          .findFirst())
      .filter(Optional::isPresent)
      .map(Optional::get);
}
/**
 * Returns the newest file slice whose base instant is at or before the given
 * commit time (base instant &lt;= maxCommitTime).
 *
 * @param maxCommitTime inclusive upper bound on the base instant
 * @return the newest qualifying slice, if any
 */
public Optional<FileSlice> getLatestFileSliceBeforeOrOn(String maxCommitTime) {
  // Slices stream latest-first; the first slice at or before the bound wins.
  return getAllFileSlices()
      .filter(slice -> {
        String baseInstant = slice.getBaseInstantTime();
        return HoodieTimeline.compareTimestamps(baseInstant, maxCommitTime,
            HoodieTimeline.LESSER_OR_EQUAL);
      })
      .findFirst();
}
/**
 * Streams, per file group in the partition, the newest data file committed exactly
 * at the given instant, excluding data files created by pending compactions.
 * Groups with no such data file are omitted.
 *
 * @param partitionPath partition to scan
 * @param instantTime   exact commit time to match
 */
@Override
public Stream<HoodieDataFile> getLatestDataFilesOn(String partitionPath, String instantTime) {
  return getAllFileGroups(partitionPath)
      .map(fileGroup -> fileGroup.getAllDataFiles()
          // Single combined predicate: exact instant match AND not compaction-pending.
          .filter(dataFile ->
              HoodieTimeline.compareTimestamps(dataFile.getCommitTime(), instantTime,
                  HoodieTimeline.EQUAL)
                  && !isDataFileDueToPendingCompaction(dataFile))
          .findFirst())
      .filter(Optional::isPresent)
      .map(Optional::get);
}
/**
 * Returns the newest file slice whose base instant is strictly before the given
 * instant (base instant &lt; maxInstantTime).
 *
 * @param maxInstantTime exclusive upper bound on the base instant
 * @return the newest qualifying slice, if any
 */
public Optional<FileSlice> getLatestFileSliceBefore(String maxInstantTime) {
  // Slices stream latest-first; the first slice strictly before the bound wins.
  return getAllFileSlices()
      .filter(slice -> {
        String baseInstant = slice.getBaseInstantTime();
        return HoodieTimeline.compareTimestamps(baseInstant, maxInstantTime,
            HoodieTimeline.LESSER);
      })
      .findFirst();
}
/**
 * Streams, per file group in the partition, the newest data file committed at or
 * before the given commit time, excluding data files created by pending
 * compactions. Groups with no such data file are omitted.
 *
 * @param partitionPath partition to scan
 * @param maxCommitTime inclusive upper bound on the commit time
 */
@Override
public Stream<HoodieDataFile> getLatestDataFilesBeforeOrOn(String partitionPath,
    String maxCommitTime) {
  return getAllFileGroups(partitionPath)
      .map(fileGroup -> fileGroup.getAllDataFiles()
          // Single combined predicate: within the time bound AND not compaction-pending.
          .filter(dataFile ->
              HoodieTimeline.compareTimestamps(dataFile.getCommitTime(), maxCommitTime,
                  HoodieTimeline.LESSER_OR_EQUAL)
                  && !isDataFileDueToPendingCompaction(dataFile))
          .findFirst())
      .filter(Optional::isPresent)
      .map(Optional::get);
}
// Walk the group's slices newest-first while versions remain to be kept.
Iterator<FileSlice> fileSliceIterator = fileGroup.getAllFileSlices().iterator();
// NOTE(review): the loop body is outside the visible span — presumably it
// decrements keepVersions; confirm against the full file.
while (fileSliceIterator.hasNext() && keepVersions > 0) {
for (HoodieFileGroup fileGroup : fileGroups) {
  // Collect the distinct commit times of all data files in this group.
  Set<String> commitTimes = new HashSet<>();
  fileGroup.getAllDataFiles().forEach(value -> {
    logger.debug("Data File - " + value);
    commitTimes.add(value.getCommitTime());