/**
 * Streams the data file of every file slice that has one; slices without a data file are skipped.
 * Elements appear in the order produced by {@code getAllFileSlices()} (described by the original
 * author as reverse commit time).
 *
 * @return stream of the data files present in this object's file slices
 */
public Stream<HoodieDataFile> getAllDataFiles() {
  return getAllFileSlices()
      // Look up the (optional) data file once per slice
      .map(slice -> slice.getDataFile())
      .filter(candidate -> candidate.isPresent())
      .map(candidate -> candidate.get());
}
/** * Helper to merge last 2 file-slices. These 2 file-slices do not have compaction done yet. * * @param lastSlice Latest File slice for a file-group * @param penultimateSlice Penultimate file slice for a file-group in commit timeline order */ private static FileSlice mergeCompactionPendingFileSlices(FileSlice lastSlice, FileSlice penultimateSlice) { FileSlice merged = new FileSlice(penultimateSlice.getBaseInstantTime(), penultimateSlice.getFileId()); if (penultimateSlice.getDataFile().isPresent()) { merged.setDataFile(penultimateSlice.getDataFile().get()); } // Add Log files from penultimate and last slices penultimateSlice.getLogFiles().forEach(merged::addLogFile); lastSlice.getLogFiles().forEach(merged::addLogFile); return merged; }
Optional<HoodieDataFile> dataFile = nextSlice.getDataFile(); if (dataFile.isPresent() && savepointedFiles.contains(dataFile.get().getFileName())) { FileSlice nextSlice = fileSliceIterator.next(); if (!isFileSliceNeededForPendingCompaction(nextSlice)) { if (nextSlice.getDataFile().isPresent()) { HoodieDataFile dataFile = nextSlice.getDataFile().get(); deletePaths.add(dataFile.getFileStatus().getPath().toString());
Optional<HoodieDataFile> dataFile = nextSlice.getDataFile(); if (dataFile.isPresent() && savepointedFiles.contains(dataFile.get().getFileName())) { FileSlice nextSlice = fileSliceIterator.next(); if (!isFileSliceNeededForPendingCompaction(nextSlice)) { if (nextSlice.getDataFile().isPresent()) { HoodieDataFile dataFile = nextSlice.getDataFile().get(); deletePaths.add(dataFile.getFileStatus().getPath().toString());
/**
 * Builds a {@code HoodieCompactionOperation} describing the given file-slice.
 *
 * <p>The operation always carries the partition path, file id, base instant time and the paths of
 * the slice's log files. The data file path is set only when the slice has a data file, and
 * metrics are set only when a metrics capture function is supplied.
 *
 * @param partitionPath Partition path
 * @param fileSlice File Slice
 * @param metricsCaptureFunction Optional function applied to the (partition path, file slice) pair
 *        to produce the metrics stored on the operation
 * @return Compaction Operation
 */
public static HoodieCompactionOperation buildFromFileSlice(String partitionPath, FileSlice fileSlice,
    Optional<Function<Pair<String, FileSlice>, Map<String, Double>>> metricsCaptureFunction) {
  final HoodieCompactionOperation.Builder opBuilder = HoodieCompactionOperation.newBuilder();

  // Fields that are always populated
  opBuilder.setPartitionPath(partitionPath);
  opBuilder.setFileId(fileSlice.getFileId());
  opBuilder.setBaseInstantTime(fileSlice.getBaseInstantTime());
  opBuilder.setDeltaFilePaths(
      fileSlice.getLogFiles().map(logFile -> logFile.getPath().toString()).collect(Collectors.toList()));

  // The data file path is set only when the slice has a data file
  fileSlice.getDataFile().ifPresent(dataFile -> opBuilder.setDataFilePath(dataFile.getPath()));

  // Metrics are computed only when the caller supplied a capture function
  if (metricsCaptureFunction.isPresent()) {
    Map<String, Double> capturedMetrics =
        metricsCaptureFunction.get().apply(Pair.of(partitionPath, fileSlice));
    opBuilder.setMetrics(capturedMetrics);
  }

  return opBuilder.build();
}
if (fileSliceOptional.isPresent()) { FileSlice fs = fileSliceOptional.get(); java.util.Optional<HoodieDataFile> df = fs.getDataFile(); if (operation.getDataFilePath().isPresent()) { String expPath = metaClient.getFs().getFileStatus(new Path(operation.getDataFilePath().get())).getPath()
String logExtn = fileSliceForCompaction.getLogFiles().findFirst().map(lf -> "." + lf.getFileExtension()) .orElse(HoodieLogFile.DELTA_EXTENSION); String parentPath = fileSliceForCompaction.getDataFile().map(df -> new Path(df.getPath()).getParent().toString()) .orElse(fileSliceForCompaction.getLogFiles().findFirst().map(lf -> lf.getPath().getParent().toString()).get()); for (HoodieLogFile toRepair : logFilesToRepair) {
Optional<HoodieDataFile> aFile = aSlice.getDataFile(); String fileCommitTime = aSlice.getBaseInstantTime(); if (aFile.isPresent() && savepointedFiles.contains(aFile.get().getFileName())) {
row[idx++] = fg.getId(); row[idx++] = fs.getBaseInstantTime(); row[idx++] = fs.getDataFile().isPresent() ? fs.getDataFile().get().getPath() : ""; row[idx++] = fs.getDataFile().isPresent() ? fs.getDataFile().get().getFileSize() : -1; if (!readOptimizedOnly) { row[idx++] = fs.getLogFiles().count();
Optional<HoodieDataFile> dataFile = s.getDataFile(); return new CompactionOperation(dataFile, partitionPath, logFiles, config.getCompactionStrategy().captureMetrics(config, dataFile, partitionPath, logFiles));
/**
 * Validates that a generated compaction operation matches the input file slice and partition path.
 *
 * <p>Checks the partition path, base instant, file id, the data file path (only when the slice has
 * a data file), the log-file paths (count and element-by-element, in order) and finally that the
 * operation's metrics equal the {@code metrics} value defined outside this method.
 *
 * @param slice File Slice
 * @param op HoodieCompactionOperation
 * @param expPartitionPath Partition path
 */
private void testFileSliceCompactionOpEquality(FileSlice slice, HoodieCompactionOperation op,
    String expPartitionPath) {
  Assert.assertEquals("Partition path is correct", expPartitionPath, op.getPartitionPath());
  Assert.assertEquals("Same base-instant", slice.getBaseInstantTime(), op.getBaseInstantTime());
  Assert.assertEquals("Same file-id", slice.getFileId(), op.getFileId());
  if (slice.getDataFile().isPresent()) {
    Assert.assertEquals("Same data-file", slice.getDataFile().get().getPath(), op.getDataFilePath());
  }
  List<String> paths = slice.getLogFiles().map(l -> l.getPath().toString()).collect(Collectors.toList());
  // Compare the counts first: the index loop below only walks the slice's log files, so without
  // this check an operation carrying additional delta paths would pass unnoticed.
  Assert.assertEquals("Same number of log files", paths.size(), op.getDeltaFilePaths().size());
  IntStream.range(0, paths.size()).forEach(idx -> {
    Assert.assertEquals("Log File Index " + idx, paths.get(idx), op.getDeltaFilePaths().get(idx));
  });
  Assert.assertEquals("Metrics set", metrics, op.getMetrics());
}
}
private void executeCompaction(String compactionInstantTime, HoodieWriteClient client, HoodieTable table, HoodieWriteConfig cfg, int expectedNumRecs, boolean hasDeltaCommitAfterPendingCompaction) throws IOException { client.compact(compactionInstantTime); List<FileSlice> fileSliceList = getCurrentLatestFileSlices(table, cfg); assertTrue("Ensure latest file-slices are not empty", fileSliceList.stream().findAny().isPresent()); assertFalse("Verify all file-slices have base-instant same as compaction instant", fileSliceList.stream().filter(fs -> !fs.getBaseInstantTime().equals(compactionInstantTime)) .findAny().isPresent()); assertFalse("Verify all file-slices have data-files", fileSliceList.stream().filter(fs -> !fs.getDataFile().isPresent()).findAny().isPresent()); if (hasDeltaCommitAfterPendingCompaction) { assertFalse("Verify all file-slices have atleast one log-file", fileSliceList.stream().filter(fs -> fs.getLogFiles().count() == 0).findAny().isPresent()); } else { assertFalse("Verify all file-slices have no log-files", fileSliceList.stream().filter(fs -> fs.getLogFiles().count() > 0).findAny().isPresent()); } // verify that there is a commit table = HoodieTable.getHoodieTable( new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath(), true), cfg, jsc); HoodieTimeline timeline = table.getMetaClient().getCommitTimeline().filterCompletedInstants(); String latestCompactionCommitTime = timeline.lastInstant().get().getTimestamp(); assertEquals("Expect compaction instant time to be the latest commit time", latestCompactionCommitTime, compactionInstantTime); assertEquals("Must contain expected records", expectedNumRecs, HoodieClientTestUtils.readSince(basePath, sqlContext, timeline, "000").count()); }
if (slice.getFileId().equals(fileId1)) { assertEquals(slice.getBaseInstantTime(), commitTime3); assertTrue(slice.getDataFile().isPresent()); assertEquals(slice.getLogFiles().count(), 0); } else if (slice.getFileId().equals(fileId2)) { assertEquals(slice.getBaseInstantTime(), commitTime4); assertFalse(slice.getDataFile().isPresent()); assertEquals(slice.getLogFiles().count(), 1); } else if (slice.getFileId().equals(fileId3)) { assertEquals(slice.getBaseInstantTime(), commitTime4); assertTrue(slice.getDataFile().isPresent()); assertEquals(slice.getLogFiles().count(), 0);
/**
 * HELPER METHODS FOR TESTING
 **/

/**
 * Checks the current latest file-slices against the given compaction operations.
 *
 * <p>For a file-slice whose file id has an entry in {@code fileIdToCompactionOperation}: its base
 * instant must equal the entry's key, it must have at least one log file, and it must have no data
 * file. For every other file-slice the base instant must be less than or equal to
 * {@code latestDeltaCommit} (string comparison).
 *
 * @param latestDeltaCommit latest delta commit time
 * @param fileIdToCompactionOperation map from file id to a pair whose key is compared with the
 *        slice's base instant
 * @param cfg write config used to load the table
 * @throws IOException propagated from the helpers used here
 */
private void validateDeltaCommit(String latestDeltaCommit,
    final Map<String, Pair<String, HoodieCompactionOperation>> fileIdToCompactionOperation,
    HoodieWriteConfig cfg) throws IOException {
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(), cfg.getBasePath());
  HoodieTable table = HoodieTable.getHoodieTable(metaClient, cfg, jsc);

  for (FileSlice fileSlice : getCurrentLatestFileSlices(table, cfg)) {
    Pair<String, HoodieCompactionOperation> pendingOp = fileIdToCompactionOperation.get(fileSlice.getFileId());

    if (pendingOp == null) {
      // No operation registered for this file id: only the base instant bound applies
      assertTrue("Expect baseInstant to be less than or equal to latestDeltaCommit",
          fileSlice.getBaseInstantTime().compareTo(latestDeltaCommit) <= 0);
      continue;
    }

    System.out.println("FileSlice :" + fileSlice);
    assertTrue("Expect baseInstant to match compaction Instant",
        fileSlice.getBaseInstantTime().equals(pendingOp.getKey()));
    assertTrue("Expect atleast one log file to be present where the latest delta commit was written",
        fileSlice.getLogFiles().count() > 0);
    assertFalse("Expect no data-file to be present", fileSlice.getDataFile().isPresent());
  }
}
FileSlice fileSlice = fileSliceList.get(0); assertEquals("File-Id must be set correctly", fileId, fileSlice.getFileId()); assertFalse("Data file for base instant must be present", fileSlice.getDataFile().isPresent()); assertEquals("Base Instant for file-group set correctly", instantTime1, fileSlice.getBaseInstantTime()); List<HoodieLogFile> logFiles = fileSlice.getLogFiles().collect(Collectors.toList()); fileSlice = fileSliceList.get(0); assertEquals("File-Id must be set correctly", fileId, fileSlice.getFileId()); assertFalse("Data file for base instant must be present", fileSlice.getDataFile().isPresent()); assertEquals("Base Instant for file-group set correctly", instantTime1, fileSlice.getBaseInstantTime()); logFiles = fileSlice.getLogFiles().collect(Collectors.toList()); fileSlice = fileSliceList.get(0); assertEquals("File-Id must be set correctly", fileId, fileSlice.getFileId()); assertFalse("Data file for base instant must be present", fileSlice.getDataFile().isPresent()); assertEquals("Base Instant for file-group set correctly", instantTime1, fileSlice.getBaseInstantTime()); logFiles = fileSlice.getLogFiles().collect(Collectors.toList());
.filter(fs -> fs.getBaseInstantTime().equals(compactionInstant)) .filter(fs -> fs.getFileId().equals(op.getFileId())).forEach(fs -> { Assert.assertFalse("No Data file must be present", fs.getDataFile().isPresent()); Assert.assertTrue("No Log Files", fs.getLogFiles().count() == 0); });
Assert.assertFalse("No Data file must be present", fs.getDataFile().isPresent()); Assert.assertTrue("No Log Files", fs.getLogFiles().count() == 0); });
for (String partitionPath : dataGen.getPartitionPaths()) { Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice -> fileSlice.getDataFile().isPresent()).count() == 0); Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count() > 0); for (String partitionPath : dataGen.getPartitionPaths()) { Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice -> fileSlice.getDataFile().isPresent()).count() == 0); Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count() > 0);
for (String partitionPath : dataGen.getPartitionPaths()) { Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice -> fileSlice.getDataFile().isPresent()).count() == 0); Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count() > 0);
for (String partitionPath : dataGen.getPartitionPaths()) { Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice -> fileSlice.getDataFile().isPresent()).count() == 0); Assert.assertTrue(tableRTFileSystemView.getLatestFileSlices(partitionPath).filter(fileSlice -> fileSlice.getLogFiles().count() > 0).count() == 0);