/**
 * Registers an empty file-slice for the given base-instant time, unless a file-slice is already
 * tracked under that instant.
 *
 * <p>A file-slice with neither a data-file nor log-files can legitimately exist, e.g. when a
 * compaction has just been requested.
 *
 * @param baseInstantTime base-instant time used as the key of the file-slice
 */
public void addNewFileSliceAtInstant(String baseInstantTime) {
  if (fileSlices.containsKey(baseInstantTime)) {
    // A file-slice is already tracked for this instant; leave it untouched.
    return;
  }
  FileSlice emptySlice = new FileSlice(baseInstantTime, id);
  fileSlices.put(baseInstantTime, emptySlice);
}
/**
 * Adds a log file to this file group.
 *
 * <p>The log file is attached to the file-slice keyed by the log file's base commit time. If no
 * file-slice is tracked under that key yet, an empty one is created first.
 *
 * @param logFile log file to add
 */
public void addLogFile(HoodieLogFile logFile) {
  String baseCommitTime = logFile.getBaseCommitTime();
  if (!fileSlices.containsKey(baseCommitTime)) {
    fileSlices.put(baseCommitTime, new FileSlice(baseCommitTime, id));
  }
  FileSlice targetSlice = fileSlices.get(baseCommitTime);
  targetSlice.addLogFile(logFile);
}
/**
 * Adds a data file to this file group.
 *
 * <p>The data file is set on the file-slice keyed by the data file's commit time. If no
 * file-slice is tracked under that key yet, an empty one is created first.
 *
 * @param dataFile data file to add
 */
public void addDataFile(HoodieDataFile dataFile) {
  String commitTime = dataFile.getCommitTime();
  if (!fileSlices.containsKey(commitTime)) {
    fileSlices.put(commitTime, new FileSlice(commitTime, id));
  }
  FileSlice targetSlice = fileSlices.get(commitTime);
  targetSlice.setDataFile(dataFile);
}
/** * Helper to merge last 2 file-slices. These 2 file-slices do not have compaction done yet. * * @param lastSlice Latest File slice for a file-group * @param penultimateSlice Penultimate file slice for a file-group in commit timeline order */ private static FileSlice mergeCompactionPendingFileSlices(FileSlice lastSlice, FileSlice penultimateSlice) { FileSlice merged = new FileSlice(penultimateSlice.getBaseInstantTime(), penultimateSlice.getFileId()); if (penultimateSlice.getDataFile().isPresent()) { merged.setDataFile(penultimateSlice.getDataFile().get()); } // Add Log files from penultimate and last slices penultimateSlice.getLogFiles().forEach(merged::addLogFile); lastSlice.getLogFiles().forEach(merged::addLogFile); return merged; }
/** * With async compaction, it is possible to see partial/complete data-files due to inflight-compactions, * Ignore those data-files * @param fileSlice File Slice * @return */ private FileSlice filterDataFileAfterPendingCompaction(FileSlice fileSlice) { if (isFileSliceAfterPendingCompaction(fileSlice)) { // Data file is filtered out of the file-slice as the corresponding compaction // instant not completed yet. FileSlice transformed = new FileSlice(fileSlice.getBaseInstantTime(), fileSlice.getFileId()); fileSlice.getLogFiles().forEach(transformed::addLogFile); return transformed; } return fileSlice; }
/**
 * Generates input for compaction plan tests.
 *
 * <p>Builds four file-slices at instant "000" (no files at all, log files only, data file plus
 * log files, data file only), pairs each with the first default partition path and builds a
 * compaction plan from them.
 *
 * @return pair of (partition-path/file-slice inputs, compaction plan built from those inputs)
 */
private Pair<List<Pair<String, FileSlice>>, HoodieCompactionPlan> buildCompactionPlan() {
  // Slice with neither a data file nor log files.
  FileSlice emptySlice = new FileSlice("000", "empty1");

  // Slice with two log files and no data file.
  FileSlice logsOnlySlice = new FileSlice("000", "noData1");
  logsOnlySlice.addLogFile(new HoodieLogFile(new Path(
      FSUtils.makeLogFileName("noData1", ".log", "000", 1))));
  logsOnlySlice.addLogFile(new HoodieLogFile(new Path(
      FSUtils.makeLogFileName("noData1", ".log", "000", 2))));

  // Slice with a data file and two log files.
  // NOTE(review): reuses the "noData1" file id and the "/tmp/noLog.parquet" path of the other
  // slices - presumably intentional for this fixture; confirm.
  FileSlice dataAndLogsSlice = new FileSlice("000", "noData1");
  dataAndLogsSlice.setDataFile(new TestHoodieDataFile("/tmp/noLog.parquet"));
  dataAndLogsSlice.addLogFile(new HoodieLogFile(new Path(
      FSUtils.makeLogFileName("noData1", ".log", "000", 1))));
  dataAndLogsSlice.addLogFile(new HoodieLogFile(new Path(
      FSUtils.makeLogFileName("noData1", ".log", "000", 2))));

  // Slice with a data file and no log files.
  FileSlice dataOnlySlice = new FileSlice("000", "noLog1");
  dataOnlySlice.setDataFile(new TestHoodieDataFile("/tmp/noLog.parquet"));

  String partitionPath = DEFAULT_PARTITION_PATHS[0];
  List<Pair<String, FileSlice>> input =
      Arrays.asList(emptySlice, logsOnlySlice, dataAndLogsSlice, dataOnlySlice)
          .stream()
          .map(slice -> Pair.of(partitionPath, slice))
          .collect(Collectors.toList());

  HoodieCompactionPlan plan =
      CompactionUtils.buildFromFileSlices(input, Optional.empty(), Optional.of(metricsCaptureFn));
  return Pair.of(input, plan);
}
} else { fileSlice = Optional.of(new FileSlice(baseInstantTime, this.fileId)); logger.info("New InsertHandle for partition :" + partitionPath);
} else { fileSlice = Optional.of(new FileSlice(baseInstantTime, this.fileId)); logger.info("New InsertHandle for partition :" + partitionPath);
@Test public void testBuildFromFileSlice() { FileSlice emptyFileSlice = new FileSlice("000", "empty1"); HoodieCompactionOperation op = CompactionUtils.buildFromFileSlice( DEFAULT_PARTITION_PATHS[0], emptyFileSlice, Optional.of(metricsCaptureFn)); FileSlice noLogFileSlice = new FileSlice("000", "noLog1"); noLogFileSlice.setDataFile(new TestHoodieDataFile("/tmp/noLog.parquet")); op = CompactionUtils.buildFromFileSlice( FileSlice noDataFileSlice = new FileSlice("000", "noData1"); noDataFileSlice.addLogFile(new HoodieLogFile(new Path( FSUtils.makeLogFileName("noData1", ".log", "000", 1)))); FileSlice fileSlice = new FileSlice("000", "noData1"); fileSlice.setDataFile(new TestHoodieDataFile("/tmp/noLog.parquet")); fileSlice.addLogFile(new HoodieLogFile(new Path(
HoodieTestUtils.createNewLogFile(metaClient.getFs(), metaClient.getBasePath(), DEFAULT_PARTITION_PATHS[0], instantId, fileId, Optional.of(2)); FileSlice slice = new FileSlice(instantId, fileId); if (createDataFile) { slice.setDataFile(new TestHoodieDataFile(metaClient.getBasePath() + "/" + DEFAULT_PARTITION_PATHS[0]