/**
 * Create a file system view, as of the given timeline
 */
public HoodieTableFileSystemView(HoodieTableMetaClient metaClient, HoodieTimeline visibleActiveTimeline) {
  this.metaClient = metaClient;
  this.visibleActiveTimeline = visibleActiveTimeline;
  this.fileGroupMap = new HashMap<>();
  this.partitionToFileGroupsMap = new HashMap<>();
  // Build fileId to Pending Compaction Instants
  List<HoodieInstant> pendingCompactionInstants =
      metaClient.getActiveTimeline().filterPendingCompactionTimeline().getInstants().collect(Collectors.toList());
  this.fileIdToPendingCompaction = ImmutableMap.copyOf(
      CompactionUtils.getAllPendingCompactionOperations(metaClient).entrySet().stream()
          .map(entry -> Pair.of(entry.getKey(),
              Pair.of(entry.getValue().getKey(),
                  CompactionOperation.convertFromAvroRecordInstance(entry.getValue().getValue()))))
          .collect(Collectors.toMap(Pair::getKey, Pair::getValue)));
}
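A minimal construction sketch, assuming an already-initialized table; `conf` (a Hadoop Configuration) and `basePath` are hypothetical names, and the view is built over the completed instants of the active timeline:

// Sketch only: `conf` and `basePath` are assumed to point at an initialized Hoodie table.
HoodieTableMetaClient metaClient = new HoodieTableMetaClient(conf, basePath, true);
// Expose completed instants to readers; pending compactions are tracked via fileIdToPendingCompaction.
HoodieTableFileSystemView fsView = new HoodieTableFileSystemView(
    metaClient, metaClient.getActiveTimeline().filterCompletedInstants());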
/**
 * Return all pending compactions with instant time for clients to decide what to compact next.
 *
 * @return list of pending compaction instant times paired with their compaction plans
 */
public List<Pair<String, HoodieCompactionPlan>> getPendingCompactions() {
  HoodieTableMetaClient metaClient = new HoodieTableMetaClient(jsc.hadoopConfiguration(),
      hoodieTable.getMetaClient().getBasePath(), true);
  return CompactionUtils.getAllPendingCompactionPlans(metaClient).stream()
      .map(instantWorkloadPair ->
          Pair.of(instantWorkloadPair.getKey().getTimestamp(), instantWorkloadPair.getValue()))
      .collect(Collectors.toList());
}
}
/**
 * Get all pending compaction plans along with their instants
 *
 * @param metaClient Hoodie Meta Client
 */
public static List<Pair<HoodieInstant, HoodieCompactionPlan>> getAllPendingCompactionPlans(
    HoodieTableMetaClient metaClient) {
  List<HoodieInstant> pendingCompactionInstants =
      metaClient.getActiveTimeline().filterPendingCompactionTimeline().getInstants().collect(Collectors.toList());
  return pendingCompactionInstants.stream().map(instant -> {
    try {
      return Pair.of(instant, getCompactionPlan(metaClient, instant.getTimestamp()));
    } catch (IOException e) {
      throw new HoodieException(e);
    }
  }).collect(Collectors.toList());
}
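A hedged usage sketch, assuming a `metaClient` for the table is already in hand: list every pending compaction instant along with how many operations its plan contains.

// Sketch: iterate pending compaction plans and print instant times and operation counts.
List<Pair<HoodieInstant, HoodieCompactionPlan>> pending =
    CompactionUtils.getAllPendingCompactionPlans(metaClient);
for (Pair<HoodieInstant, HoodieCompactionPlan> entry : pending) {
  System.out.println("Pending compaction at " + entry.getKey().getTimestamp()
      + " with " + entry.getValue().getOperations().size() + " operations");
}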
@Test
public void testCompactionTransformation() {
  // check HoodieCompactionOperation <=> CompactionOperation transformation function
  Pair<List<Pair<String, FileSlice>>, HoodieCompactionPlan> inputAndPlan = buildCompactionPlan();
  HoodieCompactionPlan plan = inputAndPlan.getRight();
  List<HoodieCompactionOperation> originalOps = plan.getOperations();
  List<HoodieCompactionOperation> regeneratedOps = originalOps.stream().map(op -> {
    // Convert to CompactionOperation
    return CompactionUtils.buildCompactionOperation(op);
  }).map(op2 -> {
    // Convert back to HoodieCompactionOperation and check for equality
    return CompactionUtils.buildHoodieCompactionOperation(op2);
  }).collect(Collectors.toList());
  Assert.assertTrue("Transformation did get tested", originalOps.size() > 0);
  Assert.assertEquals("All fields set correctly in transformations", originalOps, regeneratedOps);
}
CompactionUtils.getAllPendingCompactionOperations(metaClient).get(fileId);
HoodieCompactionPlan plan = CompactionUtils
    .getCompactionPlan(metaClient, compactionOperationWithInstant.getKey());
List<HoodieCompactionOperation> newOps = plan.getOperations().stream()
    .filter(op -> !op.getFileId().equals(fileId)).collect(Collectors.toList());
CompactionUtils.getAllPendingCompactionPlans(metaClient);
List<String> gotPendingCompactionInstants =
    pendingCompactions.stream().map(pc -> pc.getKey().getTimestamp()).sorted().collect(Collectors.toList());
CompactionUtils.getAllPendingCompactionOperations(metaClient);
public static final void createCompactionRequest(HoodieTableMetaClient metaClient, String instant,
    List<Pair<String, FileSlice>> fileSliceList) throws IOException {
  HoodieCompactionPlan plan = CompactionUtils.buildFromFileSlices(fileSliceList, Optional.empty(), Optional.empty());
  HoodieInstant compactionInstant = new HoodieInstant(State.REQUESTED, HoodieTimeline.COMPACTION_ACTION, instant);
  metaClient.getActiveTimeline().saveToCompactionRequested(compactionInstant,
      AvroUtils.serializeCompactionPlan(plan));
}
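A hedged sketch of calling this helper from the same test class: the partition path, fileId and instant values below are hypothetical, and the file slice carries only a log file.

// Sketch: schedule a compaction request at instant "004" for one log-only file slice.
FileSlice slice = new FileSlice("000", "file-1");
slice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("file-1", ".log", "000", 1))));
List<Pair<String, FileSlice>> fileSliceList =
    Arrays.asList(Pair.of(DEFAULT_PARTITION_PATHS[0], slice));
createCompactionRequest(metaClient, "004", fileSliceList);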
/**
 * Generate compaction plan from file-slices
 *
 * @param partitionFileSlicePairs list of partition file-slice pairs
 * @param extraMetadata Extra Metadata
 * @param metricsCaptureFunction Metrics Capture function
 */
public static HoodieCompactionPlan buildFromFileSlices(
    List<Pair<String, FileSlice>> partitionFileSlicePairs,
    Optional<Map<String, String>> extraMetadata,
    Optional<Function<Pair<String, FileSlice>, Map<String, Double>>> metricsCaptureFunction) {
  HoodieCompactionPlan.Builder builder = HoodieCompactionPlan.newBuilder();
  extraMetadata.ifPresent(m -> builder.setExtraMetadata(m));
  builder.setOperations(partitionFileSlicePairs.stream()
      .map(pfPair -> buildFromFileSlice(pfPair.getKey(), pfPair.getValue(), metricsCaptureFunction))
      .collect(Collectors.toList()));
  return builder.build();
}
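A hedged sketch showing both optional parameters in use; `partitionFileSlicePairs` is assumed to be in scope, and the metadata key and metric name are made up for illustration.

// Sketch: build a plan with extra metadata and a placeholder metrics capture function.
Map<String, String> extraMeta = new HashMap<>();
extraMeta.put("scheduled-by", "example");
HoodieCompactionPlan plan = CompactionUtils.buildFromFileSlices(
    partitionFileSlicePairs,
    Optional.of(extraMeta),
    Optional.of(pair -> {
      // Placeholder metric; a real capture function might record log-file counts or sizes per slice.
      Map<String, Double> metrics = new HashMap<>();
      metrics.put("numFileSlices", 1.0);
      return metrics;
    }));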
/**
 * Generate input for compaction plan tests
 */
private Pair<List<Pair<String, FileSlice>>, HoodieCompactionPlan> buildCompactionPlan() {
  FileSlice emptyFileSlice = new FileSlice("000", "empty1");
  FileSlice fileSlice = new FileSlice("000", "noData1");
  fileSlice.setDataFile(new TestHoodieDataFile("/tmp/noLog.parquet"));
  fileSlice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1))));
  fileSlice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 2))));
  FileSlice noLogFileSlice = new FileSlice("000", "noLog1");
  noLogFileSlice.setDataFile(new TestHoodieDataFile("/tmp/noLog.parquet"));
  FileSlice noDataFileSlice = new FileSlice("000", "noData1");
  noDataFileSlice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 1))));
  noDataFileSlice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 2))));
  List<FileSlice> fileSliceList = Arrays.asList(emptyFileSlice, noDataFileSlice, fileSlice, noLogFileSlice);
  List<Pair<String, FileSlice>> input = fileSliceList.stream()
      .map(f -> Pair.of(DEFAULT_PARTITION_PATHS[0], f))
      .collect(Collectors.toList());
  return Pair.of(input,
      CompactionUtils.buildFromFileSlices(input, Optional.empty(), Optional.of(metricsCaptureFn)));
}
HoodieCompactionOperation op = CompactionUtils.buildFromFileSlice(
    DEFAULT_PARTITION_PATHS[0], emptyFileSlice, Optional.of(metricsCaptureFn));
testFileSliceCompactionOpEquality(emptyFileSlice, op, DEFAULT_PARTITION_PATHS[0]);

op = CompactionUtils.buildFromFileSlice(
    DEFAULT_PARTITION_PATHS[0], noLogFileSlice, Optional.of(metricsCaptureFn));
testFileSliceCompactionOpEquality(noLogFileSlice, op, DEFAULT_PARTITION_PATHS[0]);

noDataFileSlice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 2))));
op = CompactionUtils.buildFromFileSlice(
    DEFAULT_PARTITION_PATHS[0], noDataFileSlice, Optional.of(metricsCaptureFn));
testFileSliceCompactionOpEquality(noDataFileSlice, op, DEFAULT_PARTITION_PATHS[0]);

fileSlice.addLogFile(new HoodieLogFile(new Path(FSUtils.makeLogFileName("noData1", ".log", "000", 2))));
op = CompactionUtils.buildFromFileSlice(
    DEFAULT_PARTITION_PATHS[0], fileSlice, Optional.of(metricsCaptureFn));
testFileSliceCompactionOpEquality(fileSlice, op, DEFAULT_PARTITION_PATHS[0]);
/**
 * Generate renaming actions for unscheduling a fileId from pending compaction. NOTE: Can only be used safely when
 * no writer (ingestion/compaction) is running.
 *
 * @param metaClient Hoodie Table MetaClient
 * @param fileId FileId to remove compaction
 * @param fsViewOpt Cached File System View
 * @param skipValidation Skip Validation
 * @return list of pairs of log-files (old, new); for each pair, the rename must be done to successfully unschedule
 *         compaction.
 */
public List<Pair<HoodieLogFile, HoodieLogFile>> getRenamingActionsForUnschedulingCompactionForFileId(
    HoodieTableMetaClient metaClient, String fileId, Optional<HoodieTableFileSystemView> fsViewOpt,
    boolean skipValidation) throws IOException {
  Map<String, Pair<String, HoodieCompactionOperation>> allPendingCompactions =
      CompactionUtils.getAllPendingCompactionOperations(metaClient);
  if (allPendingCompactions.containsKey(fileId)) {
    Pair<String, HoodieCompactionOperation> opWithInstant = allPendingCompactions.get(fileId);
    return getRenamingActionsForUnschedulingCompactionOperation(metaClient, opWithInstant.getKey(),
        CompactionOperation.convertFromAvroRecordInstance(opWithInstant.getValue()), fsViewOpt, skipValidation);
  }
  throw new HoodieException("FileId " + fileId + " not in pending compaction");
}
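A hedged usage sketch: resolve the renames needed to pull a given `fileId` out of pending compaction and log each planned move. Here `unscheduler` is a hypothetical instance of the class declaring this method, and the sketch assumes `HoodieLogFile#getPath` exposes the log-file path.

// Sketch only: compute the rename pairs without a cached view and with validation enabled.
List<Pair<HoodieLogFile, HoodieLogFile>> renames =
    unscheduler.getRenamingActionsForUnschedulingCompactionForFileId(
        metaClient, fileId, Optional.empty(), false);
for (Pair<HoodieLogFile, HoodieLogFile> rename : renames) {
  // Each pair is (current log file, post-unschedule log file); the actual rename is applied separately.
  System.out.println("Rename " + rename.getLeft().getPath() + " -> " + rename.getRight().getPath());
}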
HoodieCompactionPlan plan = CompactionUtils.getCompactionPlan(metaClient, compactionInstant);
HoodieCompactionPlan newPlan = HoodieCompactionPlan.newBuilder()
    .setOperations(new ArrayList<>())
    .setExtraMetadata(plan.getExtraMetadata())
    .map(e -> Pair.of(selectedFileIdForCompaction.get(e.getKey()), e.getValue())).collect(Collectors.toList());
HoodieCompactionPlan compactionPlan =
    CompactionUtils.buildFromFileSlices(partitionFileSlicePairs, Optional.empty(), Optional.empty());
List<String> instantTimes = HoodieTestUtils.monotonicIncreasingCommitTimestamps(9, 1);
String compactionTime = instantTimes.get(0);
slice.addLogFile(new HoodieLogFile(new Path(logFilePath2)));
HoodieCompactionOperation op =
    CompactionUtils.buildFromFileSlice(DEFAULT_PARTITION_PATHS[0], slice, Optional.empty());
if (deltaCommitsAfterCompactionRequests) {
  HoodieTestUtils.createNewLogFile(metaClient.getFs(), metaClient.getBasePath(), DEFAULT_PARTITION_PATHS[0],
CompactionUtils.getAllPendingCompactionOperations(metaClient);
    HoodieTableMetaClient metaClient) {
  List<Pair<HoodieInstant, HoodieCompactionPlan>> pendingCompactionPlanWithInstants =
      getAllPendingCompactionPlans(metaClient);
@Test
public void testUnscheduleCompactionFileId() throws Exception {
  int numEntriesPerInstant = 10;
  CompactionTestUtils.setupAndValidateCompactionOperations(metaClient, false, numEntriesPerInstant,
      numEntriesPerInstant, numEntriesPerInstant, numEntriesPerInstant);
  Map<String, CompactionOperation> instantsWithOp =
      Arrays.asList("001", "003", "005", "007").stream().map(instant -> {
        try {
          return Pair.of(instant, CompactionUtils.getCompactionPlan(metaClient, instant));
        } catch (IOException ioe) {
          throw new HoodieException(ioe);
        }
      }).map(instantWithPlan -> instantWithPlan.getRight().getOperations().stream()
          .map(op -> Pair.of(instantWithPlan.getLeft(), CompactionOperation.convertFromAvroRecordInstance(op)))
          .findFirst().get())
      .collect(Collectors.toMap(Pair::getLeft, Pair::getRight));
  // There are delta-commits after compaction instant
  validateUnScheduleFileId(client, "000", "001", instantsWithOp.get("001"), 2);
  // There are delta-commits after compaction instant
  validateUnScheduleFileId(client, "002", "003", instantsWithOp.get("003"), 2);
  // There are no delta-commits after compaction instant
  validateUnScheduleFileId(client, "004", "005", instantsWithOp.get("005"), 0);
  // There are no delta-commits after compaction instant
  validateUnScheduleFileId(client, "006", "007", instantsWithOp.get("007"), 0);
}
List<Pair<String, FileSlice>> partitionFileSlicesPairs = new ArrayList<>();
partitionFileSlicesPairs.add(Pair.of(partitionPath, fileSlices.get(0)));
HoodieCompactionPlan compactionPlan =
    CompactionUtils.buildFromFileSlices(partitionFileSlicesPairs, Optional.empty(), Optional.empty());
HoodieInstant compactionInstant = null;
@Test(expected = IllegalStateException.class)
public void testGetAllPendingCompactionOperationsWithDupFileId() throws IOException {
  // Case where there are duplicate fileIds in compaction requests
  HoodieCompactionPlan plan1 = createCompactionPlan(metaClient, "000", "001", 10, true, true);
  HoodieCompactionPlan plan2 = createCompactionPlan(metaClient, "002", "003", 0, false, false);
  scheduleCompaction(metaClient, "001", plan1);
  scheduleCompaction(metaClient, "003", plan2);
  // Schedule the same plan again so that there will be duplicates
  scheduleCompaction(metaClient, "005", plan1);
  metaClient = new HoodieTableMetaClient(metaClient.getHadoopConf(), basePath, true);
  Map<String, Pair<String, HoodieCompactionOperation>> res =
      CompactionUtils.getAllPendingCompactionOperations(metaClient);
}