int maxVersions = 2; // keep upto 2 versions for each file HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig( HoodieCompactionConfig.newBuilder().withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS) .retainFileVersions(maxVersions).build()) .withParallelism(1, 1).withBulkInsertParallelism(1)
// Write config for a commit-based cleaning run: the cleaner retains the last `maxCommits` commits.
// retainCommits(...) is the setting that goes with KEEP_LATEST_COMMITS; the file-version policy is
// configured through retainFileVersions(...) instead, so pairing it with retainCommits(...) would
// leave `maxCommits` without effect on what the cleaner keeps.
HoodieWriteConfig cfg = getConfigBuilder()
    .withCompactionConfig(
        HoodieCompactionConfig.newBuilder()
            .withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS)
            .retainCommits(maxCommits)
            .build())
    // Parallelism of 1 everywhere, with the consistency check switched on.
    .withParallelism(1, 1)
    .withBulkInsertParallelism(1)
    .withFinalizeWriteParallelism(1)
    .withConsistencyCheckEnabled(true)
    .build();
public void testSavepointAndRollback() throws Exception { HoodieWriteConfig cfg = getConfigBuilder().withCompactionConfig( HoodieCompactionConfig.newBuilder().withCleanerPolicy(HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(1) .build()).build(); HoodieWriteClient client = new HoodieWriteClient(jsc, cfg);
compactionConfigBuilder.withCleanerPolicy(HoodieCleaningPolicy .valueOf(getProperty(HOODIE_CLEANER_POLICY, DEFAULT_HOODIE_CLEANER_POLICY))); compactionConfigBuilder.retainCommits(
public void testKeepLatestCommits() throws IOException { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true) .withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy( HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build()).build();
public void testCleaningSkewedPartitons() throws IOException { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true) .withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy( HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build()).build(); Map<Long, Long> stageOneShuffleReadTaskRecordsCountMap = new HashMap<>();
public void testKeepLatestFileVersions() throws IOException { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true) .withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy( HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(1).build()) .build();
.withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy( HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(1).build()) .build();
/** * Test Keep Latest Commits when there are pending compactions */ @Test public void testKeepLatestCommitsWithPendingCompactions() throws IOException { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true) .withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy( HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build()).build(); // Deletions: // . FileId Parquet Logs Total Retained Commits // FileId7 5 10 15 009, 011 // FileId6 5 10 15 009 // FileId5 3 6 9 005 // FileId4 2 4 6 003 // FileId3 1 2 3 001 // FileId2 0 0 0 000 // FileId1 0 0 0 000 testPendingCompactions(config, 48, 18); }
/** * Test Keep Latest Versions when there are pending compactions */ @Test public void testKeepLatestVersionsWithPendingCompactions() throws IOException { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true) .withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy( HoodieCleaningPolicy.KEEP_LATEST_FILE_VERSIONS).retainFileVersions(2).build()).build(); // Deletions: // . FileId Parquet Logs Total Retained Commits // FileId7 5 10 15 009, 011 // FileId6 4 8 12 007, 009 // FileId5 2 4 6 003 005 // FileId4 1 2 3 001, 003 // FileId3 0 0 0 000, 001 // FileId2 0 0 0 000 // FileId1 0 0 0 000 testPendingCompactions(config, 36, 9); }
/** * Test CLeaner Stat when there are no partition paths. */ @Test public void testCleaningWithZeroPartitonPaths() throws IOException { HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath).withAssumeDatePartitioning(true) .withCompactionConfig(HoodieCompactionConfig.newBuilder().withCleanerPolicy( HoodieCleaningPolicy.KEEP_LATEST_COMMITS).retainCommits(2).build()).build(); // Make a commit, although there are no partitionPaths. // Example use-case of this is when a client wants to create a table // with just some commit metadata, but no data/partitionPaths. HoodieTestUtils.createCommitFiles(basePath, "000"); HoodieTable table = HoodieTable.getHoodieTable( new HoodieTableMetaClient(jsc.hadoopConfiguration(), config.getBasePath(), true), config, jsc); List<HoodieCleanStat> hoodieCleanStatsOne = table.clean(jsc); assertTrue("HoodieCleanStats should be empty for a table with empty partitionPaths", hoodieCleanStatsOne.isEmpty()); }