@Test
public void testBoundedIOSimple() {
  Map<Long, List<Long>> sizesMap = Maps.newHashMap();
  sizesMap.put(120 * MB, Lists.newArrayList(60 * MB, 10 * MB, 80 * MB));
  sizesMap.put(110 * MB, Lists.newArrayList());
  sizesMap.put(100 * MB, Lists.newArrayList(MB));
  sizesMap.put(90 * MB, Lists.newArrayList(1024 * MB));
  BoundedIOCompactionStrategy strategy = new BoundedIOCompactionStrategy();
  HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp").withCompactionConfig(
      HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy).withTargetIOPerCompactionInMB(400).build())
      .build();
  List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap);
  List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations, new ArrayList<>());

  assertTrue("BoundedIOCompaction should have resulted in fewer compactions",
      returned.size() < operations.size());
  assertEquals("BoundedIOCompaction should have resulted in 2 compactions being chosen", 2, returned.size());

  // Total IO of the chosen operations, summed from the TOTAL_IO_MB metric
  Long returnedSize = returned.stream().map(s -> s.getMetrics().get(BoundedIOCompactionStrategy.TOTAL_IO_MB))
      .map(s -> s.longValue()).reduce((size1, size2) -> size1 + size2).orElse(0L);
  assertEquals("Should choose the first 2 compactions, which should result in a total IO of 610 MB", 610,
      (long) returnedSize);
}
@Test
public void testLogFileSizeCompactionSimple() {
  Map<Long, List<Long>> sizesMap = Maps.newHashMap();
  sizesMap.put(120 * MB, Lists.newArrayList(60 * MB, 10 * MB, 80 * MB));
  sizesMap.put(110 * MB, Lists.newArrayList());
  sizesMap.put(100 * MB, Lists.newArrayList(MB));
  sizesMap.put(90 * MB, Lists.newArrayList(1024 * MB));
  LogFileSizeBasedCompactionStrategy strategy = new LogFileSizeBasedCompactionStrategy();
  HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp").withCompactionConfig(
      HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy).withTargetIOPerCompactionInMB(400).build())
      .build();
  List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap);
  List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations, new ArrayList<>());

  assertTrue("LogFileSizeBasedCompactionStrategy should have resulted in fewer compactions",
      returned.size() < operations.size());
  assertEquals("LogFileSizeBasedCompactionStrategy should have resulted in 1 compaction", 1, returned.size());

  // Total IO of the chosen operation, summed from the TOTAL_IO_MB metric
  Long returnedSize = returned.stream().map(s -> s.getMetrics().get(BoundedIOCompactionStrategy.TOTAL_IO_MB))
      .map(s -> s.longValue()).reduce((size1, size2) -> size1 + size2).orElse(0L);
  assertEquals("Should choose the compaction with the largest log file size, which should result in a total IO of "
      + "1204 MB", 1204, (long) returnedSize);
}
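// A standalone check of the IO arithmetic behind the two expected values above. It assumes
// TOTAL_IO_MB for an operation is (base file read) + (log file reads) + (rewritten base file);
// that formula is inferred from the expected values in this excerpt, not from the strategy source.
public class CompactionIoMathSketch {
  public static void main(String[] args) {
    // BoundedIO apparently accumulates operations until the 400 MB target is crossed,
    // including the crossing operation, so the 120 MB and 110 MB base-file operations are chosen
    long boundedIo = (120 + (60 + 10 + 80) + 120)   // first operation: 390 MB
        + (110 + 0 + 110);                          // second operation: 220 MB
    System.out.println(boundedIo);                  // 610, matching testBoundedIOSimple

    // LogFileSizeBased orders by log footprint, so the 90 MB base file with 1024 MB of logs wins
    long logSizeIo = 90 + 1024 + 90;
    System.out.println(logSizeIo);                  // 1204, matching testLogFileSizeCompactionSimple
  }
}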
@Test
public void testPartitionAwareCompactionSimple() {
  Map<Long, List<Long>> sizesMap = Maps.newHashMap();
  sizesMap.put(120 * MB, Lists.newArrayList(60 * MB, 10 * MB, 80 * MB));
  sizesMap.put(110 * MB, Lists.newArrayList());
  sizesMap.put(100 * MB, Lists.newArrayList(MB));
  sizesMap.put(90 * MB, Lists.newArrayList(1024 * MB));
  Map<Long, String> keyToPartitionMap = new ImmutableMap.Builder<Long, String>()
      .put(120 * MB, partitionPaths[2])
      .put(110 * MB, partitionPaths[2])
      .put(100 * MB, partitionPaths[1])
      .put(90 * MB, partitionPaths[0])
      .build();
  DayBasedCompactionStrategy strategy = new DayBasedCompactionStrategy();
  HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp").withCompactionConfig(
      HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy)
          .withTargetPartitionsPerDayBasedCompaction(1)
          .build()).build();
  List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap, keyToPartitionMap);
  List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations, new ArrayList<>());

  assertTrue("DayBasedCompactionStrategy should have resulted in fewer compactions",
      returned.size() < operations.size());
  // Both chosen operations belong to the single target partition (partitionPaths[2])
  assertEquals("DayBasedCompactionStrategy should have resulted in 2 compactions", 2, returned.size());

  int comparison = strategy.getComparator()
      .compare(returned.get(returned.size() - 1).getPartitionPath(), returned.get(0).getPartitionPath());
  // Either the partition paths are sorted in descending order or they are equal
  assertTrue("DayBasedCompactionStrategy should sort partitions in descending order", comparison >= 0);
}
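// The descending partition ordering asserted above can be expressed with a plain reversed
// string comparator. A minimal, self-contained sketch of that invariant (illustrative only;
// the actual DayBasedCompactionStrategy comparator may be implemented differently):
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;

public class DescendingPartitionOrderSketch {
  public static void main(String[] args) {
    // Day-style partition paths (e.g. yyyy/MM/dd) sort correctly as zero-padded strings
    List<String> partitions = Arrays.asList("2017/01/01", "2017/01/03", "2017/01/02");
    Comparator<String> descending = Comparator.reverseOrder();
    partitions.sort(descending);
    System.out.println(partitions); // [2017/01/03, 2017/01/02, 2017/01/01]
    // The test's invariant: comparing the last element against the first yields >= 0
    System.out.println(descending.compare(partitions.get(partitions.size() - 1), partitions.get(0)) >= 0);
  }
}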
/**
 * Build a Hoodie write client.
 *
 * @param jsc Java Spark Context
 * @param basePath Base Path
 * @param schemaStr Schema
 * @param parallelism Parallelism
 * @param compactionStrategyClass Fully qualified class name of the compaction strategy to use, if any
 */
public static HoodieWriteClient createHoodieClient(JavaSparkContext jsc, String basePath, String schemaStr,
    int parallelism, Optional<String> compactionStrategyClass) throws Exception {
  HoodieCompactionConfig compactionConfig = compactionStrategyClass
      .map(strategy -> HoodieCompactionConfig.newBuilder().withInlineCompaction(false)
          .withCompactionStrategy(ReflectionUtils.loadClass(strategy)).build())
      .orElse(HoodieCompactionConfig.newBuilder().withInlineCompaction(false).build());
  HoodieWriteConfig config = HoodieWriteConfig.newBuilder().withPath(basePath)
      .withParallelism(parallelism, parallelism).withSchema(schemaStr)
      .combineInput(true, true)
      .withCompactionConfig(compactionConfig)
      .withIndexConfig(HoodieIndexConfig.newBuilder().withIndexType(HoodieIndex.IndexType.BLOOM).build())
      .build();
  return new HoodieWriteClient(jsc, config);
}
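// Example invocation of the helper above. The table path and parallelism are illustrative,
// and any concrete compaction strategy class on the classpath could be passed instead:
HoodieWriteClient client = createHoodieClient(jsc, "/tmp/hoodie/sample-table", schemaStr, 2,
    Optional.of(LogFileSizeBasedCompactionStrategy.class.getName()));
// Passing Optional.empty() keeps the default compaction strategy, with inline compaction off
HoodieWriteClient defaultClient = createHoodieClient(jsc, "/tmp/hoodie/sample-table", schemaStr, 2,
    Optional.empty());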
@Test
public void testUnBounded() {
  Map<Long, List<Long>> sizesMap = Maps.newHashMap();
  sizesMap.put(120 * MB, Lists.newArrayList(60 * MB, 10 * MB, 80 * MB));
  sizesMap.put(110 * MB, Lists.newArrayList());
  sizesMap.put(100 * MB, Lists.newArrayList(MB));
  sizesMap.put(90 * MB, Lists.newArrayList(1024 * MB));
  UnBoundedCompactionStrategy strategy = new UnBoundedCompactionStrategy();
  HoodieWriteConfig writeConfig = HoodieWriteConfig.newBuilder().withPath("/tmp")
      .withCompactionConfig(HoodieCompactionConfig.newBuilder().withCompactionStrategy(strategy).build()).build();
  List<HoodieCompactionOperation> operations = createCompactionOperations(writeConfig, sizesMap);
  List<HoodieCompactionOperation> returned = strategy.orderAndFilter(writeConfig, operations, new ArrayList<>());
  assertEquals("UnBounded should not re-order or filter", operations, returned);
}
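// UnBounded's contract, per the assertion above, is the identity transform. By contrast, a
// hypothetical bounded-count filter could be written as a plain helper like the sketch below
// (not a real CompactionStrategy subclass; the base-class contract beyond orderAndFilter is
// not visible in this excerpt):
static List<HoodieCompactionOperation> limitOperations(List<HoodieCompactionOperation> operations, int maxOps) {
  // Keep at most maxOps operations, preserving their original order
  return operations.stream().limit(maxOps).collect(java.util.stream.Collectors.toList());
}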