/**
 * Finalize the written data onto storage. Perform any final cleanups.
 *
 * @param jsc Spark Context
 * @param stats List of HoodieWriteStats
 * @throws HoodieIOException if some paths can't be finalized on storage
 */
public void finalizeWrite(JavaSparkContext jsc, List<HoodieWriteStat> stats) throws HoodieIOException {
  if (config.isConsistencyCheckEnabled()) {
    // Prefer the temp path (pre-rename location) when present; otherwise check the final path.
    List<String> pathsToCheck = stats.stream()
        .map(stat -> stat.getTempPath() != null ? stat.getTempPath() : stat.getPath())
        .collect(Collectors.toList());

    List<String> failingPaths = new ConsistencyCheck(config.getBasePath(), pathsToCheck, jsc,
        config.getFinalizeWriteParallelism())
        .check(MAX_CONSISTENCY_CHECKS, INITIAL_CONSISTENCY_CHECK_INTERVAL_MS);
    if (failingPaths.size() > 0) {
      throw new HoodieIOException("Could not verify consistency of paths: " + failingPaths);
    }
  }
}
sleepSafe(waitMs);
waitMs = waitMs * 2; // double the check interval on every failed attempt (exponential backoff)
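For context, that fragment is the backoff step inside ConsistencyCheck.check(maxAttempts, initialIntervalMs). The patch only shows the sleep-and-double step, so the following is a minimal sketch of a loop that could drive it; the relPaths field, the listMissingPaths helper, and the loop bookkeeping are assumptions, not quoted code.

import java.util.ArrayList;
import java.util.List;

// Sketch only: field and helper names are assumptions about ConsistencyCheck.
class ConsistencyCheckSketch {
  private final List<String> relPaths;

  ConsistencyCheckSketch(List<String> relPaths) {
    this.relPaths = relPaths;
  }

  public List<String> check(int maxAttempts, long initialIntervalMs) {
    List<String> remaining = new ArrayList<>(relPaths);
    long waitMs = initialIntervalMs;
    for (int attempt = 0; attempt < maxAttempts && !remaining.isEmpty(); attempt++) {
      remaining = listMissingPaths(remaining); // assumed: one listing pass keeping still-invisible paths
      if (remaining.isEmpty()) {
        break; // every path is visible on storage; no need to wait further
      }
      sleepSafe(waitMs);
      waitMs = waitMs * 2; // exponential backoff before the next attempt
    }
    return remaining; // non-empty => these paths never became visible
  }

  private List<String> listMissingPaths(List<String> paths) {
    // Placeholder for the storage-listing step (a Spark job in the real class).
    return new ArrayList<>(paths);
  }

  private void sleepSafe(long ms) {
    try {
      Thread.sleep(ms);
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt(); // restore interrupt status and stop waiting
    }
  }
}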
/**
 * Finalize the written data onto storage. Perform any final cleanups.
 *
 * @param jsc Spark Context
 * @param writeStatuses List of WriteStatus
 * @throws HoodieIOException if some paths can't be finalized on storage
 */
public void finalizeWrite(JavaSparkContext jsc, List<WriteStatus> writeStatuses) throws HoodieIOException {
  if (config.isConsistencyCheckEnabled()) {
    // Same check as above, but pulling the stat out of each WriteStatus.
    List<String> pathsToCheck = writeStatuses.stream()
        .map(ws -> ws.getStat().getTempPath() != null ? ws.getStat().getTempPath() : ws.getStat().getPath())
        .collect(Collectors.toList());

    List<String> failingPaths = new ConsistencyCheck(config.getBasePath(), pathsToCheck, jsc,
        config.getFinalizeWriteParallelism())
        .check(MAX_CONSISTENCY_CHECKS, INITIAL_CONSISTENCY_CHECK_INTERVAL_MS);
    if (failingPaths.size() > 0) {
      throw new HoodieIOException("Could not verify consistency of paths: " + failingPaths);
    }
  }
}
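For orientation, a hypothetical call site: the write client would collect the per-task WriteStatus objects and run finalizeWrite before committing the instant. Only finalizeWrite and HoodieIOException appear in the patch; the class shape, the collect step, and the commit ordering below are assumptions.

import java.util.List;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;

// Hypothetical call site (sketch). Hudi imports (WriteStatus, HoodieTable,
// HoodieIOException) are omitted since their packages vary by version.
class CommitFlowSketch {
  void finalizeAndCommit(JavaSparkContext jsc, HoodieTable table, JavaRDD<WriteStatus> statusRDD) {
    // Gather per-task write results on the driver.
    List<WriteStatus> writeStatuses = statusRDD.collect();
    // Blocks with exponential backoff until every written path is visible,
    // or throws HoodieIOException after MAX_CONSISTENCY_CHECKS attempts.
    table.finalizeWrite(jsc, writeStatuses);
    // Only now would the instant be committed (commit step omitted).
  }
}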
@Test
public void testCheckPassingAndFailing() throws Exception {
  HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");
  HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f2");
  HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f3");

  // All listed paths exist => no failing paths expected.
  ConsistencyCheck passing = new ConsistencyCheck(basePath,
      Arrays.asList("partition/path/f1_0_000.parquet", "partition/path/f2_0_000.parquet"), jsc, 2);
  assertEquals(0, passing.check(1, 1000).size());

  // f4 was never created => exactly one failing path expected.
  ConsistencyCheck failing = new ConsistencyCheck(basePath,
      Arrays.asList("partition/path/f1_0_000.parquet", "partition/path/f4_0_000.parquet"), jsc, 2);
  assertEquals(1, failing.check(1, 1000).size());
}
@Test
public void testExponentialBackoff() throws Exception {
  HoodieClientTestUtils.fakeDataFile(basePath, "partition/path", "000", "f1");

  JavaSparkContext jscSpy = spy(jsc);
  ConsistencyCheck failing = new ConsistencyCheck(basePath,
      Arrays.asList("partition/path/f1_0_000.parquet", "partition/path/f2_0_000.parquet"), jscSpy, 2);

  long startMs = System.currentTimeMillis();
  assertEquals(1, failing.check(5, 10).size());
  // Five attempts with a doubling interval sleep at least 10 + 20 + 40 + 80 ms in between.
  assertTrue((System.currentTimeMillis() - startMs) > (10 + 20 + 40 + 80));
  // Each attempt lists paths via one Spark job, so parallelize() must run five times.
  verify(jscSpy, times(5)).parallelize(anyList(), anyInt());
}
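The spy assertion above implies each check attempt runs one Spark job over the candidate paths. A sketch of what that per-attempt listing step might look like, assuming the real implementation filters paths by existence on the filesystem (the FileSystem handling inside the task is an assumption):

import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.spark.api.java.JavaSparkContext;

// Sketch of the listing step: parallelize the relative paths and keep
// those not yet visible under basePath. Not quoted from the patch.
class ListingStepSketch {
  static List<String> missingPaths(JavaSparkContext jsc, String basePath,
      List<String> relPaths, int parallelism) {
    return jsc.parallelize(relPaths, parallelism)
        .filter(relPath -> {
          // Assumed per-task filesystem handle; real code would reuse a serialized config.
          FileSystem fs = new Path(basePath).getFileSystem(new Configuration());
          return !fs.exists(new Path(basePath, relPath)); // keep paths still invisible
        })
        .collect();
  }
}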