/**
 * Launches the background sweeper daemon threads.
 *
 * <p>The number of threads is read once from {@code sweepThreads}. For every index from 1 up to
 * that count, a new {@code BackgroundSweepThread} is wrapped in a daemon {@link Thread}, named
 * {@code "BackgroundSweeper <index>"}, started, and recorded in {@code daemons}.
 *
 * <p>The precondition check rejects the call when {@code daemons} has already been assigned.
 */
@Override
public synchronized void runInBackground() {
    Preconditions.checkState(daemons == null);

    final int threadCount = sweepThreads.get();
    daemons = Sets.newHashSetWithExpectedSize(threadCount);

    int threadNumber = 1;
    while (threadNumber <= threadCount) {
        Thread sweeperThread = new Thread(new BackgroundSweepThread(
                lockService,
                nextTableToSweepProvider,
                sweepBatchConfigSource,
                isSweepEnabled,
                sweepPauseMillis,
                sweepPriorityOverrideConfig,
                specificTableSweeper,
                sweepOutcomeMetrics,
                shuttingDown,
                threadNumber));
        sweeperThread.setName("BackgroundSweeper " + threadNumber);
        sweeperThread.setDaemon(true);
        sweeperThread.start();
        daemons.add(sweeperThread);
        threadNumber++;
    }
}
// Thread body of the background sweeper: opens the sweep locks in a try-with-resources, waits until the
// specific-table sweeper reports it is initialized, sleeps for the "sweep has not run" backoff, logs the
// thread index, and then loops forever: run one sweep attempt via checkConfigAndRunSweep, log the outcome,
// update batch size and metrics, and sleep until the next run.
// NOTE(review): the text below looks truncated -- the loop/try blocks are never closed and the catch
// clauses that would bind `t` and separate the SHUTDOWN handling from the FATAL handling are not
// visible here. Confirm against the complete file before relying on this description.
@Override public void run() { try (SingleLockService locks = createSweepLocks()) { waitUntilSpecificTableSweeperIsInitialized(); sleepFor(getBackoffTimeWhenSweepHasNotRun()); log.info("Starting background sweeper with thread index {}", SafeArg.of("threadIndex", threadIndex)); while (true) { SweepOutcome outcome = checkConfigAndRunSweep(locks); logOutcome(outcome); updateBatchSize(outcome); updateMetrics(outcome); sleepUntilNextRun(outcome); closeTableLockIfHeld(); sweepOutcomeMetrics.registerOccurrenceOf( SweepOutcome.SHUTDOWN); log.error("BackgroundSweeper failed fatally and will not rerun until restarted: {}", UnsafeArg.of("message", t.getMessage()), t); closeTableLockIfHeld(); sweepOutcomeMetrics.registerOccurrenceOf( SweepOutcome.FATAL);
/**
 * Acquires (or refreshes) the sweep locks and, when they are held, performs one sweep iteration.
 *
 * @param locks lock service used to take or refresh the sweep locks
 * @return the result of {@code runOnce()} when the locks are held;
 *         {@code UNABLE_TO_ACQUIRE_LOCKS} when they are not (the table lock is released first);
 *         {@code ERROR} when any {@link RuntimeException} is thrown along the way
 * @throws InterruptedException propagated from the lock acquisition
 */
private SweepOutcome grabLocksAndRun(SingleLockService locks) throws InterruptedException {
    try {
        locks.lockOrRefresh();
        if (!locks.haveLocks()) {
            log.debug("Skipping sweep because sweep is running elsewhere.");
            closeTableLockIfHeld();
            return SweepOutcome.UNABLE_TO_ACQUIRE_LOCKS;
        }
        return runOnce();
    } catch (RuntimeException sweepFailure) {
        // Record the failure in the error metric before reporting it.
        specificTableSweeper.updateSweepErrorMetric();
        log.info("Sweep failed", sweepFailure);
        return SweepOutcome.ERROR;
    }
}
/**
 * Sleeps for a duration chosen from the outcome of the last sweep attempt.
 *
 * <ul>
 *   <li>{@code SUCCESS}: the configured sweep pause ({@code sweepPauseMillis}).</li>
 *   <li>{@code NOTHING_TO_SWEEP}: the "nothing to sweep" backoff.</li>
 *   <li>anything else: the "sweep has not run" backoff.</li>
 * </ul>
 *
 * @param outcome result of the most recent sweep attempt
 * @throws InterruptedException if the sleep is interrupted
 */
private void sleepUntilNextRun(SweepOutcome outcome) throws InterruptedException {
    // The default backoff is always computed first, even when another duration ends up being used.
    final Duration defaultBackoff = getBackoffTimeWhenSweepHasNotRun();

    final Duration pause;
    if (SweepOutcome.SUCCESS == outcome) {
        pause = Duration.ofMillis(sweepPauseMillis.get());
    } else if (SweepOutcome.NOTHING_TO_SWEEP == outcome) {
        pause = getBackoffTimeWhenNothingToSweep();
    } else {
        pause = defaultBackoff;
    }

    sleepFor(pause);
}
/**
 * Runs one sweep attempt if sweep is currently enabled.
 *
 * @param locks lock service handed on to {@code grabLocksAndRun}
 * @return {@code DISABLED} (after releasing any held table lock) when sweep is switched off,
 *         otherwise whatever {@code grabLocksAndRun} returns
 * @throws InterruptedException propagated from {@code grabLocksAndRun}
 */
@VisibleForTesting
SweepOutcome checkConfigAndRunSweep(SingleLockService locks) throws InterruptedException {
    if (!isSweepEnabled.get()) {
        log.debug("Skipping sweep because it is currently disabled.");
        closeTableLockIfHeld();
        return SweepOutcome.DISABLED;
    }
    return grabLocksAndRun(locks);
}
/**
 * Blocks until {@code specificTableSweeper} reports that it is initialized.
 *
 * <p>While it is not, an informational message is logged and the thread sleeps for the
 * "sweep has not run" backoff before checking again.
 *
 * @throws InterruptedException if a sleep between checks is interrupted
 */
private void waitUntilSpecificTableSweeperIsInitialized() throws InterruptedException {
    for (;;) {
        if (specificTableSweeper.isInitialized()) {
            return;
        }
        log.info("Sweep Priority Table and Sweep Progress Table are not initialized yet. If you have enabled "
                + "asynchronous initialization, these tables are being initialized asynchronously. Background "
                + "sweeper will start once the initialization is complete.");
        sleepFor(getBackoffTimeWhenSweepHasNotRun());
    }
}
/**
 * End-to-end smoke test: creates three tables with different sweep strategies, writes several
 * batches of cells, drives the background sweeper for 50 iterations, and then verifies that
 * TABLE_1 and TABLE_2 were swept and that both appear in the stored sweep priorities.
 */
@Test
public void smokeTest() throws Exception {
    createTable(TABLE_1, SweepStrategy.CONSERVATIVE);
    createTable(TABLE_2, SweepStrategy.THOROUGH);
    createTable(TABLE_3, SweepStrategy.NOTHING);

    // Write order is kept exactly as-is; the expectations below depend on these writes.
    putManyCells(TABLE_1, 100, 110);
    putManyCells(TABLE_1, 103, 113);
    putManyCells(TABLE_1, 105, 115);
    putManyCells(TABLE_2, 101, 111);
    putManyCells(TABLE_2, 104, 114);
    putManyCells(TABLE_3, 120, 130);

    try (SingleLockService sweepLocks = backgroundSweeper.createSweepLocks()) {
        int remainingIterations = 50;
        while (remainingIterations > 0) {
            backgroundSweeper.checkConfigAndRunSweep(sweepLocks);
            remainingIterations--;
        }
    }

    verifyTableSwept(TABLE_1, 75, true);
    verifyTableSwept(TABLE_2, 58, false);

    List<SweepPriority> priorities = txManager.runTaskReadOnly(
            txn -> SweepPriorityStoreImpl.create(kvs, SweepTableFactory.of(), false).loadNewPriorities(txn));
    Assert.assertTrue(priorities.stream().anyMatch(priority -> priority.tableRef().equals(TABLE_1)));
    Assert.assertTrue(priorities.stream().anyMatch(priority -> priority.tableRef().equals(TABLE_2)));
}
private Optional<TableToSweep> getTableToSweep() { return specificTableSweeper.getTxManager().runTaskWithRetry( tx -> { Optional<SweepProgress> progress = currentTable.flatMap( tableToSweep -> specificTableSweeper.getSweepProgressStore().loadProgress( tableToSweep.getTableRef())); SweepPriorityOverrideConfig overrideConfig = sweepPriorityOverrideConfig.get(); if (progress.map( realProgress -> shouldContinueSweepingCurrentTable(realProgress, overrideConfig)) .orElse(false)) { try { // If we're here, currentTable exists and we're going to sweep it again this iteration updateProgressAndRefreshLock(progress.get()); return currentTable; } catch (InterruptedException ex) { log.info("Sweep lost the lock for table {}", LoggingArgs.tableRef(progress.get().tableRef())); closeTableLockIfHeld(); currentTable = Optional.empty(); // We'll fall through and choose a new table } } log.info("Sweep is choosing a new table to sweep."); closeTableLockIfHeld(); return getNextTableToSweep(tx, overrideConfig); }); }
/**
 * After a single incomplete sweep iteration (results carry a next start row), the sweep-time
 * metric must be updated with that iteration's own time and with the total elapsed time since
 * sweeping started (checked to within 5%).
 */
@Test
public void testMetricsRecordedAfterIncompleteRunForOneIterationOnly() {
    ArgumentCaptor<Long> sweepTimeCaptor = ArgumentCaptor.forClass(Long.class);
    ArgumentCaptor<Long> totalElapsedCaptor = ArgumentCaptor.forClass(Long.class);

    setNoProgress();
    setNextTableToSweep(TABLE_REF);

    byte[] nextStartRow = {1, 2, 3};
    SweepResults intermediateResults = SweepResults.builder()
            .staleValuesDeleted(2)
            .cellTsPairsExamined(10)
            .minSweptTimestamp(12345L)
            .nextStartRow(Optional.of(nextStartRow))
            .timeInMillis(10L)
            .timeSweepStarted(20L)
            .build();
    setupTaskRunner(intermediateResults);

    backgroundSweeper.runOnce();

    Mockito.verify(sweepMetrics).updateSweepTime(sweepTimeCaptor.capture(), totalElapsedCaptor.capture());

    Assertions.assertThat(intermediateResults.getTimeInMillis())
            .isEqualTo(sweepTimeCaptor.getValue());
    Assertions.assertThat(intermediateResults.getTimeElapsedSinceStartedSweeping())
            .isCloseTo(totalElapsedCaptor.getValue(), Percentage.withPercentage(5d));
}
@VisibleForTesting SweepOutcome runOnce() { Optional<TableToSweep> tableToSweep = getTableToSweep(); if (!tableToSweep.isPresent()) { // Don't change this log statement. It's parsed by test automation code. log.debug( "Skipping sweep because no table has enough new writes to be worth sweeping at the moment."); return SweepOutcome.NOTHING_TO_SWEEP; } SweepBatchConfig batchConfig = sweepBatchConfigSource.getAdjustedSweepConfig(); try { specificTableSweeper.runOnceAndSaveResults(tableToSweep.get(), batchConfig); return SweepOutcome.SUCCESS; } catch (InsufficientConsistencyException e) { log.info("Could not sweep because not all nodes of the database are online.", e); return SweepOutcome.NOT_ENOUGH_DB_NODES_ONLINE; } catch (RuntimeException e) { specificTableSweeper.updateSweepErrorMetric(); return determineCauseOfFailure(e, tableToSweep.get()); } }
private Optional<TableToSweep> getNextTableToSweep(Transaction tx, SweepPriorityOverrideConfig overrideConfig) { Optional<TableToSweep> nextTableToSweep = nextTableToSweepProvider.getNextTableToSweep( tx, specificTableSweeper.getSweepRunner().getConservativeSweepTimestamp(), overrideConfig); if (nextTableToSweep.isPresent()) { // Check if we're resuming this table after a previous sweep nextTableToSweep = augmentWithProgress(nextTableToSweep.get()); currentTable = nextTableToSweep; } return nextTableToSweep; }
/**
 * Classifies a sweep failure by checking whether the table being swept still exists.
 *
 * @param originalException the exception that made the sweep fail
 * @param tableToSweep the table that was being swept
 * @return {@code TABLE_DROPPED_WHILE_SWEEPING} when the table is no longer listed by the
 *         key-value service (its sweep progress is cleared and its sweep lock closed);
 *         {@code ERROR} when the table still exists or when the existence check itself fails
 */
private SweepOutcome determineCauseOfFailure(Exception originalException, TableToSweep tableToSweep) {
    try {
        boolean tableStillExists =
                specificTableSweeper.getKvs().getAllTableNames().contains(tableToSweep.getTableRef());
        if (tableStillExists) {
            log.info("The background sweep job failed unexpectedly; will retry with a lower batch size...",
                    originalException);
            return SweepOutcome.ERROR;
        }

        clearSweepProgress(tableToSweep.getTableRef());
        log.info("The table being swept by the background sweeper was dropped, moving on...");
        tableToSweep.getSweepLock().close();
        return SweepOutcome.TABLE_DROPPED_WHILE_SWEEPING;
    } catch (RuntimeException checkFailure) {
        // Log both the original sweep failure and the failure of the follow-up check.
        log.warn("Sweep failed", originalException);
        log.warn("Failed to check whether the table being swept was dropped. Retrying...", checkFailure);
        return SweepOutcome.ERROR;
    }
}
/**
 * End-to-end smoke test: creates three tables with different sweep strategies, writes several
 * batches of cells, drives the background sweeper for 50 iterations, and then verifies that
 * TABLE_1 and TABLE_2 were swept and that both appear in the stored sweep priorities.
 */
@Test
public void smokeTest() throws Exception {
    createTable(TABLE_1, SweepStrategy.CONSERVATIVE);
    createTable(TABLE_2, SweepStrategy.THOROUGH);
    createTable(TABLE_3, SweepStrategy.NOTHING);

    // Write order is kept exactly as-is; the expectations below depend on these writes.
    putManyCells(TABLE_1, 100, 110);
    putManyCells(TABLE_1, 103, 113);
    putManyCells(TABLE_1, 105, 115);
    putManyCells(TABLE_2, 101, 111);
    putManyCells(TABLE_2, 104, 114);
    putManyCells(TABLE_3, 120, 130);

    try (SingleLockService sweepLocks = backgroundSweeper.createSweepLocks()) {
        int remainingIterations = 50;
        while (remainingIterations > 0) {
            backgroundSweeper.checkConfigAndRunSweep(sweepLocks);
            remainingIterations--;
        }
    }

    verifyTableSwept(TABLE_1, 75, true);
    verifyTableSwept(TABLE_2, 58, false);

    List<SweepPriority> priorities = txManager.runTaskReadOnly(
            txn -> SweepPriorityStoreImpl.create(kvs, SweepTableFactory.of(), false).loadNewPriorities(txn));
    Assert.assertTrue(priorities.stream().anyMatch(priority -> priority.tableRef().equals(TABLE_1)));
    Assert.assertTrue(priorities.stream().anyMatch(priority -> priority.tableRef().equals(TABLE_2)));
}
private Optional<TableToSweep> getTableToSweep() { return specificTableSweeper.getTxManager().runTaskWithRetry( tx -> { Optional<SweepProgress> progress = currentTable.flatMap( tableToSweep -> specificTableSweeper.getSweepProgressStore().loadProgress( tableToSweep.getTableRef())); SweepPriorityOverrideConfig overrideConfig = sweepPriorityOverrideConfig.get(); if (progress.map( realProgress -> shouldContinueSweepingCurrentTable(realProgress, overrideConfig)) .orElse(false)) { try { // If we're here, currentTable exists and we're going to sweep it again this iteration updateProgressAndRefreshLock(progress.get()); return currentTable; } catch (InterruptedException ex) { log.info("Sweep lost the lock for table {}", LoggingArgs.tableRef(progress.get().tableRef())); closeTableLockIfHeld(); currentTable = Optional.empty(); // We'll fall through and choose a new table } } log.info("Sweep is choosing a new table to sweep."); closeTableLockIfHeld(); return getNextTableToSweep(tx, overrideConfig); }); }
/**
 * After a fresh, incomplete sweep iteration (no prior progress, results carry a next start row),
 * the priority store must be updated for the table with a new write count of zero.
 */
@Test
public void testPutZeroWriteCountAfterFreshIncompleteRun() {
    setNoProgress();
    setNextTableToSweep(TABLE_REF);

    byte[] nextStartRow = {1, 2, 3};
    SweepResults incompleteResults = SweepResults.builder()
            .staleValuesDeleted(2)
            .cellTsPairsExamined(10)
            .minSweptTimestamp(12345L)
            .nextStartRow(Optional.of(nextStartRow))
            .timeInMillis(10L)
            .timeSweepStarted(20L)
            .build();
    setupTaskRunner(incompleteResults);

    backgroundSweeper.runOnce();

    Mockito.verify(priorityStore).update(
            any(),
            eq(TABLE_REF),
            eq(ImmutableUpdateSweepPriority.builder().newWriteCount(0L).build()));
}
/**
 * Sleeps for a duration chosen from the outcome of the last sweep attempt.
 *
 * <ul>
 *   <li>{@code SUCCESS}: the configured sweep pause ({@code sweepPauseMillis}).</li>
 *   <li>{@code NOTHING_TO_SWEEP}: the "nothing to sweep" backoff.</li>
 *   <li>anything else: the "sweep has not run" backoff.</li>
 * </ul>
 *
 * @param outcome result of the most recent sweep attempt
 * @throws InterruptedException if the sleep is interrupted
 */
private void sleepUntilNextRun(SweepOutcome outcome) throws InterruptedException {
    // The default backoff is always computed first, even when another duration ends up being used.
    final Duration defaultBackoff = getBackoffTimeWhenSweepHasNotRun();

    final Duration pause;
    if (SweepOutcome.SUCCESS == outcome) {
        pause = Duration.ofMillis(sweepPauseMillis.get());
    } else if (SweepOutcome.NOTHING_TO_SWEEP == outcome) {
        pause = getBackoffTimeWhenNothingToSweep();
    } else {
        pause = defaultBackoff;
    }

    sleepFor(pause);
}
/**
 * Runs one sweep attempt if sweep is currently enabled.
 *
 * @param locks lock service handed on to {@code grabLocksAndRun}
 * @return {@code DISABLED} (after releasing any held table lock) when sweep is switched off,
 *         otherwise whatever {@code grabLocksAndRun} returns
 * @throws InterruptedException propagated from {@code grabLocksAndRun}
 */
@VisibleForTesting
SweepOutcome checkConfigAndRunSweep(SingleLockService locks) throws InterruptedException {
    if (!isSweepEnabled.get()) {
        log.debug("Skipping sweep because it is currently disabled.");
        closeTableLockIfHeld();
        return SweepOutcome.DISABLED;
    }
    return grabLocksAndRun(locks);
}
/**
 * Blocks until {@code specificTableSweeper} reports that it is initialized.
 *
 * <p>While it is not, an informational message is logged and the thread sleeps for the
 * "sweep has not run" backoff before checking again.
 *
 * @throws InterruptedException if a sleep between checks is interrupted
 */
private void waitUntilSpecificTableSweeperIsInitialized() throws InterruptedException {
    for (;;) {
        if (specificTableSweeper.isInitialized()) {
            return;
        }
        log.info("Sweep Priority Table and Sweep Progress Table are not initialized yet. If you have enabled "
                + "asynchronous initialization, these tables are being initialized asynchronously. Background "
                + "sweeper will start once the initialization is complete.");
        sleepFor(getBackoffTimeWhenSweepHasNotRun());
    }
}
@VisibleForTesting SweepOutcome runOnce() { Optional<TableToSweep> tableToSweep = getTableToSweep(); if (!tableToSweep.isPresent()) { // Don't change this log statement. It's parsed by test automation code. log.debug( "Skipping sweep because no table has enough new writes to be worth sweeping at the moment."); return SweepOutcome.NOTHING_TO_SWEEP; } SweepBatchConfig batchConfig = sweepBatchConfigSource.getAdjustedSweepConfig(); try { specificTableSweeper.runOnceAndSaveResults(tableToSweep.get(), batchConfig); return SweepOutcome.SUCCESS; } catch (InsufficientConsistencyException e) { log.info("Could not sweep because not all nodes of the database are online.", e); return SweepOutcome.NOT_ENOUGH_DB_NODES_ONLINE; } catch (RuntimeException e) { specificTableSweeper.updateSweepErrorMetric(); return determineCauseOfFailure(e, tableToSweep.get()); } }
private Optional<TableToSweep> getNextTableToSweep(Transaction tx, SweepPriorityOverrideConfig overrideConfig) { Optional<TableToSweep> nextTableToSweep = nextTableToSweepProvider.getNextTableToSweep( tx, specificTableSweeper.getSweepRunner().getConservativeSweepTimestamp(), overrideConfig); if (nextTableToSweep.isPresent()) { // Check if we're resuming this table after a previous sweep nextTableToSweep = augmentWithProgress(nextTableToSweep.get()); currentTable = nextTableToSweep; } return nextTableToSweep; }