SweepResults runOneIteration(TableReference tableRef, byte[] startRow, SweepBatchConfig batchConfig) { try { SweepResults results = sweepRunner.run(tableRef, batchConfig, startRow); logSweepPerformance(tableRef, startRow, results); return results; } catch (RuntimeException e) { // This error may be logged on some paths above, but I prefer to log defensively. logSweepError(tableRef, startRow, batchConfig, e); throw e; } }
/**
 * Provider method (annotated {@code @Provides @Singleton}) that assembles the {@link SweepTaskRunner}.
 *
 * <p>The unreadable and immutable timestamp suppliers come from {@code unreadableTs} and
 * {@code immutableTs} respectively; when those hold no value, the corresponding getter on
 * {@code txm} is used instead. The runner is given a freshly built {@link CellsSweeper} whose
 * only follower is {@code follower}.
 *
 * @param txm transaction manager; fallback source of both timestamps and passed to the cells sweeper
 * @param kvs key-value service shared by the runner and the cells sweeper
 * @param transactionService forwarded to the runner
 * @param sweepStrategyManager forwarded to the runner
 * @param follower the single follower registered with the cells sweeper
 * @param persistentLockManager forwarded to the cells sweeper
 * @param config not read by this method
 * @return the configured sweep task runner
 */
@Provides
@Singleton
public SweepTaskRunner provideSweepTaskRunner(SerializableTransactionManager txm,
        @Named("kvs") KeyValueService kvs,
        TransactionService transactionService,
        SweepStrategyManager sweepStrategyManager,
        Follower follower,
        PersistentLockManager persistentLockManager,
        ServicesConfig config) {
    LongSupplier unreadableTimestamp = unreadableTs.orElse(txm::getUnreadableTimestamp);
    LongSupplier immutableTimestamp = immutableTs.orElse(txm::getImmutableTimestamp);

    CellsSweeper cellsSweeper = new CellsSweeper(
            txm,
            kvs,
            persistentLockManager,
            ImmutableList.of(follower));

    return new SweepTaskRunner(
            kvs,
            unreadableTimestamp,
            immutableTimestamp,
            transactionService,
            sweepStrategyManager,
            cellsSweeper);
}
private SweepResults runInternal( TableReference tableRef, SweepBatchConfig batchConfig, byte[] startRow, RunType runType) { Preconditions.checkNotNull(tableRef, "tableRef cannot be null"); Preconditions.checkState(!AtlasDbConstants.HIDDEN_TABLES.contains(tableRef)); if (tableRef.getQualifiedName().startsWith(AtlasDbConstants.NAMESPACE_PREFIX)) { // this happens sometimes; I think it's because some places in the code can // start this sweeper without doing the full normally ordered KVSModule startup. // I did check and sweep.stats did contain the FQ table name for all of the tables, // so it is at least broken in some way that still allows namespaced tables to eventually be swept. log.warn("The sweeper should not be run on tables passed through namespace mapping."); return SweepResults.createEmptySweepResultWithNoMoreToSweep(); } if (keyValueService.getMetadataForTable(tableRef).length == 0) { log.warn("The sweeper tried to sweep table '{}', but the table does not exist. Skipping table.", LoggingArgs.tableRef("tableRef", tableRef)); return SweepResults.createEmptySweepResultWithNoMoreToSweep(); } SweepStrategy sweepStrategy = sweepStrategyManager.get().getOrDefault(tableRef, SweepStrategy.CONSERVATIVE); Optional<Sweeper> maybeSweeper = Sweeper.of(sweepStrategy); return maybeSweeper.map(sweeper -> doRun(tableRef, batchConfig, startRow, runType, sweeper)) .orElseGet(SweepResults::createEmptySweepResultWithNoMoreToSweep); }
tableRef, request)) { ExaminedCellLimit limit = new ExaminedCellLimit(startRow, batchConfig.maxCellTsPairsToExamine()); Iterator<BatchOfCellsToSweep> batchesToSweep = getBatchesToSweep( candidates, batchConfig, sweepableCellFilter, limit); long totalCellTsPairsExamined = 0; long cellsDeleted = sweepBatch(tableRef, batch.cells(), runType, 2 * batchConfig.deleteBatchSize()); totalCellTsPairsDeleted += cellsDeleted;
/**
 * Runs the sweep logic for {@code tableRef} in {@link RunType#DRY} mode.
 *
 * <p>NOTE(review): the precise effect of the DRY run type is decided further down the call
 * chain and is not visible here.
 *
 * @param tableRef table to sweep
 * @param batchConfig batch configuration for this run
 * @param startRow row to start from
 * @return the results produced by {@code runInternal}
 */
public SweepResults dryRun(TableReference tableRef, SweepBatchConfig batchConfig, byte[] startRow) {
    final RunType mode = RunType.DRY;
    return runInternal(tableRef, batchConfig, startRow, mode);
}
private Optional<TableToSweep> getNextTableToSweep(Transaction tx, SweepPriorityOverrideConfig overrideConfig) { Optional<TableToSweep> nextTableToSweep = nextTableToSweepProvider.getNextTableToSweep( tx, specificTableSweeper.getSweepRunner().getConservativeSweepTimestamp(), overrideConfig); if (nextTableToSweep.isPresent()) { // Check if we're resuming this table after a previous sweep nextTableToSweep = augmentWithProgress(nextTableToSweep.get()); currentTable = nextTableToSweep; } return nextTableToSweep; }
tableRef, request)) { ExaminedCellLimit limit = new ExaminedCellLimit(startRow, batchConfig.maxCellTsPairsToExamine()); Iterator<BatchOfCellsToSweep> batchesToSweep = getBatchesToSweep( candidates, batchConfig, sweepableCellFilter, limit); long totalCellTsPairsExamined = 0; long cellsDeleted = sweepBatch(tableRef, batch.cells(), runType, 2 * batchConfig.deleteBatchSize()); totalCellTsPairsDeleted += cellsDeleted;
/**
 * Runs the sweep logic for {@code tableRef} in {@link RunType#FULL} mode.
 *
 * @param tableRef table to sweep
 * @param batchConfig batch configuration for this run
 * @param startRow row to start from
 * @return the results produced by {@code runInternal}
 */
public SweepResults run(TableReference tableRef, SweepBatchConfig batchConfig, byte[] startRow) {
    final RunType mode = RunType.FULL;
    return runInternal(tableRef, batchConfig, startRow, mode);
}
private Optional<TableToSweep> getNextTableToSweep(Transaction tx, SweepPriorityOverrideConfig overrideConfig) { Optional<TableToSweep> nextTableToSweep = nextTableToSweepProvider.getNextTableToSweep( tx, specificTableSweeper.getSweepRunner().getConservativeSweepTimestamp(), overrideConfig); if (nextTableToSweep.isPresent()) { // Check if we're resuming this table after a previous sweep nextTableToSweep = augmentWithProgress(nextTableToSweep.get()); currentTable = nextTableToSweep; } return nextTableToSweep; }
/**
 * Stubs {@code sweepTaskRunner} so that {@code run} returns {@code results} whenever it is
 * invoked with {@code tableRef} as its first argument; the remaining two arguments match anything.
 *
 * @param tableRef table reference the stub is keyed on
 * @param results canned results the stubbed {@code run} call returns
 */
protected void setupTaskRunner(TableReference tableRef, SweepResults results) { doReturn(results).when(sweepTaskRunner).run(eq(tableRef), any(), any()); }
/**
 * Provider method (annotated {@code @Provides @Singleton}) that assembles the {@link SweepTaskRunner}.
 *
 * <p>Both timestamp sources are method references on {@code txm}
 * ({@code getUnreadableTimestamp} and {@code getImmutableTimestamp}). The runner is given a
 * freshly built {@link CellsSweeper} whose only follower is {@code follower}.
 *
 * @param txm transaction manager; source of both timestamps and passed to the cells sweeper
 * @param kvs key-value service shared by the runner and the cells sweeper
 * @param transactionService forwarded to the runner
 * @param sweepStrategyManager forwarded to the runner
 * @param follower the single follower registered with the cells sweeper
 * @param persistentLockManager forwarded to the cells sweeper
 * @param config not read by this method
 * @return the configured sweep task runner
 */
@Provides
@Singleton
public SweepTaskRunner provideSweepTaskRunner(SerializableTransactionManager txm,
        @Named("kvs") KeyValueService kvs,
        TransactionService transactionService,
        SweepStrategyManager sweepStrategyManager,
        Follower follower,
        PersistentLockManager persistentLockManager,
        ServicesConfig config) {
    CellsSweeper cellsSweeper = new CellsSweeper(
            txm,
            kvs,
            persistentLockManager,
            ImmutableList.of(follower));

    return new SweepTaskRunner(
            kvs,
            txm::getUnreadableTimestamp,
            txm::getImmutableTimestamp,
            transactionService,
            sweepStrategyManager,
            cellsSweeper);
}
/**
 * Dry-run entry point: delegates to {@code runInternal} with {@link RunType#DRY}.
 *
 * <p>NOTE(review): what a DRY run skips compared to a FULL run is decided downstream of
 * {@code runInternal} and cannot be confirmed from this method alone.
 *
 * @param tableRef table to sweep
 * @param batchConfig batch configuration for this run
 * @param startRow row to start from
 * @return whatever {@code runInternal} reports for the dry run
 */
public SweepResults dryRun(TableReference tableRef, SweepBatchConfig batchConfig, byte[] startRow) {
    final RunType dryMode = RunType.DRY;
    SweepResults dryRunResults = runInternal(tableRef, batchConfig, startRow, dryMode);
    return dryRunResults;
}
private SweepResults runInternal( TableReference tableRef, SweepBatchConfig batchConfig, byte[] startRow, RunType runType) { Preconditions.checkNotNull(tableRef, "tableRef cannot be null"); Preconditions.checkState(!AtlasDbConstants.HIDDEN_TABLES.contains(tableRef)); if (tableRef.getQualifiedName().startsWith(AtlasDbConstants.NAMESPACE_PREFIX)) { // this happens sometimes; I think it's because some places in the code can // start this sweeper without doing the full normally ordered KVSModule startup. // I did check and sweep.stats did contain the FQ table name for all of the tables, // so it is at least broken in some way that still allows namespaced tables to eventually be swept. log.warn("The sweeper should not be run on tables passed through namespace mapping."); return SweepResults.createEmptySweepResultWithNoMoreToSweep(); } if (keyValueService.getMetadataForTable(tableRef).length == 0) { log.warn("The sweeper tried to sweep table '{}', but the table does not exist. Skipping table.", LoggingArgs.tableRef("tableRef", tableRef)); return SweepResults.createEmptySweepResultWithNoMoreToSweep(); } SweepStrategy sweepStrategy = sweepStrategyManager.get().getOrDefault(tableRef, SweepStrategy.CONSERVATIVE); Optional<Sweeper> maybeSweeper = Sweeper.of(sweepStrategy); return maybeSweeper.map(sweeper -> doRun(tableRef, batchConfig, startRow, runType, sweeper)) .orElseGet(SweepResults::createEmptySweepResultWithNoMoreToSweep); }
/**
 * Runs a full sweep of {@code TABLE_NAME} through {@code spiedSweepRunner}, starting from the
 * empty row, and records what the mocked {@code cellsSweeper} was asked to sweep.
 *
 * <p>Before running, {@code sweepTimestamp} is set to {@code Long.MAX_VALUE}. Every call to
 * {@code cellsSweeper.sweepCells} for {@code TABLE_NAME} is intercepted: a copy of its third
 * argument is appended to the captured list and the call itself does nothing.
 *
 * @param cellsSweeper mock whose {@code sweepCells} invocations are captured
 * @param spiedSweepRunner runner under test; expected to be wired to {@code cellsSweeper}
 * @param maxCellTsPairsToExamine value for the batch config's {@code maxCellTsPairsToExamine}
 * @param candidateBatchSize value for the batch config's {@code candidateBatchSize}
 * @param deleteBatchSize value for the batch config's {@code deleteBatchSize}
 * @return pair of (captured cell batches in invocation order, results returned by the runner)
 */
private Pair<List<List<Cell>>, SweepResults> runSweep(CellsSweeper cellsSweeper,
        SweepTaskRunner spiedSweepRunner,
        int maxCellTsPairsToExamine,
        int candidateBatchSize,
        int deleteBatchSize) {
    sweepTimestamp.set(Long.MAX_VALUE);
    List<List<Cell>> sweptCells = Lists.newArrayList();

    doAnswer((invocationOnMock) -> {
        Object[] arguments = invocationOnMock.getArguments();
        // Mock arguments are only available as Object, so this cast cannot be checked at
        // compile time; the suppression is scoped to this one declaration.
        @SuppressWarnings("unchecked")
        Collection<Cell> sentinelsToAdd = (Collection<Cell>) arguments[2];
        // Copy defensively so later mutation of the argument cannot affect what was captured.
        sweptCells.add(new ArrayList<>(sentinelsToAdd));
        return null;
    }).when(cellsSweeper).sweepCells(eq(TABLE_NAME), any(), any());

    SweepResults sweepResults = spiedSweepRunner.run(
            TABLE_NAME,
            ImmutableSweepBatchConfig.builder()
                    .maxCellTsPairsToExamine(maxCellTsPairsToExamine)
                    .candidateBatchSize(candidateBatchSize)
                    .deleteBatchSize(deleteBatchSize)
                    .build(),
            PtBytes.EMPTY_BYTE_ARRAY);

    return new Pair<>(sweptCells, sweepResults);
}
/**
 * Test fixture setup: runs the superclass setup first, then points {@code tsSupplier} at
 * {@code sweepTimestamp} and builds the {@code sweepRunner} under test.
 *
 * <p>The runner receives {@code tsSupplier} for both of its timestamp arguments and a
 * {@link CellsSweeper} created with an empty follower list.
 */
@Before
@Override
public void setup() {
    super.setup();

    tsSupplier = sweepTimestamp::get;
    CellsSweeper cellsSweeper = new CellsSweeper(txManager, kvs, persistentLockManager, ImmutableList.of());
    sweepRunner = new SweepTaskRunner(kvs, tsSupplier, tsSupplier, txService, ssm, cellsSweeper);
}
/**
 * Full-run entry point: delegates to {@code runInternal} with {@link RunType#FULL}.
 *
 * @param tableRef table to sweep
 * @param batchConfig batch configuration for this run
 * @param startRow row to start from
 * @return whatever {@code runInternal} reports for the full run
 */
public SweepResults run(TableReference tableRef, SweepBatchConfig batchConfig, byte[] startRow) {
    final RunType fullMode = RunType.FULL;
    SweepResults fullRunResults = runInternal(tableRef, batchConfig, startRow, fullMode);
    return fullRunResults;
}
/**
 * Sweeps {@code table} from the empty start row using the default batch sizes and read limit
 * defined in {@code AtlasDbConstants}.
 *
 * @param table table to sweep
 * @return the results returned by the sweep task runner
 */
SweepResults sweepTable(TableReference table) {
    SweepBatchConfig defaultBatchConfig = ImmutableSweepBatchConfig.builder()
            .candidateBatchSize(AtlasDbConstants.DEFAULT_SWEEP_CANDIDATE_BATCH_HINT)
            .deleteBatchSize(AtlasDbConstants.DEFAULT_SWEEP_DELETE_BATCH_HINT)
            .maxCellTsPairsToExamine(AtlasDbConstants.DEFAULT_SWEEP_READ_LIMIT)
            .build();
    byte[] firstRow = PtBytes.EMPTY_BYTE_ARRAY;

    return sweepTaskRunner.get().run(table, defaultBatchConfig, firstRow);
}
/**
 * With two values written to each cell of {@code SMALL_LIST_OF_CELLS} and a delete batch size of
 * 1, checks that every batch captured by {@code runSweep} holds at most twice the delete batch
 * size and that the captured batches, concatenated, are exactly {@code SMALL_LIST_OF_CELLS}.
 */
@Test(timeout = 50000)
public void testSweepBatchesDownToDeleteBatchSize() {
    CellsSweeper mockedCellsSweeper = Mockito.mock(CellsSweeper.class);
    SweepTaskRunner runnerUnderTest =
            new SweepTaskRunner(kvs, tsSupplier, tsSupplier, txService, ssm, mockedCellsSweeper);
    putTwoValuesInEachCell(SMALL_LIST_OF_CELLS);

    int deleteBatchSize = 1;
    int largestAllowedBatch = 2 * deleteBatchSize;
    Pair<List<List<Cell>>, SweepResults> captured =
            runSweep(mockedCellsSweeper, runnerUnderTest, 8, 8, deleteBatchSize);
    List<List<Cell>> capturedBatches = captured.getLhSide();

    assertThat(capturedBatches).allMatch(batch -> batch.size() <= largestAllowedBatch);
    assertThat(Iterables.concat(capturedBatches)).containsExactlyElementsOf(SMALL_LIST_OF_CELLS);
}