/**
 * Builds a new {@link MarkSweepGarbageCollector} over this cluster's shared state.
 * The instance is also cached in the {@code collector} field so later assertions
 * can reach the same object.
 *
 * @param blobGcMaxAgeInSecs minimum age (seconds) a blob must have to be collected
 * @return the freshly created collector
 * @throws Exception if collector construction fails
 */
public MarkSweepGarbageCollector getCollector(long blobGcMaxAgeInSecs) throws Exception {
    MarkSweepGarbageCollector gc = new MarkSweepGarbageCollector(
            referenceRetriever,
            blobStore,
            executor,
            root.getAbsolutePath(),
            2048,                 // batch size for candidate processing
            blobGcMaxAgeInSecs,
            repoId,
            wb,
            statsProvider);
    collector = gc;
    return gc;
}
/**
 * Runs garbage collection, delegating both flags unchanged to
 * {@code markAndSweep(markOnly, forceBlobRetrieve)}.
 *
 * @param markOnly          passed through to the mark-and-sweep run
 * @param forceBlobRetrieve passed through to the mark-and-sweep run
 * @throws Exception propagated from the underlying mark-and-sweep run
 */
@Override
public void collectGarbage(boolean markOnly, boolean forceBlobRetrieve) throws Exception {
    markAndSweep(markOnly, forceBlobRetrieve);
}
mark(fs); if (!markOnly) { long deleteCount = sweep(fs, markStart, forceBlobRetrieve); threw = false; long maxTime = getMaxModifiedTime(markStart) > 0 ? getMaxModifiedTime(markStart) : markStart; sw.stop(); sw.toString(), sw.elapsed(TimeUnit.MILLISECONDS), deleteCount, timestampToString(maxTime));
/**
 * Runs the given collector against the cluster and returns the blob ids that
 * survived the collection.
 *
 * @param cluster   cluster under test
 * @param collector collector to execute
 * @param markOnly  whether to run only the mark phase
 * @return the set of blob ids present in the store after GC
 * @throws Exception on GC failure
 */
protected Set<String> executeGarbageCollection(Cluster cluster, MarkSweepGarbageCollector collector, boolean markOnly)
        throws Exception {
    collector.collectGarbage(markOnly);

    // The cluster executor must report zero scheduled tasks after the run.
    assertEquals(0, cluster.executor.getTaskCount());

    Set<String> remaining = iterate(cluster.blobStore);
    log.info("{} blobs existing after gc : {}", remaining.size(), remaining);
    return remaining;
}
difference(fs); long count = 0; long deleted = 0; long maxModifiedTime = getMaxModifiedTime(earliestRefAvailTime); LOG.debug("Starting sweep phase of the garbage collector"); LOG.debug("Sweeping blobs with modified time > than the configured max deleted time ({}). ", timestampToString(maxModifiedTime)); FileUtils.lineIterator(fs.getGcCandidates(), Charsets.UTF_8.name()); Iterator<List<String>> partitions = Iterators.partition(iterator, getBatchCount()); while (partitions.hasNext()) { List<String> ids = partitions.next(); deleted += BlobCollectionType.get(blobStore) .sweepInternal(blobStore, ids, removesQueue, maxModifiedTime); saveBatchToFile(newArrayList(removesQueue), removesWriter); + "modified time is > " + "than the max deleted time ({})", deleted, count, timestampToString(maxModifiedTime));
@Test public void consistencyCheckOnlyActiveDeletion() throws Exception { Cluster cluster = new Cluster("cluster1"); BlobStore s = cluster.blobStore; BlobIdTracker tracker = (BlobIdTracker) ((BlobTrackingStore) s).getTracker(); DataStoreState state = init(cluster.nodeStore, 0); List<String> addlAdded = doActiveDelete(cluster.nodeStore, (DataStoreBlobStore) cluster.blobStore, tracker, folder,0, 2); List<String> addlPresent = Lists.newArrayList(addlAdded.get(2), addlAdded.get(3)); List<String> activeDeleted = Lists.newArrayList(addlAdded.get(0), addlAdded.get(1)); state.blobsPresent.addAll(addlPresent); state.blobsAdded.addAll(addlPresent); // Since datastore in consistent state and only active deletions the missing list should be empty assertEquals(0, cluster.gc.checkConsistency()); }
/**
 * Consistency check on a healthy setup: expects zero missing blobs and a
 * single successful consistency operation recorded in the stats.
 */
@Test
public void checkConsistency() throws Exception {
    log.info("Starting checkConsistency()");

    long afterSetupTime = clock.getTime();
    log.info("after setup time {}", afterSetupTime);

    MarkSweepGarbageCollector collector = cluster.getCollector(0);
    long missingBlobs = collector.checkConsistency();
    assertEquals(0, missingBlobs);

    // One run, no failures, in both the stats provider and the operation-stats bean.
    assertStats(cluster.statsProvider, 1, 0, 0, 0, CONSISTENCY_NAME);
    assertStatsBean(collector.getConsistencyOperationStats(), 1, 0, 0);
}
/**
 * When every repository has its marked references available, mark-and-sweep
 * completes without failures and without deleting anything (the chunk-id
 * iterator is stubbed to be empty).
 */
@Test
public void markAndSweepShouldSucceedWhenAllRepositoriesAreAvailable() throws Exception {
    setupSharedDataRecords("REPO1", "REPO1");

    // Stub the store to expose no chunks at all, so there is nothing to sweep.
    when(blobStore.getAllChunkIds(0L)).thenReturn(ImmutableList.<String>of().iterator());

    collector.markAndSweep(false, true);

    assertThat(collector.getOperationStats().numDeleted(), is(0L));
    assertThat(collector.getOperationStats().getFailureCount(), is(0L));
}
/** * Mark phase of the GC. * @param fs the garbage collector file state */ protected void mark(GarbageCollectorFileState fs) throws IOException, DataStoreException { LOG.debug("Starting mark phase of the garbage collector"); String uniqueSuffix = UUID.randomUUID().toString(); // Create a time marker in the data store if applicable GarbageCollectionType.get(blobStore).addMarkedStartMarker(blobStore, repoId, uniqueSuffix); // Mark all used references iterateNodeTree(fs, false); // Move the marked references file to the data store meta area if applicable GarbageCollectionType.get(blobStore).addMarked(blobStore, fs, repoId, uniqueSuffix); LOG.debug("Ending mark phase of the garbage collector"); }
/**
 * Runs a full (mark and sweep) collection with the given max blob age and
 * returns the blob ids still present afterwards.
 *
 * @param maxBlobGcInSecs minimum blob age (seconds) eligible for collection
 * @return blob ids remaining in the store after GC
 * @throws Exception on GC failure
 */
private Set<String> gcInternal(long maxBlobGcInSecs) throws Exception {
    ThreadPoolExecutor pool = (ThreadPoolExecutor) Executors.newFixedThreadPool(10);
    MarkSweepGarbageCollector collector = init(maxBlobGcInSecs, pool);
    collector.collectGarbage(false);

    // No tasks should have been scheduled on the dedicated pool.
    assertEquals(0, pool.getTaskCount());

    Set<String> remaining = iterate();
    log.info("{} blobs existing after gc : {}", remaining.size(), remaining);
    return remaining;
}
difference(fs); long count = 0; long deleted = 0; long maxModifiedTime = getMaxModifiedTime(earliestRefAvailTime); LOG.debug("Starting sweep phase of the garbage collector"); LOG.debug("Sweeping blobs with modified time > than the configured max deleted time ({}). ", timestampToString(maxModifiedTime)); FileUtils.lineIterator(fs.getGcCandidates(), Charsets.UTF_8.name()); Iterator<List<String>> partitions = Iterators.partition(iterator, getBatchCount()); while (partitions.hasNext()) { List<String> ids = partitions.next(); deleted += BlobCollectionType.get(blobStore) .sweepInternal(blobStore, ids, removesQueue, maxModifiedTime); saveBatchToFile(newArrayList(removesQueue), removesWriter); + "modified time is > " + "than the max deleted time ({})", deleted, count, timestampToString(maxModifiedTime));
@Test public void consistencyCheckWithRenegadeDelete() throws Exception { DataStoreState state = setUp(); // Simulate faulty state by deleting some blobs directly Random rand = new Random(87); List<String> existing = Lists.newArrayList(state.blobsPresent); long count = blobStore.countDeleteChunks(ImmutableList.of(existing.get(rand.nextInt(existing.size()))), 0); ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(10); MarkSweepGarbageCollector gcObj = init(86400, executor); long candidates = gcObj.checkConsistency(); assertEquals(1, executor.getTaskCount()); assertEquals(count, candidates); }
/**
 * Deletes the last tracked-present blob directly from the store and verifies
 * the consistency check reports one missing blob and one recorded failure.
 */
@Test
public void checkConsistencyFailure() throws Exception {
    log.info("Starting checkConsistencyFailure()");

    long afterSetupTime = clock.getTime();
    log.info("after setup time {}", afterSetupTime);

    // Remove one present blob behind the repository's back to break consistency.
    String lastPresent = Iterators.getLast(cluster.blobStoreState.blobsPresent.iterator());
    cluster.blobStore.countDeleteChunks(Lists.newArrayList(lastPresent), 0);

    MarkSweepGarbageCollector collector = cluster.getCollector(0);
    long missingBlobs = collector.checkConsistency();
    assertEquals(1, missingBlobs);

    assertStats(cluster.statsProvider, 1, 1, 1, 0, CONSISTENCY_NAME);
    assertStatsBean(collector.getConsistencyOperationStats(), 1, 1, 1);
}
/**
 * When the shared records come from mismatched repositories ("REPO1" vs
 * "REPO2"), the sweep cannot proceed: nothing is deleted and one failure is
 * recorded in the operation stats.
 */
@Test
public void markAndSweepShouldFailIfNotAllRepositoriesHaveMarkedReferencesAvailable() throws Exception {
    setupSharedDataRecords("REPO1", "REPO2");

    collector.markAndSweep(false, true);

    assertThat(collector.getOperationStats().numDeleted(), is(0L));
    assertThat(collector.getOperationStats().getFailureCount(), is(1L));
}
/** * Mark phase of the GC. * @param fs the garbage collector file state */ protected void mark(GarbageCollectorFileState fs) throws IOException, DataStoreException { LOG.debug("Starting mark phase of the garbage collector"); String uniqueSuffix = UUID.randomUUID().toString(); // Create a time marker in the data store if applicable GarbageCollectionType.get(blobStore).addMarkedStartMarker(blobStore, repoId, uniqueSuffix); // Mark all used references iterateNodeTree(fs, false); // Move the marked references file to the data store meta area if applicable GarbageCollectionType.get(blobStore).addMarked(blobStore, fs, repoId, uniqueSuffix); LOG.debug("Ending mark phase of the garbage collector"); }
if(blobStore instanceof GarbageCollectableBlobStore){ try { blobGC = new MarkSweepGarbageCollector( new DocumentBlobReferenceRetriever(this), (GarbageCollectableBlobStore) blobStore,
/**
 * Runs a full collection with the given max blob age and returns the blob ids
 * still present in the store afterwards.
 *
 * @param blobGcMaxAgeInSecs minimum blob age (seconds) eligible for collection
 * @return blob ids remaining after GC
 * @throws Exception on GC failure
 */
private Set<String> gc(int blobGcMaxAgeInSecs) throws Exception {
    ThreadPoolExecutor pool = (ThreadPoolExecutor) Executors.newFixedThreadPool(10);
    MarkSweepGarbageCollector collector = init(blobGcMaxAgeInSecs, pool);
    collector.collectGarbage(false);

    // No tasks should have been scheduled on the dedicated pool.
    assertEquals(0, pool.getTaskCount());
    return iterate();
}
difference(fs); long count = 0; long deleted = 0; long maxModifiedTime = getMaxModifiedTime(earliestRefAvailTime); LOG.debug("Starting sweep phase of the garbage collector"); LOG.debug("Sweeping blobs with modified time > than the configured max deleted time ({}). ", timestampToString(maxModifiedTime)); FileUtils.lineIterator(fs.getGcCandidates(), Charsets.UTF_8.name()); Iterator<List<String>> partitions = Iterators.partition(iterator, getBatchCount()); while (partitions.hasNext()) { List<String> ids = partitions.next(); deleted += BlobCollectionType.get(blobStore) .sweepInternal(blobStore, ids, removesQueue, maxModifiedTime); saveBatchToFile(newArrayList(removesQueue), removesWriter); + "modified time is > " + "than the max deleted time ({})", deleted, count, timestampToString(maxModifiedTime));
@Test public void consistencyCheckWithRenegadeDelete() throws Exception { DataStoreState state = setUp(true); // Simulate faulty state by deleting some blobs directly Random rand = new Random(87); List<String> existing = Lists.newArrayList(state.blobsPresent); GarbageCollectableBlobStore store = (GarbageCollectableBlobStore) mk.getNodeStore().getBlobStore(); long count = store.countDeleteChunks(ImmutableList.of(existing.get(rand.nextInt(existing.size()))), 0); ThreadPoolExecutor executor = (ThreadPoolExecutor) Executors.newFixedThreadPool(10); MarkSweepGarbageCollector gcObj = init(86400, executor); long candidates = gcObj.checkConsistency(); assertEquals(1, executor.getTaskCount()); assertEquals(count, candidates); }
long markFinish; try { mark(fs); } finally { markFinish = sw.elapsed(TimeUnit.MILLISECONDS); long deleteCount; try { deleteCount = sweep(fs, markStart, forceBlobRetrieve); long maxTime = getMaxModifiedTime(markStart) > 0 ? getMaxModifiedTime(markStart) : markStart; LOG.info("Blob garbage collection completed in {} ({} ms). Number of blobs deleted [{}] with max modification time of [{}]", sw.toString(), sw.elapsed(TimeUnit.MILLISECONDS), deleteCount, timestampToString(maxTime));