Fix Noisy Logging during Snapshot Delete (#56264) (#56329)

We were logging the cleanup of the snap- and meta- blobs for every snapshot delete,
which is needlessly noisy and confusing to users. We should only log actually stale
or unexpected blobs here.
Armin Braun 2020-05-07 13:48:53 +02:00 committed by GitHub
parent 60b6d4eddc
commit 3bad5b3c01
1 changed file with 32 additions and 16 deletions
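
The idea of the change, as a minimal standalone sketch: before logging "stale root blobs" during a snapshot delete, filter out the blobs that belong to the snapshots just deleted. The snap-<uuid>.dat / meta-<uuid>.dat naming used below is assumed for illustration, and SnapshotId is a hypothetical stand-in for the Elasticsearch class; the real code derives the names from the repository's snapshotFormat and globalMetadataFormat helpers, as shown in the diff below.

import java.util.Collection;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

final class StaleBlobLogFilter {

    // Hypothetical stand-in for org.elasticsearch.snapshots.SnapshotId
    record SnapshotId(String name, String uuid) {}

    // Drops the snapshot- and global-metadata blobs belonging to the snapshots that were just
    // deleted, so only genuinely stale/unexpected root blobs show up in the INFO log line.
    static List<String> blobsWorthLogging(Collection<SnapshotId> deletedSnapshots, List<String> blobsToDelete) {
        // Blob names expected to be removed as part of the snapshot delete itself
        // (naming pattern assumed here for illustration)
        final Set<String> expected = deletedSnapshots.stream()
            .flatMap(s -> Stream.of("meta-" + s.uuid() + ".dat", "snap-" + s.uuid() + ".dat"))
            .collect(Collectors.toSet());
        return blobsToDelete.stream()
            .filter(blob -> expected.contains(blob) == false)
            .collect(Collectors.toList());
    }
}

When the cleanup runs as a plain repository cleanup rather than a snapshot delete, an empty collection is passed for the deleted snapshots (see the Collections.emptyList() call in the diff), so every stale root blob is still logged as before.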


@@ -667,7 +667,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
// Run unreferenced blobs cleanup in parallel to shard-level snapshot deletion
final ActionListener<Void> afterCleanupsListener =
new GroupedActionListener<>(ActionListener.wrap(() -> listener.onResponse(null)), 2);
-asyncCleanupUnlinkedRootAndIndicesBlobs(foundIndices, rootBlobs, updatedRepoData, afterCleanupsListener);
+asyncCleanupUnlinkedRootAndIndicesBlobs(snapshotIds, foundIndices, rootBlobs, updatedRepoData, afterCleanupsListener);
asyncCleanupUnlinkedShardLevelBlobs(snapshotIds, writeShardMetaDataAndComputeDeletesStep.result(), afterCleanupsListener);
}, listener::onFailure);
} else {
@@ -677,7 +677,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
// Run unreferenced blobs cleanup in parallel to shard-level snapshot deletion
final ActionListener<Void> afterCleanupsListener =
new GroupedActionListener<>(ActionListener.wrap(() -> listener.onResponse(null)), 2);
-asyncCleanupUnlinkedRootAndIndicesBlobs(foundIndices, rootBlobs, updatedRepoData, afterCleanupsListener);
+asyncCleanupUnlinkedRootAndIndicesBlobs(snapshotIds, foundIndices, rootBlobs, updatedRepoData, afterCleanupsListener);
final StepListener<Collection<ShardSnapshotMetaDeleteResult>> writeMetaAndComputeDeletesStep = new StepListener<>();
writeUpdatedShardMetaDataAndComputeDeletes(snapshotIds, repositoryData, false, writeMetaAndComputeDeletesStep);
writeMetaAndComputeDeletesStep.whenComplete(deleteResults ->
@@ -687,11 +687,12 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
}
}
-private void asyncCleanupUnlinkedRootAndIndicesBlobs(Map<String, BlobContainer> foundIndices, Map<String, BlobMetadata> rootBlobs,
-                                                      RepositoryData updatedRepoData, ActionListener<Void> listener) {
+private void asyncCleanupUnlinkedRootAndIndicesBlobs(Collection<SnapshotId> deletedSnapshots, Map<String, BlobContainer> foundIndices,
+                                                      Map<String, BlobMetadata> rootBlobs, RepositoryData updatedRepoData,
+                                                      ActionListener<Void> listener) {
threadPool.executor(ThreadPool.Names.SNAPSHOT).execute(ActionRunnable.wrap(
listener,
-l -> cleanupStaleBlobs(foundIndices, rootBlobs, updatedRepoData, ActionListener.map(l, ignored -> null))));
+l -> cleanupStaleBlobs(deletedSnapshots, foundIndices, rootBlobs, updatedRepoData, ActionListener.map(l, ignored -> null))));
}
private void asyncCleanupUnlinkedShardLevelBlobs(Collection<SnapshotId> snapshotIds,
@@ -820,13 +821,16 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
* snapshots. This method is only to be called directly after a new {@link RepositoryData} was written to the repository and with
* parameters {@code foundIndices}, {@code rootBlobs}
*
- * @param foundIndices all indices blob containers found in the repository before {@code newRepoData} was written
- * @param rootBlobs all blobs found directly under the repository root
- * @param newRepoData new repository data that was just written
- * @param listener listener to invoke with the combined {@link DeleteResult} of all blobs removed in this operation
+ * @param deletedSnapshots if this method is called as part of a delete operation, the snapshot ids just deleted or empty if called as
+ *                         part of a repository cleanup
+ * @param foundIndices all indices blob containers found in the repository before {@code newRepoData} was written
+ * @param rootBlobs all blobs found directly under the repository root
+ * @param newRepoData new repository data that was just written
+ * @param listener listener to invoke with the combined {@link DeleteResult} of all blobs removed in this operation
*/
-private void cleanupStaleBlobs(Map<String, BlobContainer> foundIndices, Map<String, BlobMetadata> rootBlobs,
-                               RepositoryData newRepoData, ActionListener<DeleteResult> listener) {
+private void cleanupStaleBlobs(Collection<SnapshotId> deletedSnapshots, Map<String, BlobContainer> foundIndices,
+                               Map<String, BlobMetadata> rootBlobs, RepositoryData newRepoData,
+                               ActionListener<DeleteResult> listener) {
final GroupedActionListener<DeleteResult> groupedListener = new GroupedActionListener<>(ActionListener.wrap(deleteResults -> {
DeleteResult deleteResult = DeleteResult.ZERO;
for (DeleteResult result : deleteResults) {
@@ -837,7 +841,7 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
final Executor executor = threadPool.executor(ThreadPool.Names.SNAPSHOT);
executor.execute(ActionRunnable.supply(groupedListener, () -> {
-List<String> deletedBlobs = cleanupStaleRootFiles(staleRootBlobs(newRepoData, rootBlobs.keySet()));
+List<String> deletedBlobs = cleanupStaleRootFiles(deletedSnapshots, staleRootBlobs(newRepoData, rootBlobs.keySet()));
return new DeleteResult(deletedBlobs.size(), deletedBlobs.stream().mapToLong(name -> rootBlobs.get(name).length()).sum());
}));
@@ -874,8 +878,8 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
} else {
// write new index-N blob to ensure concurrent operations will fail
writeIndexGen(repositoryData, repositoryStateId, SnapshotsService.useShardGenerations(repositoryMetaVersion),
-Function.identity(), ActionListener.wrap(v -> cleanupStaleBlobs(foundIndices, rootBlobs, repositoryData,
-    ActionListener.map(listener, RepositoryCleanupResult::new)), listener::onFailure));
+Function.identity(), ActionListener.wrap(v -> cleanupStaleBlobs(Collections.emptyList(), foundIndices, rootBlobs,
+    repositoryData, ActionListener.map(listener, RepositoryCleanupResult::new)), listener::onFailure));
}
} catch (Exception e) {
listener.onFailure(e);
@@ -912,12 +916,24 @@ public abstract class BlobStoreRepository extends AbstractLifecycleComponent imp
).collect(Collectors.toList());
}
-private List<String> cleanupStaleRootFiles(List<String> blobsToDelete) {
+private List<String> cleanupStaleRootFiles(Collection<SnapshotId> deletedSnapshots, List<String> blobsToDelete) {
if (blobsToDelete.isEmpty()) {
return blobsToDelete;
}
try {
logger.info("[{}] Found stale root level blobs {}. Cleaning them up", metadata.name(), blobsToDelete);
if (logger.isInfoEnabled()) {
// If we're running root level cleanup as part of a snapshot delete we should not log the snapshot- and global metadata
// blobs associated with the just deleted snapshots as they are expected to exist and not stale. Otherwise every snapshot
// delete would also log a confusing INFO message about "stale blobs".
final Set<String> blobNamesToIgnore = deletedSnapshots.stream().flatMap(
snapshotId -> Stream.of(globalMetadataFormat.blobName(snapshotId.getUUID()),
snapshotFormat.blobName(snapshotId.getUUID()))).collect(Collectors.toSet());
final List<String> blobsToLog = blobsToDelete.stream().filter(b -> blobNamesToIgnore.contains(b) == false)
.collect(Collectors.toList());
if (blobsToLog.isEmpty() == false) {
logger.info("[{}] Found stale root level blobs {}. Cleaning them up", metadata.name(), blobsToLog);
}
}
blobContainer().deleteBlobsIgnoringIfNotExists(blobsToDelete);
return blobsToDelete;
} catch (IOException e) {