This fixes the behavior of the snapshot state machine in the following edge case: 1. Snapshot is running 2. Delete/abort for the snapshot is started 3. Snapshot fails to finalize We were not removing the failed snapshot id from the list of snapshots to delete in the delete. This lead to an error in the repository, which throws if we try to delete a non-existing snapshot. This commmit updates the deletions in progress by removing the failed snapshot id. The fact that this could lead to snapshot delete entries without any snapshot ids is not optimized on purpose because it allows for another attempt at writing clean `RepositoryData` and will run basic cleanup on the repository (root level blobs and stale indices) and thus bring the repository back into a clean state after a failed finalization. Closes #60274
This commit is contained in:
parent
381cec2ba9
commit
8429b4ace8
|
@ -1172,6 +1172,22 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
|
|||
assertThat(repositoryData.shardGenerations(), is(ShardGenerations.EMPTY));
|
||||
}
|
||||
|
||||
public void testQueuedDeleteAfterFinalizationFailure() throws Exception {
|
||||
final String masterNode = internalCluster().startMasterOnlyNode();
|
||||
final String repoName = "test-repo";
|
||||
createRepository(repoName, "mock");
|
||||
blockMasterFromFinalizingSnapshotOnIndexFile(repoName);
|
||||
final String snapshotName = "snap-1";
|
||||
final ActionFuture<CreateSnapshotResponse> snapshotFuture = startFullSnapshot(repoName, snapshotName);
|
||||
waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L));
|
||||
final ActionFuture<AcknowledgedResponse> deleteFuture = startDelete(repoName, snapshotName);
|
||||
awaitNDeletionsInProgress(1);
|
||||
unblockNode(repoName, masterNode);
|
||||
assertAcked(deleteFuture.get());
|
||||
final SnapshotException sne = expectThrows(SnapshotException.class, snapshotFuture::actionGet);
|
||||
assertThat(sne.getCause().getMessage(), containsString("exception after block"));
|
||||
}
|
||||
|
||||
private static String startDataNodeWithLargeSnapshotPool() {
|
||||
return internalCluster().startDataOnlyNode(LARGE_SNAPSHOT_POOL_SETTINGS);
|
||||
}
|
||||
|
|
|
@ -1437,7 +1437,11 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
|
|||
|
||||
@Override
|
||||
public ClusterState execute(ClusterState currentState) {
|
||||
return stateWithoutSnapshot(currentState, snapshot);
|
||||
final ClusterState updatedState = stateWithoutSnapshot(currentState, snapshot);
|
||||
// now check if there are any delete operations that refer to the just failed snapshot and remove the snapshot from them
|
||||
return updateWithSnapshots(updatedState, null, deletionsWithoutSnapshots(
|
||||
updatedState.custom(SnapshotDeletionsInProgress.TYPE, SnapshotDeletionsInProgress.EMPTY),
|
||||
Collections.singletonList(snapshot.getSnapshotId()), snapshot.getRepository()));
|
||||
}
|
||||
|
||||
@Override
|
||||
|
@ -1473,6 +1477,36 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
|
|||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Remove the given {@link SnapshotId}s for the given {@code repository} from an instance of {@link SnapshotDeletionsInProgress}.
|
||||
* If no deletion contained any of the snapshot ids to remove then return {@code null}.
|
||||
*
|
||||
* @param deletions snapshot deletions to update
|
||||
* @param snapshotIds snapshot ids to remove
|
||||
* @param repository repository that the snapshot ids belong to
|
||||
* @return updated {@link SnapshotDeletionsInProgress} or {@code null} if unchanged
|
||||
*/
|
||||
@Nullable
|
||||
private static SnapshotDeletionsInProgress deletionsWithoutSnapshots(SnapshotDeletionsInProgress deletions,
|
||||
Collection<SnapshotId> snapshotIds, String repository) {
|
||||
boolean changed = false;
|
||||
List<SnapshotDeletionsInProgress.Entry> updatedEntries = new ArrayList<>(deletions.getEntries().size());
|
||||
for (SnapshotDeletionsInProgress.Entry entry : deletions.getEntries()) {
|
||||
if (entry.repository().equals(repository)) {
|
||||
final List<SnapshotId> updatedSnapshotIds = new ArrayList<>(entry.getSnapshots());
|
||||
if (updatedSnapshotIds.removeAll(snapshotIds)) {
|
||||
changed = true;
|
||||
updatedEntries.add(entry.withSnapshots(updatedSnapshotIds));
|
||||
} else {
|
||||
updatedEntries.add(entry);
|
||||
}
|
||||
} else {
|
||||
updatedEntries.add(entry);
|
||||
}
|
||||
}
|
||||
return changed ? SnapshotDeletionsInProgress.of(updatedEntries) : null;
|
||||
}
|
||||
|
||||
private void failSnapshotCompletionListeners(Snapshot snapshot, Exception e) {
|
||||
endingSnapshots.remove(snapshot);
|
||||
failListenersIgnoringException(snapshotCompletionListeners.remove(snapshot), e);
|
||||
|
@ -1968,22 +2002,9 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
|
|||
clusterStateUpdateTask = new RemoveSnapshotDeletionAndContinueTask(deleteEntry, repositoryData) {
|
||||
@Override
|
||||
protected SnapshotDeletionsInProgress filterDeletions(SnapshotDeletionsInProgress deletions) {
|
||||
boolean changed = false;
|
||||
List<SnapshotDeletionsInProgress.Entry> updatedEntries = new ArrayList<>(deletions.getEntries().size());
|
||||
for (SnapshotDeletionsInProgress.Entry entry : deletions.getEntries()) {
|
||||
if (entry.repository().equals(deleteEntry.repository())) {
|
||||
final List<SnapshotId> updatedSnapshotIds = new ArrayList<>(entry.getSnapshots());
|
||||
if (updatedSnapshotIds.removeAll(deleteEntry.getSnapshots())) {
|
||||
changed = true;
|
||||
updatedEntries.add(entry.withSnapshots(updatedSnapshotIds));
|
||||
} else {
|
||||
updatedEntries.add(entry);
|
||||
}
|
||||
} else {
|
||||
updatedEntries.add(entry);
|
||||
}
|
||||
}
|
||||
return changed ? SnapshotDeletionsInProgress.of(updatedEntries) : deletions;
|
||||
final SnapshotDeletionsInProgress updatedDeletions =
|
||||
deletionsWithoutSnapshots(deletions, deleteEntry.getSnapshots(), deleteEntry.repository());
|
||||
return updatedDeletions == null ? deletions : updatedDeletions;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
Loading…
Reference in New Issue