Fix testMasterFailoverDuringCloneStep1 (#63580) (#64127)

Assuming the clone failed when the request failed is not sufficient.
There are failure modes where the request fails but the clone still works out
because the data node resent the request after the first clone had already been
failed and removed from the cluster state when the master was restarted.

Closes #63473
This commit is contained in:
Armin Braun 2020-10-26 09:30:09 +01:00 committed by GitHub
parent bec3eca234
commit bdea16301d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 9 additions and 1 deletions

View File

@ -360,8 +360,9 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
createFullSnapshot(repoName, sourceSnapshot);
blockMasterOnReadIndexMeta(repoName);
final String cloneName = "target-snapshot";
final ActionFuture<AcknowledgedResponse> cloneFuture =
startCloneFromDataNode(repoName, sourceSnapshot, "target-snapshot", testIndex);
startCloneFromDataNode(repoName, sourceSnapshot, cloneName, testIndex);
awaitNumberOfSnapshotsInProgress(1);
final String masterNode = internalCluster().getMasterName();
waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L));
@ -377,6 +378,9 @@ public class CloneSnapshotIT extends AbstractSnapshotIntegTestCase {
awaitNoMoreRunningOperations(internalCluster().getMasterName());
// Check if the clone operation worked out by chance as a result of the clone request being retried because of the master failover
cloneSucceeded = cloneSucceeded ||
getRepositoryData(repoName).getSnapshotIds().stream().anyMatch(snapshotId -> snapshotId.getName().equals(cloneName));
assertAllSnapshotsSuccessful(getRepositoryData(repoName), cloneSucceeded ? 2 : 1);
}

View File

@ -359,6 +359,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
final String repositoryName = request.repository();
final String snapshotName = indexNameExpressionResolver.resolveDateMathExpression(request.snapshot());
validate(repositoryName, snapshotName);
// TODO: create snapshot UUID in CreateSnapshotRequest and make this operation idempotent to cleanly deal with transport layer
// retries
final SnapshotId snapshotId = new SnapshotId(snapshotName, UUIDs.randomBase64UUID()); // new UUID for the snapshot
Repository repository = repositoriesService.repository(request.repository());
if (repository.isReadOnly()) {
@ -486,6 +488,8 @@ public class SnapshotsService extends AbstractLifecycleComponent implements Clus
}
final String snapshotName = indexNameExpressionResolver.resolveDateMathExpression(request.target());
validate(repositoryName, snapshotName);
// TODO: create snapshot UUID in CloneSnapshotRequest and make this operation idempotent to cleanly deal with transport layer
// retries
final SnapshotId snapshotId = new SnapshotId(snapshotName, UUIDs.randomBase64UUID());
final Snapshot snapshot = new Snapshot(repositoryName, snapshotId);
initializingClones.add(snapshot);