Fix Races in testQueuedSnapshotOperationsAndBrokenRepoOnMasterFailOverMultipleRepos (#62431) (#62614)

This test (in part) verifies that snapshot creation is not retried on master fail-over once a snapshot has already been started. Unless we wait for the snapshot creation to show up in the cluster state before failing the master node, though, we could run into a race where the snapshot wasn't yet in the cluster state and a retry goes through successfully.
2020-09-18 12:20:23 +02:00 · 2020-09-18 12:20:23 +02:00 · 73d19271a9
parent d87268a264
commit 73d19271a9
1 changed files with 4 additions and 0 deletions
--- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java
+++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java
@@ -784,17 +784,21 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
        blockNodeOnAnyFiles(blockedRepoName, masterNode);
        final ActionFuture<AcknowledgedResponse> deleteFuture = startDeleteFromNonMasterClient(blockedRepoName, "*");
        waitForBlock(masterNode, blockedRepoName, TimeValue.timeValueSeconds(30L));
+        awaitNDeletionsInProgress(1);
        final ActionFuture<CreateSnapshotResponse> createBlockedSnapshot =
            startFullSnapshotFromNonMasterClient(blockedRepoName, "queued-snapshot");
+        awaitNSnapshotsInProgress(1);

        final long generation = getRepositoryData(repoName).getGenId();
        blockNodeOnAnyFiles(repoName, masterNode);
        final ActionFuture<CreateSnapshotResponse> snapshotThree = startFullSnapshotFromNonMasterClient(repoName, "snapshot-three");
        waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L));
+        awaitNSnapshotsInProgress(2);

        corruptIndexN(repoPath, generation);

        final ActionFuture<CreateSnapshotResponse> snapshotFour = startFullSnapshotFromNonMasterClient(repoName, "snapshot-four");
+        awaitNSnapshotsInProgress(3);
        internalCluster().stopCurrentMasterNode();
        ensureStableCluster(3);