From 73d19271a9de503a964fb6b8a158e5a5d883af43 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Fri, 18 Sep 2020 12:20:23 +0200 Subject: [PATCH] Fix Races in testQueuedSnapshotOperationsAndBrokenRepoOnMasterFailOverMultipleRepos (#62431) (#62614) This test (in part) verifies that snapshot creation is not retried on master fail-over once a snapshot has been started already. Unless we wait for the snapshot creation to show up in the cluster state before failing the master node though, we could run into a race where the snapshot wasn't yet in the cluster state and a retry goes through successfully. --- .../org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java index 538548fc9f0..8f71eadf0b2 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java @@ -784,17 +784,21 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase { blockNodeOnAnyFiles(blockedRepoName, masterNode); final ActionFuture deleteFuture = startDeleteFromNonMasterClient(blockedRepoName, "*"); waitForBlock(masterNode, blockedRepoName, TimeValue.timeValueSeconds(30L)); + awaitNDeletionsInProgress(1); final ActionFuture createBlockedSnapshot = startFullSnapshotFromNonMasterClient(blockedRepoName, "queued-snapshot"); + awaitNSnapshotsInProgress(1); final long generation = getRepositoryData(repoName).getGenId(); blockNodeOnAnyFiles(repoName, masterNode); final ActionFuture snapshotThree = startFullSnapshotFromNonMasterClient(repoName, "snapshot-three"); waitForBlock(masterNode, repoName, TimeValue.timeValueSeconds(30L)); + awaitNSnapshotsInProgress(2); corruptIndexN(repoPath, generation); final ActionFuture 
snapshotFour = startFullSnapshotFromNonMasterClient(repoName, "snapshot-four"); + awaitNSnapshotsInProgress(3); internalCluster().stopCurrentMasterNode(); ensureStableCluster(3);