Fix ConcurrentSnapshotsIT.testMasterFailOverWithQueuedDeletes (#60307) (#60376)

The test assumed that the master fail-over would always complete in a single step. This is not guaranteed, however: occasionally the master fails over twice, in which case the transport listener is failed on the node that stops being the leader. We therefore have to catch an exception for the deletes as well, just like we already do for the snapshot. Closes #60262
2020-07-29 15:54:00 +02:00 · 2020-07-29 15:54:00 +02:00 · 381cec2ba9
parent 1cfdb4fc08
commit 381cec2ba9
1 changed file with 10 additions and 2 deletions
--- a/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java
+++ b/server/src/internalClusterTest/java/org/elasticsearch/snapshots/ConcurrentSnapshotsIT.java
@@ -71,6 +71,7 @@ import static org.hamcrest.Matchers.contains;
 import static org.hamcrest.Matchers.containsInAnyOrder;
 import static org.hamcrest.Matchers.containsString;
 import static org.hamcrest.Matchers.empty;
+import static org.hamcrest.Matchers.endsWith;
 import static org.hamcrest.Matchers.equalTo;
 import static org.hamcrest.Matchers.greaterThan;
 import static org.hamcrest.Matchers.hasSize;
@@ -468,8 +469,15 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
        unblockNode(repoName, dataNode);
        unblockNode(repoName, dataNode2);

-        assertAcked(firstDeleteFuture.get());
-        assertAcked(deleteAllSnapshots.get());
+        for (ActionFuture<AcknowledgedResponse> deleteFuture : Arrays.asList(firstDeleteFuture, deleteAllSnapshots)) {
+            try {
+                assertAcked(deleteFuture.actionGet());
+            } catch (RepositoryException rex) {
+                // rarely the master node fails over twice when shutting down the initial master and fails the transport listener
+                assertThat(rex.repository(), is("_all"));
+                assertThat(rex.getMessage(), endsWith("Failed to update cluster state during repository operation"));
+            }
+        }
        expectThrows(SnapshotException.class, snapshotThreeFuture::actionGet);

        logger.info("--> verify that all snapshots are gone and no more work is left in the cluster state");