Fix ConcurrentSnapshotsIT.testMasterFailOverWithQueuedDeletes (#60307) (#60376)

The test assumed that the master fail-over would always complete in a single step.
This is not guaranteed, however: occasionally the master fails over twice,
in which case the transport listener is failed on the node that stops being
leader. We therefore have to catch an exception for the deletes as well, just as
we do for the snapshot.

Closes #60262
This commit is contained in:
Armin Braun 2020-07-29 15:54:00 +02:00 committed by GitHub
parent 1cfdb4fc08
commit 381cec2ba9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 10 additions and 2 deletions

View File

@@ -71,6 +71,7 @@ import static org.hamcrest.Matchers.contains;
import static org.hamcrest.Matchers.containsInAnyOrder;
import static org.hamcrest.Matchers.containsString;
import static org.hamcrest.Matchers.empty;
import static org.hamcrest.Matchers.endsWith;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.greaterThan;
import static org.hamcrest.Matchers.hasSize;
@@ -468,8 +469,15 @@ public class ConcurrentSnapshotsIT extends AbstractSnapshotIntegTestCase {
unblockNode(repoName, dataNode);
unblockNode(repoName, dataNode2);
assertAcked(firstDeleteFuture.get());
assertAcked(deleteAllSnapshots.get());
for (ActionFuture<AcknowledgedResponse> deleteFuture : Arrays.asList(firstDeleteFuture, deleteAllSnapshots)) {
try {
assertAcked(deleteFuture.actionGet());
} catch (RepositoryException rex) {
// rarely the master node fails over twice when shutting down the initial master and fails the transport listener
assertThat(rex.repository(), is("_all"));
assertThat(rex.getMessage(), endsWith("Failed to update cluster state during repository operation"));
}
}
expectThrows(SnapshotException.class, snapshotThreeFuture::actionGet);
logger.info("--> verify that all snapshots are gone and no more work is left in the cluster state");