Fix Cluster Stabilization in SnapshotResiliencyTests (#55159) (#55168)

Just like in `AbstractCoordinatorTestCase`, we can't assume the cluster is stable as soon as all the cluster states align, since stray follower/leader check tasks could still hit us after a disconnect, causing future test operations to fail. This is fixed by first running all tasks within the time span in which these checks could still occur, and only then validating that the cluster states align on all nodes, as we already do in the coordinator tests. Closes #55103
2025-02-24 22:09:24 +00:00 · 2020-04-14 19:22:26 +02:00 · 2020-04-14 19:22:26 +02:00 · f7467a7fe8
commit f7467a7fe8
parent 7f35b927d1
1 changed files with 6 additions and 0 deletions
--- a/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java
+++ b/server/src/test/java/org/elasticsearch/snapshots/SnapshotResiliencyTests.java
@ -98,6 +98,7 @@ import org.elasticsearch.cluster.SnapshotsInProgress;
 import org.elasticsearch.cluster.action.index.MappingUpdatedAction;
 import org.elasticsearch.cluster.action.index.NodeMappingRefreshAction;
 import org.elasticsearch.cluster.action.shard.ShardStateAction;
+import org.elasticsearch.cluster.coordination.AbstractCoordinatorTestCase;
 import org.elasticsearch.cluster.coordination.ClusterBootstrapService;
 import org.elasticsearch.cluster.coordination.CoordinationMetadata.VotingConfiguration;
 import org.elasticsearch.cluster.coordination.CoordinationState;
@ -910,6 +911,11 @@ public class SnapshotResiliencyTests extends ESTestCase {
    }

    private void stabilize() {
+        final long endTime = deterministicTaskQueue.getCurrentTimeMillis() + AbstractCoordinatorTestCase.DEFAULT_STABILISATION_TIME;
+        while (deterministicTaskQueue.getCurrentTimeMillis() < endTime) {
+            deterministicTaskQueue.advanceTime();
+            deterministicTaskQueue.runAllRunnableTasks();
+        }
        runUntil(
            () -> {
                final Collection<ClusterState> clusterStates =