Fix test timeout for health on master failover (#63455)

testHealthOnMasterFailover could timeout on some of the health requests
in the case where an index is added, since the recovery leads to
extended test run time.

Closes #62690
This commit is contained in:
Henning Andersen 2020-10-21 12:12:15 +02:00 committed by Henning Andersen
parent 428fd7218c
commit ddd897f747
1 changed files with 10 additions and 4 deletions

View File

@ -26,6 +26,7 @@ import org.elasticsearch.action.support.IndicesOptions;
import org.elasticsearch.action.support.PlainActionFuture;
import org.elasticsearch.cluster.health.ClusterHealthStatus;
import org.elasticsearch.cluster.metadata.IndexMetadata;
import org.elasticsearch.cluster.routing.UnassignedInfo;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.Priority;
import org.elasticsearch.common.settings.Settings;
@ -308,21 +309,26 @@ public class ClusterHealthIT extends ESIntegTestCase {
}
}
@AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/62690")
public void testHealthOnMasterFailover() throws Exception {
final String node = internalCluster().startDataOnlyNode();
boolean withIndex = randomBoolean();
final boolean withIndex = randomBoolean();
if (withIndex) {
// Create index with many shards to provoke the health request to wait (for green) while master is being shut down.
// Notice that this is set to 0 after the test completed starting a number of health requests and master restarts.
// This ensures that the cluster is yellow when the health request is made, making the health request wait on the observer,
// triggering a call to observer.onClusterServiceClose when master is shutdown.
createIndex("test", Settings.builder().put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, randomIntBetween(0, 10)).build());
createIndex("test",
Settings.builder()
.put(IndexMetadata.SETTING_NUMBER_OF_REPLICAS, randomIntBetween(0, 10))
// avoid full recoveries of index, just wait for replica to reappear
.put(UnassignedInfo.INDEX_DELAYED_NODE_LEFT_TIMEOUT_SETTING.getKey(), "5m")
.build());
}
final List<ActionFuture<ClusterHealthResponse>> responseFutures = new ArrayList<>();
// Run a few health requests concurrent to master fail-overs against a data-node to make sure master failover is handled
// without exceptions
for (int i = 0; i < 20; ++i) {
final int iterations = withIndex ? 10 : 20;
for (int i = 0; i < iterations; ++i) {
responseFutures.add(client(node).admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID)
.setWaitForGreenStatus().setMasterNodeTimeout(TimeValue.timeValueMinutes(1)).execute());
internalCluster().restartNode(internalCluster().getMasterName(), InternalTestCluster.EMPTY_CALLBACK);