From cc1f40ca18e0bca3b1446c93cde8ff1cac9f9f99 Mon Sep 17 00:00:00 2001 From: Ali Beyad Date: Thu, 6 Jul 2017 14:34:14 -0400 Subject: [PATCH] Fix cluster health wait conditions in rolling restart tests In the rolling upgrade tests, there is a test that creates an index with replica shards and ensures that, in the mixed cluster environment, the cluster health is green before any other tests are executed. However, there were two problems with this. First, if the replica shard resides on the restarted node, delayed allocation kicks in and causes the cluster health request to time out after 1m. The fix for this was to drastically lower the delayed allocation timeout (index.unassigned.node_left.delayed_timeout) for that index. Second, if the primary exists on the higher version node, then the replica cannot be assigned to the lower version node, because a node on a lower Lucene version cannot recover a shard from a primary on a higher Lucene version. The fix here was to wait for the cluster health to be yellow instead of green in the mixed cluster environment. In the fully upgraded cluster, the cluster health check waits for a green cluster as before. 
Closes #25185 --- .../resources/rest-api-spec/test/mixed_cluster/10_basic.yml | 2 +- .../resources/rest-api-spec/test/old_cluster/10_basic.yml | 6 +++++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/mixed_cluster/10_basic.yml b/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/mixed_cluster/10_basic.yml index 14282e6a00f..09124e7b8cb 100644 --- a/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/mixed_cluster/10_basic.yml +++ b/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/mixed_cluster/10_basic.yml @@ -2,7 +2,7 @@ "Index data and search on the mixed cluster": - do: cluster.health: - wait_for_status: green + wait_for_status: yellow wait_for_nodes: 2 - do: diff --git a/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/old_cluster/10_basic.yml b/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/old_cluster/10_basic.yml index e5312160796..cb24cdbdbdb 100644 --- a/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/old_cluster/10_basic.yml +++ b/qa/rolling-upgrade/src/test/resources/rest-api-spec/test/old_cluster/10_basic.yml @@ -33,7 +33,11 @@ - do: indices.create: index: index_with_replicas # dummy index to ensure we can recover indices with replicas just fine - + body: + # if the node with the replica is the first to be restarted, then delayed + # allocation will kick in, and the cluster health won't return to GREEN + # before timing out + index.unassigned.node_left.delayed_timeout: "100ms" - do: bulk: refresh: true