From cd2a4372b49aa595b1ad811d00a90a0ffd3e81d2 Mon Sep 17 00:00:00 2001 From: Boaz Leskes Date: Sun, 24 Sep 2017 22:27:32 +0200 Subject: [PATCH] RecoveryIT should wait for green when in mixed cluster to avoid unassigned shards The test starts with two old nodes and creates indices (without waiting for green, which is fixed here too). Then it restarts one of the nodes and waits for it to join the cluster. This wait condition only uses wait for yellow as our generic infra doesn't know how many nodes there are in total. Once the restarted node is part of the cluster (mixed mode) the second old node is restarted. If indices are not fully allocated when that happens, the shards will go into delayed unassigned mode. If the recovery of the replica never completed we may end up with a corrupted / no secondary copy on the node. This will cause the shards to be delayed for 1m before being reassigned and the test will time out. --- .../test/java/org/elasticsearch/upgrades/RecoveryIT.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/RecoveryIT.java b/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/RecoveryIT.java index e205751f4ab..1851420e2eb 100644 --- a/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/RecoveryIT.java +++ b/qa/rolling-upgrade/src/test/java/org/elasticsearch/upgrades/RecoveryIT.java @@ -105,6 +105,7 @@ public class RecoveryIT extends ESRestTestCase { .put(IndexMetaData.INDEX_NUMBER_OF_SHARDS_SETTING.getKey(), 1) .put(IndexMetaData.INDEX_NUMBER_OF_REPLICAS_SETTING.getKey(), 1); createIndex(index, settings.build()); + ensureGreen(); } else if (clusterType == CLUSTER_TYPE.UPGRADED) { ensureGreen(); Response response = client().performRequest("GET", index + "/_stats", Collections.singletonMap("level", "shards")); @@ -123,6 +124,11 @@ public class RecoveryIT extends ESRestTestCase { assertThat("different history uuid found for shard on " + nodeID, historyUUID, 
equalTo(expectHistoryUUID)); } } + } else { + // we are now in mixed cluster mode. we want to make sure the shard is fully allocated on the new node that was just + // started in order not to run into delayed unassigned shards when we bring down the old node (there must be a fully valid + // copy) + ensureGreen(); } }