From dca8a918f336f23a9f9c0613f28e584eba951edb Mon Sep 17 00:00:00 2001 From: David Turner Date: Wed, 17 Jul 2019 08:34:56 +0100 Subject: [PATCH] Use applied cluster state in cluster health (#44426) In #44348 we changed the cluster health action so that it sometimes uses the cluster state directly from the master service rather than from the cluster applier. If the state is not recovered then this is inappropriate, because prior to state recovery the state available to the cluster applier contains no indices. This commit moves us back to using the state from the applier. Fixes #44416. --- .../health/TransportClusterHealthAction.java | 8 +++++++- .../gateway/GatewayIndexStateIT.java | 15 ++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/action/admin/cluster/health/TransportClusterHealthAction.java b/server/src/main/java/org/elasticsearch/action/admin/cluster/health/TransportClusterHealthAction.java index d320ac1b763..06edbad8327 100644 --- a/server/src/main/java/org/elasticsearch/action/admin/cluster/health/TransportClusterHealthAction.java +++ b/server/src/main/java/org/elasticsearch/action/admin/cluster/health/TransportClusterHealthAction.java @@ -145,7 +145,13 @@ public class TransportClusterHealthAction extends StreamableTransportMasterNodeR final long timeoutInMillis = Math.max(0, endTimeRelativeMillis - threadPool.relativeTimeInMillis()); final TimeValue newTimeout = TimeValue.timeValueMillis(timeoutInMillis); request.timeout(newTimeout); - executeHealth(request, newState, listener, waitCount, + + // we must use the state from the applier service, because if the state-not-recovered block is in place then the + // applier service has a different view of the cluster state from the one supplied here + final ClusterState appliedState = clusterService.state(); + assert newState.stateUUID().equals(appliedState.stateUUID()) + : newState.stateUUID() + " vs " + appliedState.stateUUID(); + executeHealth(request, appliedState, listener, waitCount, observedState -> waitForEventsAndExecuteHealth(request, listener, waitCount, endTimeRelativeMillis)); } diff --git a/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java b/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java index 1925d15d78e..53d3635ec3f 100644 --- a/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java +++ b/server/src/test/java/org/elasticsearch/gateway/GatewayIndexStateIT.java @@ -211,7 +211,6 @@ public class GatewayIndexStateIT extends ESIntegTestCase { client().prepareIndex("test", "type1", "2").setSource("field1", "value1").execute().actionGet(); } - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/44416") public void testJustMasterNode() throws Exception { logger.info("--> cleaning nodes"); @@ -221,11 +220,13 @@ public class GatewayIndexStateIT extends ESIntegTestCase { logger.info("--> create an index"); client().admin().indices().prepareCreate("test").setWaitForActiveShards(ActiveShardCount.NONE).execute().actionGet(); - logger.info("--> closing master node"); - internalCluster().closeNonSharedNodes(false); - - logger.info("--> starting 1 master node non data again"); - internalCluster().startNode(Settings.builder().put(Node.NODE_DATA_SETTING.getKey(), false).build()); + logger.info("--> restarting master node"); + internalCluster().fullRestart(new RestartCallback(){ + @Override + public Settings onNodeStopped(String nodeName) { + return Settings.builder().put(Node.NODE_DATA_SETTING.getKey(), false).build(); + } + }); logger.info("--> waiting for test index to be created"); ClusterHealthResponse health = client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).setIndices("test") @@ -237,7 +238,7 @@ public class GatewayIndexStateIT extends ESIntegTestCase { assertThat(clusterStateResponse.getState().metaData().hasIndex("test"), equalTo(true)); } - public void testJustMasterNodeAndJustDataNode() throws Exception { + public void testJustMasterNodeAndJustDataNode() { logger.info("--> cleaning nodes"); logger.info("--> starting 1 master node non data");