diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelper.java b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelper.java index a4f31fb3ab1..ee9d3e0468d 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelper.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelper.java @@ -31,6 +31,7 @@ import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.TransportAddress; import org.elasticsearch.common.unit.TimeValue; import org.elasticsearch.common.util.concurrent.AbstractRunnable; +import org.elasticsearch.gateway.GatewayMetaState; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.threadpool.ThreadPool.Names; @@ -210,7 +211,12 @@ public class ClusterFormationFailureHelper { assert requiredNodes <= realNodeIds.size() : nodeIds; if (nodeIds.size() == 1) { - return "a node with id " + realNodeIds; + if (nodeIds.contains(GatewayMetaState.STALE_STATE_CONFIG_NODE_ID)) { + return "one or more nodes that have already participated as master-eligible nodes in the cluster but this node was " + + "not master-eligible the last time it joined the cluster"; + } else { + return "a node with id " + realNodeIds; + } } else if (nodeIds.size() == 2) { return "two nodes with ids " + realNodeIds; } else { diff --git a/server/src/main/java/org/elasticsearch/gateway/GatewayMetaState.java b/server/src/main/java/org/elasticsearch/gateway/GatewayMetaState.java index 8ac1b73ae5d..3fab416f73c 100644 --- a/server/src/main/java/org/elasticsearch/gateway/GatewayMetaState.java +++ b/server/src/main/java/org/elasticsearch/gateway/GatewayMetaState.java @@ -81,6 +81,13 @@ import static org.elasticsearch.common.util.concurrent.EsExecutors.daemonThreadF */ public class GatewayMetaState implements Closeable { + /** + * Fake node ID for a voting configuration written by a master-ineligible data node to indicate that its on-disk state is potentially + * stale (since it is written asynchronously after application, rather than before acceptance). This node ID means that if the node is + * restarted as a master-eligible node then it does not win any elections until it has received a fresh cluster state. + */ + public static final String STALE_STATE_CONFIG_NODE_ID = "STALE_STATE_CONFIG"; + // Set by calling start() private final SetOnce persistedState = new SetOnce<>(); @@ -425,7 +432,7 @@ public class GatewayMetaState implements Closeable { } static final CoordinationMetaData.VotingConfiguration staleStateConfiguration = - new CoordinationMetaData.VotingConfiguration(Collections.singleton("STALE_STATE_CONFIG")); + new CoordinationMetaData.VotingConfiguration(Collections.singleton(STALE_STATE_CONFIG_NODE_ID)); static ClusterState resetVotingConfiguration(ClusterState clusterState) { CoordinationMetaData newCoordinationMetaData = CoordinationMetaData.builder(clusterState.coordinationMetaData()) diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelperTests.java b/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelperTests.java index ec115265652..dcea2b1118b 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelperTests.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/ClusterFormationFailureHelperTests.java @@ -29,6 +29,7 @@ import org.elasticsearch.cluster.node.DiscoveryNodeRole; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.transport.TransportAddress; +import org.elasticsearch.gateway.GatewayMetaState; import org.elasticsearch.test.ESTestCase; import java.util.Arrays; @@ -412,5 +413,14 @@ public class ClusterFormationFailureHelperTests extends ESTestCase { "have discovered [] which is not a quorum; " + "discovery will continue using [] from hosts providers and [" + otherMasterNode + ", " + localNode + "] from last-known cluster state; node term 0, last-accepted version 0 in term 0"))); + + assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, GatewayMetaState.STALE_STATE_CONFIG_NODE_ID), emptyList(), + emptyList(), 0L, electionStrategy).getDescription(), + is("master not discovered or elected yet, an election requires one or more nodes that have already participated as " + + "master-eligible nodes in the cluster but this node was not master-eligible the last time it joined the cluster, " + + "have discovered [] which is not a quorum; " + + "discovery will continue using [] from hosts providers and [" + localNode + + "] from last-known cluster state; node term 0, last-accepted version 0 in term 0")); + } }