Describe STALE_STATE_CONFIG in ClusterFormationFH (#53878)
We mark cluster states persisted on master-ineligible nodes as potentially-stale using the voting configuration `{STALE_STATE_CONFIG}` which prevents these nodes from being elected as master if they are restarted as master-eligible. Today we do not handle this special voting configuration differently in the `ClusterFormationFailureHelper`, leading to a mysterious message `an election requires a node with id [STALE_STATE_CONFIG]` if the election does not succeed. This commit adds a special case description for this situation to better explain why this node cannot win an election. Closes #53734
This commit is contained in:
parent
0cfe6d90cc
commit
879e26ec06
|
@ -31,6 +31,7 @@ import org.elasticsearch.common.settings.Settings;
|
|||
import org.elasticsearch.common.transport.TransportAddress;
|
||||
import org.elasticsearch.common.unit.TimeValue;
|
||||
import org.elasticsearch.common.util.concurrent.AbstractRunnable;
|
||||
import org.elasticsearch.gateway.GatewayMetaState;
|
||||
import org.elasticsearch.threadpool.ThreadPool;
|
||||
import org.elasticsearch.threadpool.ThreadPool.Names;
|
||||
|
||||
|
@ -210,7 +211,12 @@ public class ClusterFormationFailureHelper {
|
|||
assert requiredNodes <= realNodeIds.size() : nodeIds;
|
||||
|
||||
if (nodeIds.size() == 1) {
|
||||
return "a node with id " + realNodeIds;
|
||||
if (nodeIds.contains(GatewayMetaState.STALE_STATE_CONFIG_NODE_ID)) {
|
||||
return "one or more nodes that have already participated as master-eligible nodes in the cluster but this node was " +
|
||||
"not master-eligible the last time it joined the cluster";
|
||||
} else {
|
||||
return "a node with id " + realNodeIds;
|
||||
}
|
||||
} else if (nodeIds.size() == 2) {
|
||||
return "two nodes with ids " + realNodeIds;
|
||||
} else {
|
||||
|
|
|
@ -81,6 +81,13 @@ import static org.elasticsearch.common.util.concurrent.EsExecutors.daemonThreadF
|
|||
*/
|
||||
public class GatewayMetaState implements Closeable {
|
||||
|
||||
/**
|
||||
* Fake node ID for a voting configuration written by a master-ineligible data node to indicate that its on-disk state is potentially
|
||||
* stale (since it is written asynchronously after application, rather than before acceptance). This node ID means that if the node is
|
||||
* restarted as a master-eligible node then it does not win any elections until it has received a fresh cluster state.
|
||||
*/
|
||||
public static final String STALE_STATE_CONFIG_NODE_ID = "STALE_STATE_CONFIG";
|
||||
|
||||
// Set by calling start()
|
||||
private final SetOnce<PersistedState> persistedState = new SetOnce<>();
|
||||
|
||||
|
@ -425,7 +432,7 @@ public class GatewayMetaState implements Closeable {
|
|||
}
|
||||
|
||||
static final CoordinationMetaData.VotingConfiguration staleStateConfiguration =
|
||||
new CoordinationMetaData.VotingConfiguration(Collections.singleton("STALE_STATE_CONFIG"));
|
||||
new CoordinationMetaData.VotingConfiguration(Collections.singleton(STALE_STATE_CONFIG_NODE_ID));
|
||||
|
||||
static ClusterState resetVotingConfiguration(ClusterState clusterState) {
|
||||
CoordinationMetaData newCoordinationMetaData = CoordinationMetaData.builder(clusterState.coordinationMetaData())
|
||||
|
|
|
@ -29,6 +29,7 @@ import org.elasticsearch.cluster.node.DiscoveryNodeRole;
|
|||
import org.elasticsearch.cluster.node.DiscoveryNodes;
|
||||
import org.elasticsearch.common.settings.Settings;
|
||||
import org.elasticsearch.common.transport.TransportAddress;
|
||||
import org.elasticsearch.gateway.GatewayMetaState;
|
||||
import org.elasticsearch.test.ESTestCase;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
@ -412,5 +413,14 @@ public class ClusterFormationFailureHelperTests extends ESTestCase {
|
|||
"have discovered [] which is not a quorum; " +
|
||||
"discovery will continue using [] from hosts providers and [" + otherMasterNode + ", " + localNode +
|
||||
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0")));
|
||||
|
||||
assertThat(new ClusterFormationState(Settings.EMPTY, state(localNode, GatewayMetaState.STALE_STATE_CONFIG_NODE_ID), emptyList(),
|
||||
emptyList(), 0L, electionStrategy).getDescription(),
|
||||
is("master not discovered or elected yet, an election requires one or more nodes that have already participated as " +
|
||||
"master-eligible nodes in the cluster but this node was not master-eligible the last time it joined the cluster, " +
|
||||
"have discovered [] which is not a quorum; " +
|
||||
"discovery will continue using [] from hosts providers and [" + localNode +
|
||||
"] from last-known cluster state; node term 0, last-accepted version 0 in term 0"));
|
||||
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue