Allow election of nodes outside voting config (#43243)
Today we suppress election attempts on master-eligible nodes that are not in the voting configuration. In fact this restriction is not necessary: any master-eligible node can safely become master as long as it has a fresh enough cluster state and can gather a quorum of votes. Moreover, this restriction is sometimes undesirable: there may be a reason why we do not want any of the nodes in the voting configuration to become master. The reason for this restriction is as follows. If you want to shut the master down then you might first exclude it from the voting configuration. When this exclusion succeeds you might reasonably expect that a new master has been elected, since the voting config exclusion is almost always a step towards shutting the node down. If we allow nodes outside the voting configuration to be the master then the excluded node will continue to be master, which is confusing. This commit adjusts the logic to allow master-eligible nodes to attempt an election even if they are not in the voting configuration. If such a master is successfully elected then it adds itself to the voting configuration. This commit also adjusts the logic that causes master nodes to abdicate when they are excluded from the voting configuration, to avoid the confusion described above. Relates #37712, #37802.
This commit is contained in:
parent
5a9c48369b
commit
2e064e0d13
|
@ -380,9 +380,8 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery
|
||||||
// The preVoteCollector is only active while we are candidate, but it does not call this method with synchronisation, so we have
|
// The preVoteCollector is only active while we are candidate, but it does not call this method with synchronisation, so we have
|
||||||
// to check our mode again here.
|
// to check our mode again here.
|
||||||
if (mode == Mode.CANDIDATE) {
|
if (mode == Mode.CANDIDATE) {
|
||||||
if (electionQuorumContainsLocalNode(getLastAcceptedState()) == false) {
|
if (localNodeMayWinElection(getLastAcceptedState()) == false) {
|
||||||
logger.trace("skip election as local node is not part of election quorum: {}",
|
logger.trace("skip election as local node may not win it: {}", getLastAcceptedState().coordinationMetaData());
|
||||||
getLastAcceptedState().coordinationMetaData());
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -415,16 +414,17 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery
|
||||||
becomeCandidate("after abdicating to " + newMaster);
|
becomeCandidate("after abdicating to " + newMaster);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean electionQuorumContainsLocalNode(ClusterState lastAcceptedState) {
|
private static boolean localNodeMayWinElection(ClusterState lastAcceptedState) {
|
||||||
final DiscoveryNode localNode = lastAcceptedState.nodes().getLocalNode();
|
final DiscoveryNode localNode = lastAcceptedState.nodes().getLocalNode();
|
||||||
assert localNode != null;
|
assert localNode != null;
|
||||||
return electionQuorumContains(lastAcceptedState, localNode);
|
return nodeMayWinElection(lastAcceptedState, localNode);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean electionQuorumContains(ClusterState lastAcceptedState, DiscoveryNode node) {
|
private static boolean nodeMayWinElection(ClusterState lastAcceptedState, DiscoveryNode node) {
|
||||||
final String nodeId = node.getId();
|
final String nodeId = node.getId();
|
||||||
return lastAcceptedState.getLastCommittedConfiguration().getNodeIds().contains(nodeId)
|
return lastAcceptedState.getLastCommittedConfiguration().getNodeIds().contains(nodeId)
|
||||||
|| lastAcceptedState.getLastAcceptedConfiguration().getNodeIds().contains(nodeId);
|
|| lastAcceptedState.getLastAcceptedConfiguration().getNodeIds().contains(nodeId)
|
||||||
|
|| lastAcceptedState.getVotingConfigExclusions().stream().noneMatch(vce -> vce.getNodeId().equals(nodeId));
|
||||||
}
|
}
|
||||||
|
|
||||||
private Optional<Join> ensureTermAtLeast(DiscoveryNode sourceNode, long targetTerm) {
|
private Optional<Join> ensureTermAtLeast(DiscoveryNode sourceNode, long targetTerm) {
|
||||||
|
@ -867,8 +867,8 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery
|
||||||
metaDataBuilder.coordinationMetaData(coordinationMetaData);
|
metaDataBuilder.coordinationMetaData(coordinationMetaData);
|
||||||
|
|
||||||
coordinationState.get().setInitialState(ClusterState.builder(currentState).metaData(metaDataBuilder).build());
|
coordinationState.get().setInitialState(ClusterState.builder(currentState).metaData(metaDataBuilder).build());
|
||||||
assert electionQuorumContainsLocalNode(getLastAcceptedState()) :
|
assert localNodeMayWinElection(getLastAcceptedState()) :
|
||||||
"initial state does not have local node in its election quorum: " + getLastAcceptedState().coordinationMetaData();
|
"initial state does not allow local node to win election: " + getLastAcceptedState().coordinationMetaData();
|
||||||
preVoteCollector.update(getPreVoteResponse(), null); // pick up the change to last-accepted version
|
preVoteCollector.update(getPreVoteResponse(), null); // pick up the change to last-accepted version
|
||||||
startElectionScheduler();
|
startElectionScheduler();
|
||||||
return true;
|
return true;
|
||||||
|
@ -1164,8 +1164,8 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery
|
||||||
if (mode == Mode.CANDIDATE) {
|
if (mode == Mode.CANDIDATE) {
|
||||||
final ClusterState lastAcceptedState = coordinationState.get().getLastAcceptedState();
|
final ClusterState lastAcceptedState = coordinationState.get().getLastAcceptedState();
|
||||||
|
|
||||||
if (electionQuorumContainsLocalNode(lastAcceptedState) == false) {
|
if (localNodeMayWinElection(lastAcceptedState) == false) {
|
||||||
logger.trace("skip prevoting as local node is not part of election quorum: {}",
|
logger.trace("skip prevoting as local node may not win election: {}",
|
||||||
lastAcceptedState.coordinationMetaData());
|
lastAcceptedState.coordinationMetaData());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -1329,16 +1329,20 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery
|
||||||
updateMaxTermSeen(getCurrentTerm());
|
updateMaxTermSeen(getCurrentTerm());
|
||||||
|
|
||||||
if (mode == Mode.LEADER) {
|
if (mode == Mode.LEADER) {
|
||||||
|
// if necessary, abdicate to another node or improve the voting configuration
|
||||||
|
boolean attemptReconfiguration = true;
|
||||||
final ClusterState state = getLastAcceptedState(); // committed state
|
final ClusterState state = getLastAcceptedState(); // committed state
|
||||||
if (electionQuorumContainsLocalNode(state) == false) {
|
if (localNodeMayWinElection(state) == false) {
|
||||||
final List<DiscoveryNode> masterCandidates = completedNodes().stream()
|
final List<DiscoveryNode> masterCandidates = completedNodes().stream()
|
||||||
.filter(DiscoveryNode::isMasterNode)
|
.filter(DiscoveryNode::isMasterNode)
|
||||||
.filter(node -> electionQuorumContains(state, node))
|
.filter(node -> nodeMayWinElection(state, node))
|
||||||
.collect(Collectors.toList());
|
.collect(Collectors.toList());
|
||||||
if (masterCandidates.isEmpty() == false) {
|
if (masterCandidates.isEmpty() == false) {
|
||||||
abdicateTo(masterCandidates.get(random.nextInt(masterCandidates.size())));
|
abdicateTo(masterCandidates.get(random.nextInt(masterCandidates.size())));
|
||||||
|
attemptReconfiguration = false;
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
if (attemptReconfiguration) {
|
||||||
scheduleReconfigurationIfNeeded();
|
scheduleReconfigurationIfNeeded();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -150,6 +150,11 @@ public class Reconfigurator {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int compareTo(VotingConfigNode other) {
|
public int compareTo(VotingConfigNode other) {
|
||||||
|
// prefer current master
|
||||||
|
final int currentMasterComp = Boolean.compare(other.currentMaster, currentMaster);
|
||||||
|
if (currentMasterComp != 0) {
|
||||||
|
return currentMasterComp;
|
||||||
|
}
|
||||||
// prefer nodes that are live
|
// prefer nodes that are live
|
||||||
final int liveComp = Boolean.compare(other.live, live);
|
final int liveComp = Boolean.compare(other.live, live);
|
||||||
if (liveComp != 0) {
|
if (liveComp != 0) {
|
||||||
|
@ -160,11 +165,6 @@ public class Reconfigurator {
|
||||||
if (inCurrentConfigComp != 0) {
|
if (inCurrentConfigComp != 0) {
|
||||||
return inCurrentConfigComp;
|
return inCurrentConfigComp;
|
||||||
}
|
}
|
||||||
// prefer current master
|
|
||||||
final int currentMasterComp = Boolean.compare(other.currentMaster, currentMaster);
|
|
||||||
if (currentMasterComp != 0) {
|
|
||||||
return currentMasterComp;
|
|
||||||
}
|
|
||||||
// tiebreak by node id to have stable ordering
|
// tiebreak by node id to have stable ordering
|
||||||
return id.compareTo(other.id);
|
return id.compareTo(other.id);
|
||||||
}
|
}
|
||||||
|
|
|
@ -53,7 +53,7 @@ public class ReconfiguratorTests extends ESTestCase {
|
||||||
|
|
||||||
check(nodes("a"), conf("a"), true, conf("a"));
|
check(nodes("a"), conf("a"), true, conf("a"));
|
||||||
check(nodes("a", "b"), conf("a"), true, conf("a"));
|
check(nodes("a", "b"), conf("a"), true, conf("a"));
|
||||||
check(nodes("a", "b"), conf("b"), true, conf("b"));
|
check(nodes("a", "b"), conf("b"), true, conf("a"));
|
||||||
check(nodes("a", "b"), conf("a", "c"), true, conf("a"));
|
check(nodes("a", "b"), conf("a", "c"), true, conf("a"));
|
||||||
check(nodes("a", "b"), conf("a", "b"), true, conf("a"));
|
check(nodes("a", "b"), conf("a", "b"), true, conf("a"));
|
||||||
check(nodes("a", "b"), conf("a", "b", "e"), true, conf("a", "b", "e"));
|
check(nodes("a", "b"), conf("a", "b", "e"), true, conf("a", "b", "e"));
|
||||||
|
|
|
@ -18,17 +18,38 @@
|
||||||
*/
|
*/
|
||||||
package org.elasticsearch.cluster.coordination;
|
package org.elasticsearch.cluster.coordination;
|
||||||
|
|
||||||
|
import org.elasticsearch.ElasticsearchException;
|
||||||
import org.elasticsearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction;
|
import org.elasticsearch.action.admin.cluster.configuration.AddVotingConfigExclusionsAction;
|
||||||
import org.elasticsearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest;
|
import org.elasticsearch.action.admin.cluster.configuration.AddVotingConfigExclusionsRequest;
|
||||||
|
import org.elasticsearch.cluster.ClusterState;
|
||||||
|
import org.elasticsearch.cluster.node.DiscoveryNode;
|
||||||
import org.elasticsearch.common.Priority;
|
import org.elasticsearch.common.Priority;
|
||||||
|
import org.elasticsearch.plugins.Plugin;
|
||||||
import org.elasticsearch.test.ESIntegTestCase;
|
import org.elasticsearch.test.ESIntegTestCase;
|
||||||
|
import org.elasticsearch.test.transport.MockTransportService;
|
||||||
|
import org.elasticsearch.transport.TransportService;
|
||||||
|
|
||||||
|
import java.util.Collection;
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
import java.util.concurrent.ExecutionException;
|
import java.util.concurrent.ExecutionException;
|
||||||
|
|
||||||
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0)
|
import static org.hamcrest.Matchers.equalTo;
|
||||||
|
import static org.hamcrest.Matchers.hasItem;
|
||||||
|
import static org.hamcrest.Matchers.hasSize;
|
||||||
|
import static org.hamcrest.Matchers.nullValue;
|
||||||
|
|
||||||
|
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0, autoManageMasterNodes = false)
|
||||||
public class VotingConfigurationIT extends ESIntegTestCase {
|
public class VotingConfigurationIT extends ESIntegTestCase {
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected Collection<Class<? extends Plugin>> nodePlugins() {
|
||||||
|
return Collections.singletonList(MockTransportService.TestPlugin.class);
|
||||||
|
}
|
||||||
|
|
||||||
public void testAbdicateAfterVotingConfigExclusionAdded() throws ExecutionException, InterruptedException {
|
public void testAbdicateAfterVotingConfigExclusionAdded() throws ExecutionException, InterruptedException {
|
||||||
|
internalCluster().setBootstrapMasterNodeIndex(0);
|
||||||
internalCluster().startNodes(2);
|
internalCluster().startNodes(2);
|
||||||
final String originalMaster = internalCluster().getMasterName();
|
final String originalMaster = internalCluster().getMasterName();
|
||||||
|
|
||||||
|
@ -38,4 +59,56 @@ public class VotingConfigurationIT extends ESIntegTestCase {
|
||||||
client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get();
|
client().admin().cluster().prepareHealth().setWaitForEvents(Priority.LANGUID).get();
|
||||||
assertNotEquals(originalMaster, internalCluster().getMasterName());
|
assertNotEquals(originalMaster, internalCluster().getMasterName());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void testElectsNodeNotInVotingConfiguration() throws Exception {
|
||||||
|
internalCluster().setBootstrapMasterNodeIndex(0);
|
||||||
|
final List<String> nodeNames = internalCluster().startNodes(4);
|
||||||
|
|
||||||
|
// a 4-node cluster settles on a 3-node configuration; we then prevent the nodes in the configuration from winning an election
|
||||||
|
// by failing at the pre-voting stage, so that the extra node must be elected instead when the master shuts down. This extra node
|
||||||
|
// should then add itself into the voting configuration.
|
||||||
|
|
||||||
|
assertFalse(internalCluster().client().admin().cluster().prepareHealth()
|
||||||
|
.setWaitForNodes("4").setWaitForEvents(Priority.LANGUID).get().isTimedOut());
|
||||||
|
|
||||||
|
String excludedNodeName = null;
|
||||||
|
final ClusterState clusterState
|
||||||
|
= internalCluster().client().admin().cluster().prepareState().clear().setNodes(true).setMetaData(true).get().getState();
|
||||||
|
final Set<String> votingConfiguration = clusterState.getLastCommittedConfiguration().getNodeIds();
|
||||||
|
assertThat(votingConfiguration, hasSize(3));
|
||||||
|
assertThat(clusterState.nodes().getSize(), equalTo(4));
|
||||||
|
assertThat(votingConfiguration, hasItem(clusterState.nodes().getMasterNodeId()));
|
||||||
|
for (DiscoveryNode discoveryNode : clusterState.nodes()) {
|
||||||
|
if (votingConfiguration.contains(discoveryNode.getId()) == false) {
|
||||||
|
assertThat(excludedNodeName, nullValue());
|
||||||
|
excludedNodeName = discoveryNode.getName();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (final String sender : nodeNames) {
|
||||||
|
if (sender.equals(excludedNodeName)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
final MockTransportService senderTransportService
|
||||||
|
= (MockTransportService) internalCluster().getInstance(TransportService.class, sender);
|
||||||
|
for (final String receiver : nodeNames) {
|
||||||
|
senderTransportService.addSendBehavior(internalCluster().getInstance(TransportService.class, receiver),
|
||||||
|
(connection, requestId, action, request, options) -> {
|
||||||
|
if (action.equals(PreVoteCollector.REQUEST_PRE_VOTE_ACTION_NAME)) {
|
||||||
|
throw new ElasticsearchException("rejected");
|
||||||
|
}
|
||||||
|
connection.sendRequest(requestId, action, request, options);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
internalCluster().stopCurrentMasterNode();
|
||||||
|
assertFalse(internalCluster().client().admin().cluster().prepareHealth()
|
||||||
|
.setWaitForNodes("3").setWaitForEvents(Priority.LANGUID).get().isTimedOut());
|
||||||
|
|
||||||
|
final ClusterState newClusterState
|
||||||
|
= internalCluster().client().admin().cluster().prepareState().clear().setNodes(true).setMetaData(true).get().getState();
|
||||||
|
assertThat(newClusterState.nodes().getMasterNode().getName(), equalTo(excludedNodeName));
|
||||||
|
assertThat(newClusterState.getLastCommittedConfiguration().getNodeIds(), hasItem(newClusterState.nodes().getMasterNodeId()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue