[Zen2] Only elect master-eligible nodes (#35996)
Today any node can win an election. However, the whole point of master-eligibility is that master-ineligible nodes should not be elected as the leader; furthermore master-ineligible nodes do not have any outgoing STATE channels so cannot publish cluster states, so their leadership is ineffective and disruptive. This change ensures that the elected leader is master-eligible by preventing master-ineligible nodes from scheduling an election.
This commit is contained in:
parent
0b45fb98b9
commit
87408b04d4
|
@ -344,6 +344,7 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery
|
|||
|
||||
private void handleJoinRequest(JoinRequest joinRequest, JoinHelper.JoinCallback joinCallback) {
|
||||
assert Thread.holdsLock(mutex) == false;
|
||||
assert getLocalNode().isMasterNode() : getLocalNode() + " received a join but is not master-eligible";
|
||||
logger.trace("handleJoinRequest: as {}, handling {}", mode, joinRequest);
|
||||
transportService.connectToNode(joinRequest.getSourceNode());
|
||||
|
||||
|
@ -392,6 +393,8 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery
|
|||
void becomeLeader(String method) {
|
||||
assert Thread.holdsLock(mutex) : "Coordinator mutex not held";
|
||||
assert mode == Mode.CANDIDATE : "expected candidate but was " + mode;
|
||||
assert getLocalNode().isMasterNode() : getLocalNode() + " became a leader but is not master-eligible";
|
||||
|
||||
logger.debug("{}: becoming LEADER (was {}, lastKnownLeader was [{}])", method, mode, lastKnownLeader);
|
||||
|
||||
mode = Mode.LEADER;
|
||||
|
@ -409,6 +412,8 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery
|
|||
|
||||
void becomeFollower(String method, DiscoveryNode leaderNode) {
|
||||
assert Thread.holdsLock(mutex) : "Coordinator mutex not held";
|
||||
assert leaderNode.isMasterNode() : leaderNode + " became a leader but is not master-eligible";
|
||||
|
||||
logger.debug("{}: becoming FOLLOWER of [{}] (was {}, lastKnownLeader was [{}])", method, leaderNode, mode, lastKnownLeader);
|
||||
|
||||
final boolean restartLeaderChecker = (mode == Mode.FOLLOWER && Optional.of(leaderNode).equals(lastKnownLeader)) == false;
|
||||
|
@ -642,9 +647,9 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery
|
|||
logger.info("setting initial configuration to {}", votingConfiguration);
|
||||
final Builder builder = masterService.incrementVersion(currentState);
|
||||
final CoordinationMetaData coordinationMetaData = CoordinationMetaData.builder(currentState.coordinationMetaData())
|
||||
.lastAcceptedConfiguration(votingConfiguration)
|
||||
.lastCommittedConfiguration(votingConfiguration)
|
||||
.build();
|
||||
.lastAcceptedConfiguration(votingConfiguration)
|
||||
.lastCommittedConfiguration(votingConfiguration)
|
||||
.build();
|
||||
|
||||
MetaData.Builder metaDataBuilder = MetaData.builder();
|
||||
// automatically generate a UID for the metadata if we need to
|
||||
|
@ -927,6 +932,11 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery
|
|||
|
||||
private void startElectionScheduler() {
|
||||
assert electionScheduler == null : electionScheduler;
|
||||
|
||||
if (getLocalNode().isMasterNode() == false) {
|
||||
return;
|
||||
}
|
||||
|
||||
final TimeValue gracePeriod = TimeValue.ZERO; // TODO variable grace period
|
||||
electionScheduler = electionSchedulerFactory.startElectionScheduler(gracePeriod, new Runnable() {
|
||||
@Override
|
||||
|
|
|
@ -166,6 +166,7 @@ public class JoinHelper {
|
|||
}
|
||||
|
||||
public void sendJoinRequest(DiscoveryNode destination, Optional<Join> optionalJoin) {
|
||||
assert destination.isMasterNode() : "trying to join master-ineligible " + destination;
|
||||
final JoinRequest joinRequest = new JoinRequest(transportService.getLocalNode(), optionalJoin);
|
||||
final Tuple<DiscoveryNode, JoinRequest> dedupKey = Tuple.tuple(destination, joinRequest);
|
||||
if (pendingOutgoingJoins.add(dedupKey)) {
|
||||
|
@ -210,6 +211,8 @@ public class JoinHelper {
|
|||
}
|
||||
|
||||
public void sendStartJoinRequest(final StartJoinRequest startJoinRequest, final DiscoveryNode destination) {
|
||||
assert startJoinRequest.getSourceNode().isMasterNode()
|
||||
: "sending start-join request for master-ineligible " + startJoinRequest.getSourceNode();
|
||||
transportService.sendRequest(destination, START_JOIN_ACTION_NAME,
|
||||
startJoinRequest, new TransportResponseHandler<Empty>() {
|
||||
@Override
|
||||
|
|
|
@ -23,19 +23,22 @@ import org.apache.logging.log4j.LogManager;
|
|||
import org.apache.logging.log4j.Logger;
|
||||
import org.apache.logging.log4j.message.ParameterizedMessage;
|
||||
import org.elasticsearch.ElasticsearchException;
|
||||
import org.elasticsearch.Version;
|
||||
import org.elasticsearch.cluster.ClusterName;
|
||||
import org.elasticsearch.cluster.ClusterState;
|
||||
import org.elasticsearch.cluster.coordination.CoordinationMetaData.VotingConfiguration;
|
||||
import org.elasticsearch.cluster.ClusterStateUpdateTask;
|
||||
import org.elasticsearch.cluster.ESAllocationTestCase;
|
||||
import org.elasticsearch.cluster.coordination.ClusterStatePublisher.AckListener;
|
||||
import org.elasticsearch.cluster.coordination.CoordinationMetaData.VotingConfiguration;
|
||||
import org.elasticsearch.cluster.coordination.CoordinationState.PersistedState;
|
||||
import org.elasticsearch.cluster.coordination.Coordinator.Mode;
|
||||
import org.elasticsearch.cluster.coordination.CoordinatorTests.Cluster.ClusterNode;
|
||||
import org.elasticsearch.cluster.metadata.MetaData;
|
||||
import org.elasticsearch.cluster.node.DiscoveryNode;
|
||||
import org.elasticsearch.cluster.node.DiscoveryNode.Role;
|
||||
import org.elasticsearch.cluster.service.ClusterApplier;
|
||||
import org.elasticsearch.common.Randomness;
|
||||
import org.elasticsearch.common.UUIDs;
|
||||
import org.elasticsearch.common.lease.Releasable;
|
||||
import org.elasticsearch.common.settings.ClusterSettings;
|
||||
import org.elasticsearch.common.settings.Setting;
|
||||
|
@ -58,6 +61,7 @@ import java.io.UncheckedIOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Collections;
|
||||
import java.util.EnumSet;
|
||||
import java.util.HashMap;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
|
@ -136,6 +140,15 @@ public class CoordinatorTests extends ESTestCase {
|
|||
}
|
||||
}
|
||||
|
||||
public void testDoesNotElectNonMasterNode() {
|
||||
final Cluster cluster = new Cluster(randomIntBetween(1, 5), false);
|
||||
cluster.runRandomly();
|
||||
cluster.stabilise();
|
||||
|
||||
final ClusterNode leader = cluster.getAnyLeader();
|
||||
assertTrue(leader.localNode.isMasterNode());
|
||||
}
|
||||
|
||||
public void testNodesJoinAfterStableCluster() {
|
||||
final Cluster cluster = new Cluster(randomIntBetween(1, 5));
|
||||
cluster.runRandomly();
|
||||
|
@ -889,10 +902,6 @@ public class CoordinatorTests extends ESTestCase {
|
|||
// then wait for the new leader to commit a state without the old leader
|
||||
+ DEFAULT_CLUSTER_STATE_UPDATE_DELAY;
|
||||
|
||||
private static String nodeIdFromIndex(int nodeIndex) {
|
||||
return "node" + nodeIndex;
|
||||
}
|
||||
|
||||
class Cluster {
|
||||
|
||||
static final long EXTREME_DELAY_VARIABILITY = 10000L;
|
||||
|
@ -910,26 +919,29 @@ public class CoordinatorTests extends ESTestCase {
|
|||
private final Map<Long, ClusterState> committedStatesByVersion = new HashMap<>();
|
||||
|
||||
Cluster(int initialNodeCount) {
|
||||
this(initialNodeCount, true);
|
||||
}
|
||||
|
||||
Cluster(int initialNodeCount, boolean allNodesMasterEligible) {
|
||||
deterministicTaskQueue.setExecutionDelayVariabilityMillis(DEFAULT_DELAY_VARIABILITY);
|
||||
|
||||
assertThat(initialNodeCount, greaterThan(0));
|
||||
|
||||
final Set<String> initialConfigurationNodeIds = new HashSet<>(initialNodeCount);
|
||||
while (initialConfigurationNodeIds.isEmpty()) {
|
||||
for (int i = 0; i < initialNodeCount; i++) {
|
||||
if (randomBoolean()) {
|
||||
initialConfigurationNodeIds.add(nodeIdFromIndex(i));
|
||||
}
|
||||
}
|
||||
}
|
||||
initialConfiguration = new VotingConfiguration(initialConfigurationNodeIds);
|
||||
logger.info("--> creating cluster of {} nodes with initial configuration {}", initialNodeCount, initialConfiguration);
|
||||
|
||||
final Set<String> masterEligibleNodeIds = new HashSet<>(initialNodeCount);
|
||||
clusterNodes = new ArrayList<>(initialNodeCount);
|
||||
for (int i = 0; i < initialNodeCount; i++) {
|
||||
final ClusterNode clusterNode = new ClusterNode(i);
|
||||
final ClusterNode clusterNode = new ClusterNode(i, allNodesMasterEligible || i == 0 || randomBoolean());
|
||||
clusterNodes.add(clusterNode);
|
||||
if (clusterNode.getLocalNode().isMasterNode()) {
|
||||
masterEligibleNodeIds.add(clusterNode.getId());
|
||||
}
|
||||
}
|
||||
|
||||
initialConfiguration = new VotingConfiguration(new HashSet<>(
|
||||
randomSubsetOf(randomIntBetween(1, masterEligibleNodeIds.size()), masterEligibleNodeIds)));
|
||||
|
||||
logger.info("--> creating cluster of {} nodes (master-eligible nodes: {}) with initial configuration {}",
|
||||
initialNodeCount, masterEligibleNodeIds, initialConfiguration);
|
||||
}
|
||||
|
||||
void addNodesAndStabilise(int newNodesCount) {
|
||||
|
@ -950,7 +962,7 @@ public class CoordinatorTests extends ESTestCase {
|
|||
|
||||
final int nodeSizeAtStart = clusterNodes.size();
|
||||
for (int i = 0; i < newNodesCount; i++) {
|
||||
final ClusterNode clusterNode = new ClusterNode(nodeSizeAtStart + i);
|
||||
final ClusterNode clusterNode = new ClusterNode(nodeSizeAtStart + i, true);
|
||||
clusterNodes.add(clusterNode);
|
||||
}
|
||||
}
|
||||
|
@ -1090,11 +1102,11 @@ public class CoordinatorTests extends ESTestCase {
|
|||
deterministicTaskQueue.getExecutionDelayVariabilityMillis(), lessThanOrEqualTo(DEFAULT_DELAY_VARIABILITY));
|
||||
assertFalse("stabilisation requires stable storage", disruptStorage);
|
||||
|
||||
if (clusterNodes.stream().allMatch(n -> n.coordinator.isInitialConfigurationSet() == false)) {
|
||||
if (clusterNodes.stream().allMatch(ClusterNode::isNotUsefullyBootstrapped)) {
|
||||
assertThat("setting initial configuration may fail with disconnected nodes", disconnectedNodes, empty());
|
||||
assertThat("setting initial configuration may fail with blackholed nodes", blackholedNodes, empty());
|
||||
runFor(defaultMillis(DISCOVERY_FIND_PEERS_INTERVAL_SETTING) * 2, "discovery prior to setting initial configuration");
|
||||
final ClusterNode bootstrapNode = getAnyNode();
|
||||
final ClusterNode bootstrapNode = getAnyMasterEligibleNode();
|
||||
bootstrapNode.applyInitialConfiguration();
|
||||
} else {
|
||||
logger.info("setting initial configuration not required");
|
||||
|
@ -1211,6 +1223,10 @@ public class CoordinatorTests extends ESTestCase {
|
|||
return connectionStatus;
|
||||
}
|
||||
|
||||
ClusterNode getAnyMasterEligibleNode() {
|
||||
return randomFrom(clusterNodes.stream().filter(n -> n.getLocalNode().isMasterNode()).collect(Collectors.toList()));
|
||||
}
|
||||
|
||||
ClusterNode getAnyNode() {
|
||||
return getAnyNodeExcept();
|
||||
}
|
||||
|
@ -1283,16 +1299,20 @@ public class CoordinatorTests extends ESTestCase {
|
|||
private DisruptableMockTransport mockTransport;
|
||||
private ClusterStateApplyResponse clusterStateApplyResponse = ClusterStateApplyResponse.SUCCEED;
|
||||
|
||||
ClusterNode(int nodeIndex) {
|
||||
ClusterNode(int nodeIndex, boolean masterEligible) {
|
||||
this.nodeIndex = nodeIndex;
|
||||
localNode = createDiscoveryNode();
|
||||
localNode = createDiscoveryNode(masterEligible);
|
||||
persistedState = new MockPersistedState(0L,
|
||||
clusterState(0L, 0L, localNode, VotingConfiguration.EMPTY_CONFIG, VotingConfiguration.EMPTY_CONFIG, 0L));
|
||||
onNode(localNode, this::setUp).run();
|
||||
}
|
||||
|
||||
private DiscoveryNode createDiscoveryNode() {
|
||||
return CoordinationStateTests.createNode(nodeIdFromIndex(nodeIndex));
|
||||
private DiscoveryNode createDiscoveryNode(boolean masterEligible) {
|
||||
final TransportAddress address = buildNewFakeTransportAddress();
|
||||
return new DiscoveryNode("", "node" + nodeIndex,
|
||||
UUIDs.randomBase64UUID(random()), // generated deterministically for repeatable tests
|
||||
address.address().getHostString(), address.getAddress(), address, Collections.emptyMap(),
|
||||
masterEligible ? EnumSet.allOf(Role.class) : emptySet(), Version.CURRENT);
|
||||
}
|
||||
|
||||
private void setUp() {
|
||||
|
@ -1483,6 +1503,10 @@ public class CoordinatorTests extends ESTestCase {
|
|||
}).run();
|
||||
}
|
||||
|
||||
private boolean isNotUsefullyBootstrapped() {
|
||||
return getLocalNode().isMasterNode() == false || coordinator.isInitialConfigurationSet() == false;
|
||||
}
|
||||
|
||||
private class FakeClusterApplier implements ClusterApplier {
|
||||
|
||||
final ClusterName clusterName;
|
||||
|
|
Loading…
Reference in New Issue