make election stop not be a failure (#19705)

During our master elections, nodes "vote" for a master being issuing a join request to it. Since this is done in an async fashion, joins may arrive before the master itself has realized it had won the election. Therefore we start accumulating node joins on every node at election start (we don't know the result yet). When the election finish nodes that did not become the master (i.e., joined another node which won the election) need to potentially process and fail any incoming join request they may have received during the election. This is currently achieved by always issuing a cluster state update task that is doomed to fail, even if no pending joins are actually there. That aspect results in confusing (debug) log messages, making it seems like something is wrong. For example (note that `NotMasterException`)

```
[2016-07-30 22:25:53,040][DEBUG][cluster.service          ] [node_t1] processing [zen-disco-process-pending-joins [{node_t0}{4SqBTyYNQ82J9c75Cs7jtg}{kutaNSYbTZCSybvqczgWCA}{127.0.0.1}{127.0.0.1:9400} elected]]: execute
[2016-07-30 22:25:53,041][DEBUG][transport                ] [node_t1] connected to node [{node_t0}{4SqBTyYNQ82J9c75Cs7jtg}{kutaNSYbTZCSybvqczgWCA}{127.0.0.1}{127.0.0.1:9400}]
[2016-07-30 22:25:53,045][DEBUG][cluster.service          ] [node_t1] cluster state update task [zen-disco-process-pending-joins [{node_t0}{4SqBTyYNQ82J9c75Cs7jtg}{kutaNSYbTZCSybvqczgWCA}{127.0.0.1}{127.0.0.1:9400} elected]] failed
NotMasterException[Node [{node_t1}{eAQts270TiGFpoCDE-0PQQ}{or5bsv2ET220su78DLJk5g}{127.0.0.1}{127.0.0.1:9401}] not master for join request]
[2016-07-30 22:25:53,048][DEBUG][cluster.service          ] [node_t1] processing [zen-disco-process-pending-joins [{node_t0}{4SqBTyYNQ82J9c75Cs7jtg}{kutaNSYbTZCSybvqczgWCA}{127.0.0.1}{127.0.0.1:9400} elected]]: took [7ms] no change in cluster_state
```

This commit cleans up  the logic a bit to only use failure where there are actual joins that are failed. The result is cleaner logs as well:

```
[2016-07-30 22:23:12,880][DEBUG][cluster.service          ] [node_t1] processing [zen-disco-election-stop [{node_t0}{jMR5HCpOQnOM4pGeFkUjng}{B5WIZQAdQk2cWbjGZ21mvQ}{127.0.0.1}{127.0.0.1:9400} elected]]: execute
[2016-07-30 22:23:12,881][DEBUG][cluster.service          ] [node_t1] processing [zen-disco-election-stop [{node_t0}{jMR5HCpOQnOM4pGeFkUjng}{B5WIZQAdQk2cWbjGZ21mvQ}{127.0.0.1}{127.0.0.1:9400} elected]]: took [0s] no change in cluster_state
[2016-07-30 22:23:12,881][DEBUG][transport                ] [node_t1] connected to node [{node_t0}{jMR5HCpOQnOM4pGeFkUjng}{B5WIZQAdQk2cWbjGZ21mvQ}{127.0.0.1}{127.0.0.1:9400}]
```
This commit is contained in:
Boaz Leskes 2016-08-01 13:08:50 +02:00 committed by GitHub
parent 5104437e4f
commit 7c6527ed09
1 changed files with 29 additions and 18 deletions

View File

@ -281,16 +281,16 @@ public class NodeJoinController extends AbstractComponent {
Map<DiscoveryNode, ClusterStateTaskListener> tasks = getPendingAsTasks();
final String source = "zen-disco-elected-as-master ([" + tasks.size() + "] nodes joined)";
tasks.put(BECOME_MASTER_TASK, joinProcessedListener);
tasks.put(BECOME_MASTER_TASK, (source1, e) -> {}); // noop listener, the election finished listener determines result
tasks.put(FINISH_ELECTION_TASK, electionFinishedListener);
clusterService.submitStateUpdateTasks(source, tasks, ClusterStateTaskConfig.build(Priority.URGENT), joinTaskExecutor);
}
public synchronized void closeAndProcessPending(String reason) {
innerClose();
Map<DiscoveryNode, ClusterStateTaskListener> tasks = getPendingAsTasks();
final String source = "zen-disco-process-pending-joins [" + reason + "]";
tasks.put(FINISH_ELECTION_NOT_MASTER_TASK, joinProcessedListener);
final String source = "zen-disco-election-stop [" + reason + "]";
tasks.put(FINISH_ELECTION_TASK, electionFinishedListener);
clusterService.submitStateUpdateTasks(source, tasks, ClusterStateTaskConfig.build(Priority.URGENT), joinTaskExecutor);
}
@ -327,12 +327,15 @@ public class NodeJoinController extends AbstractComponent {
}
}
private final ClusterStateTaskListener joinProcessedListener = new ClusterStateTaskListener() {
private final ClusterStateTaskListener electionFinishedListener = new ClusterStateTaskListener() {
@Override
public void clusterStateProcessed(String source, ClusterState oldState, ClusterState newState) {
assert newState.nodes().isLocalNodeElectedMaster() : "should have become a master but isn't " + newState.prettyPrint();
onElectedAsMaster(newState);
if (newState.nodes().isLocalNodeElectedMaster()) {
ElectionContext.this.onElectedAsMaster(newState);
} else {
onFailure(source, new NotMasterException("election stopped [" + source + "]"));
}
}
@Override
@ -379,7 +382,9 @@ public class NodeJoinController extends AbstractComponent {
}
}
// a task indicated that the current node should become master, if no current master is known
/**
* a task indicated that the current node should become master, if no current master is known
*/
private static final DiscoveryNode BECOME_MASTER_TASK = new DiscoveryNode("_BECOME_MASTER_TASK_", LocalTransportAddress.buildUnique(),
Collections.emptyMap(), Collections.emptySet(), Version.CURRENT) {
@Override
@ -388,9 +393,11 @@ public class NodeJoinController extends AbstractComponent {
}
};
// a task that is used to process pending joins without explicitly becoming a master and listening to the results
// this task is used when election is stop without the local node becoming a master per se (though it might
private static final DiscoveryNode FINISH_ELECTION_NOT_MASTER_TASK = new DiscoveryNode("_NOT_MASTER_TASK_",
/**
* a task that is used to signal the election is stopped and we should process pending joins.
* it may be use in combination with {@link #BECOME_MASTER_TASK}
*/
private static final DiscoveryNode FINISH_ELECTION_TASK = new DiscoveryNode("_FINISH_ELECTION_",
LocalTransportAddress.buildUnique(), Collections.emptyMap(), Collections.emptySet(), Version.CURRENT) {
@Override
public String toString() {
@ -402,31 +409,35 @@ public class NodeJoinController extends AbstractComponent {
@Override
public BatchResult<DiscoveryNode> execute(ClusterState currentState, List<DiscoveryNode> joiningNodes) throws Exception {
final DiscoveryNodes currentNodes = currentState.nodes();
final BatchResult.Builder<DiscoveryNode> results = BatchResult.builder();
final DiscoveryNodes currentNodes = currentState.nodes();
boolean nodesChanged = false;
ClusterState.Builder newState = ClusterState.builder(currentState);
DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(currentNodes);
if (currentNodes.getMasterNode() == null && joiningNodes.contains(BECOME_MASTER_TASK)) {
if (joiningNodes.size() == 1 && joiningNodes.get(0).equals(FINISH_ELECTION_TASK)) {
return results.successes(joiningNodes).build(currentState);
} else if (currentNodes.getMasterNode() == null && joiningNodes.contains(BECOME_MASTER_TASK)) {
assert joiningNodes.contains(FINISH_ELECTION_TASK) : "becoming a master but election is not finished " + joiningNodes;
// use these joins to try and become the master.
// Note that we don't have to do any validation of the amount of joining nodes - the commit
// during the cluster state publishing guarantees that we have enough
nodesBuilder.masterNodeId(currentNodes.getLocalNodeId());
ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(currentState.blocks())
.removeGlobalBlock(discoverySettings.getNoMasterBlock()).build();
newState.blocks(clusterBlocks);
nodesChanged = true;
}
if (nodesBuilder.isLocalNodeElectedMaster() == false) {
} else if (nodesBuilder.isLocalNodeElectedMaster() == false) {
logger.trace("processing node joins, but we are not the master. current master: {}", currentNodes.getMasterNode());
throw new NotMasterException("Node [" + currentNodes.getLocalNode() + "] not master for join request");
}
assert nodesBuilder.isLocalNodeElectedMaster();
// processing any joins
for (final DiscoveryNode node : joiningNodes) {
if (node.equals(BECOME_MASTER_TASK) || node.equals(FINISH_ELECTION_NOT_MASTER_TASK)) {
if (node.equals(BECOME_MASTER_TASK) || node.equals(FINISH_ELECTION_TASK)) {
// noop
} else if (currentNodes.nodeExists(node)) {
logger.debug("received a join request for an existing node [{}]", node);