Prefer joining node with conflicting transport address when becoming master (#22134)

PR #22049 changed the node update logic to never remove nodes from the cluster state when the cluster state is not published. This led to an issue electing a master (#22120) based on nodes with same transport address (but different node id) as previous nodes. The joining nodes should take precedence over conflicting ones. Note that this only applies to the action of becoming master. If a master is established and a node joins an existing master, it will be rejected if there is another node with same transport address.
This commit is contained in:
Yannick Welsch 2016-12-14 15:04:24 +01:00 committed by GitHub
parent c18f032b08
commit a511fb9ce6
2 changed files with 22 additions and 11 deletions

View File

@ -468,18 +468,26 @@ public class NodeJoinController extends AbstractComponent {
private ClusterState.Builder becomeMasterAndTrimConflictingNodes(ClusterState currentState, List<DiscoveryNode> joiningNodes) {
assert currentState.nodes().getMasterNodeId() == null : currentState;
DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(currentState.nodes());
DiscoveryNodes currentNodes = currentState.nodes();
DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(currentNodes);
nodesBuilder.masterNodeId(currentState.nodes().getLocalNodeId());
ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(currentState.blocks())
.removeGlobalBlock(discoverySettings.getNoMasterBlock()).build();
for (final DiscoveryNode joiningNode : joiningNodes) {
final DiscoveryNode existingNode = nodesBuilder.get(joiningNode.getId());
if (existingNode != null && existingNode.equals(joiningNode) == false) {
logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", existingNode, joiningNode);
nodesBuilder.remove(existingNode.getId());
final DiscoveryNode nodeWithSameId = nodesBuilder.get(joiningNode.getId());
if (nodeWithSameId != null && nodeWithSameId.equals(joiningNode) == false) {
logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameId, joiningNode);
nodesBuilder.remove(nodeWithSameId.getId());
}
final DiscoveryNode nodeWithSameAddress = currentNodes.findByAddress(joiningNode.getAddress());
if (nodeWithSameAddress != null && nodeWithSameAddress.equals(joiningNode) == false) {
logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameAddress,
joiningNode);
nodesBuilder.remove(nodeWithSameAddress.getId());
}
}
// now trim any left over dead nodes - either left there when the previous master stepped down
// or removed by us above
ClusterState tmpState = ClusterState.builder(currentState).nodes(nodesBuilder).blocks(clusterBlocks).build();

View File

@ -626,12 +626,15 @@ public class NodeJoinControllerTests extends ESTestCase {
setState(clusterService, stateBuilder.build());
final DiscoveryNode restartedNode = new DiscoveryNode(otherNode.getId(),
randomBoolean() ? otherNode.getAddress() : buildNewFakeTransportAddress(), otherNode.getAttributes(),
otherNode.getRoles(), Version.CURRENT);
// conflict on node id or address
final DiscoveryNode conflictingNode = randomBoolean() ?
new DiscoveryNode(otherNode.getId(), randomBoolean() ? otherNode.getAddress() : buildNewFakeTransportAddress(),
otherNode.getAttributes(), otherNode.getRoles(), Version.CURRENT) :
new DiscoveryNode("conflicting_address_node", otherNode.getAddress(), otherNode.getAttributes(), otherNode.getRoles(),
Version.CURRENT);
nodeJoinController.startElectionContext();
final SimpleFuture joinFuture = joinNodeAsync(restartedNode);
final SimpleFuture joinFuture = joinNodeAsync(conflictingNode);
final CountDownLatch elected = new CountDownLatch(1);
nodeJoinController.waitToBeElectedAsMaster(1, TimeValue.timeValueHours(5), new NodeJoinController.ElectionCallback() {
@Override
@ -655,9 +658,9 @@ public class NodeJoinControllerTests extends ESTestCase {
assertTrue(finalNodes.isLocalNodeElectedMaster());
assertThat(finalNodes.getLocalNode(), equalTo(masterNode));
assertThat(finalNodes.getSize(), equalTo(2));
assertThat(finalNodes.get(restartedNode.getId()), equalTo(restartedNode));
assertThat(finalNodes.get(conflictingNode.getId()), equalTo(conflictingNode));
List<ShardRouting> activeShardsOnRestartedNode =
StreamSupport.stream(finalState.getRoutingNodes().node(restartedNode.getId()).spliterator(), false)
StreamSupport.stream(finalState.getRoutingNodes().node(conflictingNode.getId()).spliterator(), false)
.filter(ShardRouting::active).collect(Collectors.toList());
assertThat(activeShardsOnRestartedNode, empty());
}