Prefer joining node with conflicting transport address when becoming master (#22134)
PR #22049 changed the node update logic to never remove nodes from the cluster state when the cluster state is not published. This led to an issue electing a master (#22120) based on nodes with same transport address (but different node id) as previous nodes. The joining nodes should take precedence over conflicting ones. Note that this only applies to the action of becoming master. If a master is established and a node joins an existing master, it will be rejected if there is another node with same transport address.
This commit is contained in:
parent
c18f032b08
commit
a511fb9ce6
|
@ -468,18 +468,26 @@ public class NodeJoinController extends AbstractComponent {
|
|||
|
||||
private ClusterState.Builder becomeMasterAndTrimConflictingNodes(ClusterState currentState, List<DiscoveryNode> joiningNodes) {
|
||||
assert currentState.nodes().getMasterNodeId() == null : currentState;
|
||||
DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(currentState.nodes());
|
||||
DiscoveryNodes currentNodes = currentState.nodes();
|
||||
DiscoveryNodes.Builder nodesBuilder = DiscoveryNodes.builder(currentNodes);
|
||||
nodesBuilder.masterNodeId(currentState.nodes().getLocalNodeId());
|
||||
ClusterBlocks clusterBlocks = ClusterBlocks.builder().blocks(currentState.blocks())
|
||||
.removeGlobalBlock(discoverySettings.getNoMasterBlock()).build();
|
||||
for (final DiscoveryNode joiningNode : joiningNodes) {
|
||||
final DiscoveryNode existingNode = nodesBuilder.get(joiningNode.getId());
|
||||
if (existingNode != null && existingNode.equals(joiningNode) == false) {
|
||||
logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", existingNode, joiningNode);
|
||||
nodesBuilder.remove(existingNode.getId());
|
||||
final DiscoveryNode nodeWithSameId = nodesBuilder.get(joiningNode.getId());
|
||||
if (nodeWithSameId != null && nodeWithSameId.equals(joiningNode) == false) {
|
||||
logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameId, joiningNode);
|
||||
nodesBuilder.remove(nodeWithSameId.getId());
|
||||
}
|
||||
final DiscoveryNode nodeWithSameAddress = currentNodes.findByAddress(joiningNode.getAddress());
|
||||
if (nodeWithSameAddress != null && nodeWithSameAddress.equals(joiningNode) == false) {
|
||||
logger.debug("removing existing node [{}], which conflicts with incoming join from [{}]", nodeWithSameAddress,
|
||||
joiningNode);
|
||||
nodesBuilder.remove(nodeWithSameAddress.getId());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// now trim any left over dead nodes - either left there when the previous master stepped down
|
||||
// or removed by us above
|
||||
ClusterState tmpState = ClusterState.builder(currentState).nodes(nodesBuilder).blocks(clusterBlocks).build();
|
||||
|
|
|
@ -626,12 +626,15 @@ public class NodeJoinControllerTests extends ESTestCase {
|
|||
|
||||
setState(clusterService, stateBuilder.build());
|
||||
|
||||
final DiscoveryNode restartedNode = new DiscoveryNode(otherNode.getId(),
|
||||
randomBoolean() ? otherNode.getAddress() : buildNewFakeTransportAddress(), otherNode.getAttributes(),
|
||||
otherNode.getRoles(), Version.CURRENT);
|
||||
// conflict on node id or address
|
||||
final DiscoveryNode conflictingNode = randomBoolean() ?
|
||||
new DiscoveryNode(otherNode.getId(), randomBoolean() ? otherNode.getAddress() : buildNewFakeTransportAddress(),
|
||||
otherNode.getAttributes(), otherNode.getRoles(), Version.CURRENT) :
|
||||
new DiscoveryNode("conflicting_address_node", otherNode.getAddress(), otherNode.getAttributes(), otherNode.getRoles(),
|
||||
Version.CURRENT);
|
||||
|
||||
nodeJoinController.startElectionContext();
|
||||
final SimpleFuture joinFuture = joinNodeAsync(restartedNode);
|
||||
final SimpleFuture joinFuture = joinNodeAsync(conflictingNode);
|
||||
final CountDownLatch elected = new CountDownLatch(1);
|
||||
nodeJoinController.waitToBeElectedAsMaster(1, TimeValue.timeValueHours(5), new NodeJoinController.ElectionCallback() {
|
||||
@Override
|
||||
|
@ -655,9 +658,9 @@ public class NodeJoinControllerTests extends ESTestCase {
|
|||
assertTrue(finalNodes.isLocalNodeElectedMaster());
|
||||
assertThat(finalNodes.getLocalNode(), equalTo(masterNode));
|
||||
assertThat(finalNodes.getSize(), equalTo(2));
|
||||
assertThat(finalNodes.get(restartedNode.getId()), equalTo(restartedNode));
|
||||
assertThat(finalNodes.get(conflictingNode.getId()), equalTo(conflictingNode));
|
||||
List<ShardRouting> activeShardsOnRestartedNode =
|
||||
StreamSupport.stream(finalState.getRoutingNodes().node(restartedNode.getId()).spliterator(), false)
|
||||
StreamSupport.stream(finalState.getRoutingNodes().node(conflictingNode.getId()).spliterator(), false)
|
||||
.filter(ShardRouting::active).collect(Collectors.toList());
|
||||
assertThat(activeShardsOnRestartedNode, empty());
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue