[Discovery] verify connect when sending a rejoin cluster request
When a master receives a cluster state from another node, it compares the local cluster state with the one it got. If the local one has a higher version, it sends a JoinClusterRequest to the other master to tell it step down. Because our network layer is asymmetric, we need to make sure we're connected before sending. Closes #6779
This commit is contained in:
parent
8dedbd01df
commit
c9b0816b29
|
@ -554,7 +554,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (master) {
|
if (master) {
|
||||||
logger.debug("received cluster state from [{}] which is also master but with cluster name [{}]", newClusterState.nodes().masterNode(), incomingClusterName);
|
logger.debug("received cluster state from [{}] which is also master with cluster name [{}]", newClusterState.nodes().masterNode(), incomingClusterName);
|
||||||
final ClusterState newState = newClusterState;
|
final ClusterState newState = newClusterState;
|
||||||
clusterService.submitStateUpdateTask("zen-disco-master_receive_cluster_state_from_another_master [" + newState.nodes().masterNode() + "]", Priority.URGENT, new ProcessedClusterStateUpdateTask() {
|
clusterService.submitStateUpdateTask("zen-disco-master_receive_cluster_state_from_another_master [" + newState.nodes().masterNode() + "]", Priority.URGENT, new ProcessedClusterStateUpdateTask() {
|
||||||
@Override
|
@Override
|
||||||
|
@ -564,12 +564,22 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
|
||||||
return rejoin(currentState, "zen-disco-master_receive_cluster_state_from_another_master [" + newState.nodes().masterNode() + "]");
|
return rejoin(currentState, "zen-disco-master_receive_cluster_state_from_another_master [" + newState.nodes().masterNode() + "]");
|
||||||
} else {
|
} else {
|
||||||
logger.warn("received cluster state from [{}] which is also master but with an older cluster_state, telling [{}] to rejoin the cluster", newState.nodes().masterNode(), newState.nodes().masterNode());
|
logger.warn("received cluster state from [{}] which is also master but with an older cluster_state, telling [{}] to rejoin the cluster", newState.nodes().masterNode(), newState.nodes().masterNode());
|
||||||
|
|
||||||
|
try {
|
||||||
|
// make sure we're connected to this node (connect to node does nothing if we're already connected)
|
||||||
|
// since the network connections are asymmetric, it may be that we received a state but have disconnected from the node
|
||||||
|
// in the past (after a master failure, for example)
|
||||||
|
transportService.connectToNode(newState.nodes().masterNode());
|
||||||
transportService.sendRequest(newState.nodes().masterNode(), RejoinClusterRequestHandler.ACTION, new RejoinClusterRequest(currentState.nodes().localNodeId()), new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
|
transportService.sendRequest(newState.nodes().masterNode(), RejoinClusterRequestHandler.ACTION, new RejoinClusterRequest(currentState.nodes().localNodeId()), new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
|
||||||
@Override
|
@Override
|
||||||
public void handleException(TransportException exp) {
|
public void handleException(TransportException exp) {
|
||||||
logger.warn("failed to send rejoin request to [{}]", exp, newState.nodes().masterNode());
|
logger.warn("failed to send rejoin request to [{}]", exp, newState.nodes().masterNode());
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
} catch (Exception e) {
|
||||||
|
logger.warn("failed to send rejoin request to [{}]", e, newState.nodes().masterNode());
|
||||||
|
}
|
||||||
|
|
||||||
return currentState;
|
return currentState;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue