[Discovery] verify connect when sending a rejoin cluster request

When a master receives a cluster state from another node, it compares the local cluster state with the one it got. If the local one has a higher version, it sends a JoinClusterRequest to the other master to tell it step down. Because our network layer is asymmetric, we need to make sure we're connected before sending.

Closes #6779
This commit is contained in:
Boaz Leskes 2014-07-08 14:22:39 +02:00
parent 8dedbd01df
commit c9b0816b29
1 changed files with 17 additions and 7 deletions

View File

@ -554,7 +554,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
return; return;
} }
if (master) { if (master) {
logger.debug("received cluster state from [{}] which is also master but with cluster name [{}]", newClusterState.nodes().masterNode(), incomingClusterName); logger.debug("received cluster state from [{}] which is also master with cluster name [{}]", newClusterState.nodes().masterNode(), incomingClusterName);
final ClusterState newState = newClusterState; final ClusterState newState = newClusterState;
clusterService.submitStateUpdateTask("zen-disco-master_receive_cluster_state_from_another_master [" + newState.nodes().masterNode() + "]", Priority.URGENT, new ProcessedClusterStateUpdateTask() { clusterService.submitStateUpdateTask("zen-disco-master_receive_cluster_state_from_another_master [" + newState.nodes().masterNode() + "]", Priority.URGENT, new ProcessedClusterStateUpdateTask() {
@Override @Override
@ -564,12 +564,22 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
return rejoin(currentState, "zen-disco-master_receive_cluster_state_from_another_master [" + newState.nodes().masterNode() + "]"); return rejoin(currentState, "zen-disco-master_receive_cluster_state_from_another_master [" + newState.nodes().masterNode() + "]");
} else { } else {
logger.warn("received cluster state from [{}] which is also master but with an older cluster_state, telling [{}] to rejoin the cluster", newState.nodes().masterNode(), newState.nodes().masterNode()); logger.warn("received cluster state from [{}] which is also master but with an older cluster_state, telling [{}] to rejoin the cluster", newState.nodes().masterNode(), newState.nodes().masterNode());
try {
// make sure we're connected to this node (connect to node does nothing if we're already connected)
// since the network connections are asymmetric, it may be that we received a state but have disconnected from the node
// in the past (after a master failure, for example)
transportService.connectToNode(newState.nodes().masterNode());
transportService.sendRequest(newState.nodes().masterNode(), RejoinClusterRequestHandler.ACTION, new RejoinClusterRequest(currentState.nodes().localNodeId()), new EmptyTransportResponseHandler(ThreadPool.Names.SAME) { transportService.sendRequest(newState.nodes().masterNode(), RejoinClusterRequestHandler.ACTION, new RejoinClusterRequest(currentState.nodes().localNodeId()), new EmptyTransportResponseHandler(ThreadPool.Names.SAME) {
@Override @Override
public void handleException(TransportException exp) { public void handleException(TransportException exp) {
logger.warn("failed to send rejoin request to [{}]", exp, newState.nodes().masterNode()); logger.warn("failed to send rejoin request to [{}]", exp, newState.nodes().masterNode());
} }
}); });
} catch (Exception e) {
logger.warn("failed to send rejoin request to [{}]", e, newState.nodes().masterNode());
}
return currentState; return currentState;
} }
} }