[Discovery] immediately start Master|Node fault detection pinging

After a node joins the cluster, it starts pinging the master to verify its health. Previously, the cluster join request was processed asynchronously, so we had to allow some time for it to complete. With #6480 we changed this to wait for the join process to complete on the master. We can therefore start pinging immediately for fast detection of failures. A similar change can be made to the node fault detection on the master side.

Closes #6706
This commit is contained in:
Boaz Leskes 2014-07-03 12:24:04 +02:00
parent f22e51ae81
commit ae16956e07
2 changed files with 9 additions and 5 deletions

View File

@ -153,8 +153,9 @@ public class MasterFaultDetection extends AbstractComponent {
masterPinger.stop(); masterPinger.stop();
} }
this.masterPinger = new MasterPinger(); this.masterPinger = new MasterPinger();
// start the ping process
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger); // we use schedule with a 0 time value to run the pinger on the pool as it will run on later
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger);
} }
public void stop(String reason) { public void stop(String reason) {
@ -198,7 +199,8 @@ public class MasterFaultDetection extends AbstractComponent {
masterPinger.stop(); masterPinger.stop();
} }
this.masterPinger = new MasterPinger(); this.masterPinger = new MasterPinger();
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger); // we use schedule with a 0 time value to run the pinger on the pool as it will run on later
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger);
} catch (Exception e) { } catch (Exception e) {
logger.trace("[master] [{}] transport disconnected (with verified connect)", masterNode); logger.trace("[master] [{}] transport disconnected (with verified connect)", masterNode);
notifyMasterFailure(masterNode, "transport disconnected (with verified connect)"); notifyMasterFailure(masterNode, "transport disconnected (with verified connect)");

View File

@ -119,7 +119,8 @@ public class NodesFaultDetection extends AbstractComponent {
} }
if (!nodesFD.containsKey(newNode)) { if (!nodesFD.containsKey(newNode)) {
nodesFD.put(newNode, new NodeFD()); nodesFD.put(newNode, new NodeFD());
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(newNode)); // we use schedule with a 0 time value to run the pinger on the pool as it will run on later
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(newNode));
} }
} }
for (DiscoveryNode removedNode : delta.removedNodes()) { for (DiscoveryNode removedNode : delta.removedNodes()) {
@ -165,7 +166,8 @@ public class NodesFaultDetection extends AbstractComponent {
try { try {
transportService.connectToNode(node); transportService.connectToNode(node);
nodesFD.put(node, new NodeFD()); nodesFD.put(node, new NodeFD());
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(node)); // we use schedule with a 0 time value to run the pinger on the pool as it will run on later
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(node));
} catch (Exception e) { } catch (Exception e) {
logger.trace("[node ] [{}] transport disconnected (with verified connect)", node); logger.trace("[node ] [{}] transport disconnected (with verified connect)", node);
notifyNodeFailure(node, "transport disconnected (with verified connect)"); notifyNodeFailure(node, "transport disconnected (with verified connect)");