[Discovery] immediately start Master|Node fault detection pinging
After a node joins the cluster, it starts pinging the master to verify its health. Before, the cluster join request was processed asynchronously and we had to give it some time to complete. With #6480 we changed this to wait for the join process to complete on the master. We can therefore start pinging immediately for fast detection of failures. A similar change can be made to the Node fault detection from the master side. Closes #6706
This commit is contained in:
parent
48c7da1fd4
commit
5302a53145
|
@ -155,8 +155,9 @@ public class MasterFaultDetection extends AbstractComponent {
|
|||
masterPinger.stop();
|
||||
}
|
||||
this.masterPinger = new MasterPinger();
|
||||
// start the ping process
|
||||
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger);
|
||||
|
||||
// we use schedule with a 0 time value to run the pinger on the pool as it will run on later
|
||||
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger);
|
||||
}
|
||||
|
||||
public void stop(String reason) {
|
||||
|
@ -200,7 +201,8 @@ public class MasterFaultDetection extends AbstractComponent {
|
|||
masterPinger.stop();
|
||||
}
|
||||
this.masterPinger = new MasterPinger();
|
||||
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger);
|
||||
// we use schedule with a 0 time value to run the pinger on the pool as it will run on later
|
||||
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger);
|
||||
} catch (Exception e) {
|
||||
logger.trace("[master] [{}] transport disconnected (with verified connect)", masterNode);
|
||||
notifyMasterFailure(masterNode, "transport disconnected (with verified connect)");
|
||||
|
|
|
@ -121,7 +121,8 @@ public class NodesFaultDetection extends AbstractComponent {
|
|||
}
|
||||
if (!nodesFD.containsKey(newNode)) {
|
||||
nodesFD.put(newNode, new NodeFD());
|
||||
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(newNode));
|
||||
// we use schedule with a 0 time value to run the pinger on the pool as it will run on later
|
||||
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(newNode));
|
||||
}
|
||||
}
|
||||
for (DiscoveryNode removedNode : delta.removedNodes()) {
|
||||
|
@ -167,7 +168,8 @@ public class NodesFaultDetection extends AbstractComponent {
|
|||
try {
|
||||
transportService.connectToNode(node);
|
||||
nodesFD.put(node, new NodeFD());
|
||||
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(node));
|
||||
// we use schedule with a 0 time value to run the pinger on the pool as it will run on later
|
||||
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(node));
|
||||
} catch (Exception e) {
|
||||
logger.trace("[node ] [{}] transport disconnected (with verified connect)", node);
|
||||
notifyNodeFailure(node, "transport disconnected (with verified connect)");
|
||||
|
|
Loading…
Reference in New Issue