Revert "[Discovery] immediately start Master|Node fault detection pinging"
In #6706 we change the master validation to start pining immediately after a new master as ellected or a node joined. The idea is to have a quicker response to failures. This does however create a problem if the new master has yet fully processed it's ellection and responds to the ping with a NoLongerMasterException. This causes the source node to remove the current master and ellect another, only to find out it's not a master either and so forth. We are moving this change to the feature/improve_zen branch, where the improvements we made will cause the situation to be handled properly.
This reverts commit ae16956e07
.
This commit is contained in:
parent
e662d3c535
commit
caf11ff2fb
|
@ -153,9 +153,8 @@ public class MasterFaultDetection extends AbstractComponent {
|
|||
masterPinger.stop();
|
||||
}
|
||||
this.masterPinger = new MasterPinger();
|
||||
|
||||
// we use schedule with a 0 time value to run the pinger on the pool as it will run on later
|
||||
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger);
|
||||
// start the ping process
|
||||
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger);
|
||||
}
|
||||
|
||||
public void stop(String reason) {
|
||||
|
@ -199,8 +198,7 @@ public class MasterFaultDetection extends AbstractComponent {
|
|||
masterPinger.stop();
|
||||
}
|
||||
this.masterPinger = new MasterPinger();
|
||||
// we use schedule with a 0 time value to run the pinger on the pool as it will run on later
|
||||
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger);
|
||||
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger);
|
||||
} catch (Exception e) {
|
||||
logger.trace("[master] [{}] transport disconnected (with verified connect)", masterNode);
|
||||
notifyMasterFailure(masterNode, "transport disconnected (with verified connect)");
|
||||
|
|
|
@ -119,8 +119,7 @@ public class NodesFaultDetection extends AbstractComponent {
|
|||
}
|
||||
if (!nodesFD.containsKey(newNode)) {
|
||||
nodesFD.put(newNode, new NodeFD());
|
||||
// we use schedule with a 0 time value to run the pinger on the pool as it will run on later
|
||||
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(newNode));
|
||||
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(newNode));
|
||||
}
|
||||
}
|
||||
for (DiscoveryNode removedNode : delta.removedNodes()) {
|
||||
|
@ -166,8 +165,7 @@ public class NodesFaultDetection extends AbstractComponent {
|
|||
try {
|
||||
transportService.connectToNode(node);
|
||||
nodesFD.put(node, new NodeFD());
|
||||
// we use schedule with a 0 time value to run the pinger on the pool as it will run on later
|
||||
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(node));
|
||||
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(node));
|
||||
} catch (Exception e) {
|
||||
logger.trace("[node ] [{}] transport disconnected (with verified connect)", node);
|
||||
notifyNodeFailure(node, "transport disconnected (with verified connect)");
|
||||
|
|
Loading…
Reference in New Issue