Revert "[Discovery] immediately start Master|Node fault detection pinging"

In #6706 we changed the master validation to start pinging immediately after a new master was elected or a node joined. The idea was to have a quicker response to failures. This does, however, create a problem if the new master has not yet fully processed its election and responds to the ping with a NoLongerMasterException. This causes the source node to remove the current master and elect another, only to find out that it is not a master either, and so forth. We are moving this change to the feature/improve_zen branch, where the improvements we made will cause the situation to be handled properly.

This reverts commit ae16956e07.
This commit is contained in:
Boaz Leskes 2014-07-08 13:34:24 +02:00
parent e662d3c535
commit caf11ff2fb
2 changed files with 5 additions and 9 deletions

View File

@ -153,9 +153,8 @@ public class MasterFaultDetection extends AbstractComponent {
masterPinger.stop(); masterPinger.stop();
} }
this.masterPinger = new MasterPinger(); this.masterPinger = new MasterPinger();
// start the ping process
// we use schedule with a 0 time value to run the pinger on the pool as it will run on later threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger);
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger);
} }
public void stop(String reason) { public void stop(String reason) {
@ -199,8 +198,7 @@ public class MasterFaultDetection extends AbstractComponent {
masterPinger.stop(); masterPinger.stop();
} }
this.masterPinger = new MasterPinger(); this.masterPinger = new MasterPinger();
// we use schedule with a 0 time value to run the pinger on the pool as it will run on later threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger);
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger);
} catch (Exception e) { } catch (Exception e) {
logger.trace("[master] [{}] transport disconnected (with verified connect)", masterNode); logger.trace("[master] [{}] transport disconnected (with verified connect)", masterNode);
notifyMasterFailure(masterNode, "transport disconnected (with verified connect)"); notifyMasterFailure(masterNode, "transport disconnected (with verified connect)");

View File

@ -119,8 +119,7 @@ public class NodesFaultDetection extends AbstractComponent {
} }
if (!nodesFD.containsKey(newNode)) { if (!nodesFD.containsKey(newNode)) {
nodesFD.put(newNode, new NodeFD()); nodesFD.put(newNode, new NodeFD());
// we use schedule with a 0 time value to run the pinger on the pool as it will run on later threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(newNode));
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(newNode));
} }
} }
for (DiscoveryNode removedNode : delta.removedNodes()) { for (DiscoveryNode removedNode : delta.removedNodes()) {
@ -166,8 +165,7 @@ public class NodesFaultDetection extends AbstractComponent {
try { try {
transportService.connectToNode(node); transportService.connectToNode(node);
nodesFD.put(node, new NodeFD()); nodesFD.put(node, new NodeFD());
// we use schedule with a 0 time value to run the pinger on the pool as it will run on later threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(node));
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(node));
} catch (Exception e) { } catch (Exception e) {
logger.trace("[node ] [{}] transport disconnected (with verified connect)", node); logger.trace("[node ] [{}] transport disconnected (with verified connect)", node);
notifyNodeFailure(node, "transport disconnected (with verified connect)"); notifyNodeFailure(node, "transport disconnected (with verified connect)");