mirror of
https://github.com/honeymoose/OpenSearch.git
synced 2025-02-17 18:35:25 +00:00
Revert "[Discovery] immediately start Master|Node fault detection pinging"
In #6706 we changed the master validation to start pinging immediately after a new master was elected or a node joined. The idea was to have a quicker response to failures. This does, however, create a problem if the new master has not yet fully processed its election and responds to the ping with a NoLongerMasterException. This causes the source node to remove the current master and elect another, only to find out it's not a master either, and so forth. We are moving this change to the feature/improve_zen branch, where the improvements we made will cause the situation to be handled properly. This reverts commit ae16956e072bea317ea481f65f2e110dc48fde17.
This commit is contained in:
parent
e662d3c535
commit
caf11ff2fb
@ -153,9 +153,8 @@ public class MasterFaultDetection extends AbstractComponent {
|
||||
masterPinger.stop();
|
||||
}
|
||||
this.masterPinger = new MasterPinger();
|
||||
|
||||
// we use schedule with a 0 time value to run the pinger on the pool as it will run on later
|
||||
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger);
|
||||
// start the ping process
|
||||
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger);
|
||||
}
|
||||
|
||||
public void stop(String reason) {
|
||||
@ -199,8 +198,7 @@ public class MasterFaultDetection extends AbstractComponent {
|
||||
masterPinger.stop();
|
||||
}
|
||||
this.masterPinger = new MasterPinger();
|
||||
// we use schedule with a 0 time value to run the pinger on the pool as it will run on later
|
||||
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, masterPinger);
|
||||
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, masterPinger);
|
||||
} catch (Exception e) {
|
||||
logger.trace("[master] [{}] transport disconnected (with verified connect)", masterNode);
|
||||
notifyMasterFailure(masterNode, "transport disconnected (with verified connect)");
|
||||
|
@ -119,8 +119,7 @@ public class NodesFaultDetection extends AbstractComponent {
|
||||
}
|
||||
if (!nodesFD.containsKey(newNode)) {
|
||||
nodesFD.put(newNode, new NodeFD());
|
||||
// we use schedule with a 0 time value to run the pinger on the pool as it will run on later
|
||||
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(newNode));
|
||||
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(newNode));
|
||||
}
|
||||
}
|
||||
for (DiscoveryNode removedNode : delta.removedNodes()) {
|
||||
@ -166,8 +165,7 @@ public class NodesFaultDetection extends AbstractComponent {
|
||||
try {
|
||||
transportService.connectToNode(node);
|
||||
nodesFD.put(node, new NodeFD());
|
||||
// we use schedule with a 0 time value to run the pinger on the pool as it will run on later
|
||||
threadPool.schedule(TimeValue.timeValueMillis(0), ThreadPool.Names.SAME, new SendPingRequest(node));
|
||||
threadPool.schedule(pingInterval, ThreadPool.Names.SAME, new SendPingRequest(node));
|
||||
} catch (Exception e) {
|
||||
logger.trace("[node ] [{}] transport disconnected (with verified connect)", node);
|
||||
notifyNodeFailure(node, "transport disconnected (with verified connect)");
|
||||
|
Loading…
x
Reference in New Issue
Block a user