[Discovery] Handle ConnectTransportException during a Master/Node fault detection ping

Both the Master and Node fault detection register themselves to be notified when a node disconnects, so that they can respond to it accordingly. As such, when a ConnectTransportException was raised on a ping request, it was ignored, because the disconnect is already handled by the registered connection listener. However, this introduces a race condition: if the disconnect happens during a period where there is no current master (minimum_master_node breach), fault detection is not active at that time and the listener never sees the disconnect. In this case, we will only discover the disconnect error during the ping request, so we have to respond to it there.

Closes #6686
This commit is contained in:
Boaz Leskes 2014-07-02 16:27:35 +02:00
parent 3b959706b3
commit 8909a77724
2 changed files with 14 additions and 11 deletions

View File

@ -296,14 +296,13 @@ public class MasterFaultDetection extends AbstractComponent {
if (!running) {
return;
}
if (exp instanceof ConnectTransportException) {
// ignore this one, we already handle it by registering a connection listener
return;
}
synchronized (masterNodeMutex) {
// check if the master node did not get switched on us...
if (masterToPing.equals(MasterFaultDetection.this.masterNode())) {
if (exp.getCause() instanceof NoLongerMasterException) {
if (exp instanceof ConnectTransportException) {
handleTransportDisconnect(masterToPing);
return;
} else if (exp.getCause() instanceof NoLongerMasterException) {
logger.debug("[master] pinging a master {} that is no longer a master", masterNode);
notifyMasterFailure(masterToPing, "no longer master");
return;
@ -316,6 +315,7 @@ public class MasterFaultDetection extends AbstractComponent {
notifyMasterFailure(masterToPing, "do not exists on master, act as master failure");
return;
}
int retryCount = ++MasterFaultDetection.this.retryCount;
logger.trace("[master] failed to ping [{}], retry [{}] out of [{}]", exp, masterNode, retryCount, pingRetryCount);
if (retryCount >= pingRetryCount) {
@ -334,7 +334,8 @@ public class MasterFaultDetection extends AbstractComponent {
public String executor() {
return ThreadPool.Names.SAME;
}
});
}
);
}
}

View File

@ -228,15 +228,16 @@ public class NodesFaultDetection extends AbstractComponent {
if (!running) {
return;
}
if (exp instanceof ConnectTransportException) {
// ignore this one, we already handle it by registering a connection listener
return;
}
NodeFD nodeFD = nodesFD.get(node);
if (nodeFD != null) {
if (!nodeFD.running) {
return;
}
if (exp instanceof ConnectTransportException) {
handleTransportDisconnect(node);
return;
}
int retryCount = ++nodeFD.retryCount;
logger.trace("[node ] failed to ping [{}], retry [{}] out of [{}]", exp, node, retryCount, pingRetryCount);
if (retryCount >= pingRetryCount) {
@ -257,7 +258,8 @@ public class NodesFaultDetection extends AbstractComponent {
public String executor() {
return ThreadPool.Names.SAME;
}
});
}
);
}
}