Fault detection: default to attempting a reconnect after a network disconnection
This commit is contained in:
parent
e313379ed5
commit
de7dd3c070
|
@ -92,7 +92,7 @@ public class MasterFaultDetection extends AbstractComponent {
|
||||||
this.transportService = transportService;
|
this.transportService = transportService;
|
||||||
this.nodesProvider = nodesProvider;
|
this.nodesProvider = nodesProvider;
|
||||||
|
|
||||||
this.connectOnNetworkDisconnect = componentSettings.getAsBoolean("connect_on_network_disconnect", false);
|
this.connectOnNetworkDisconnect = componentSettings.getAsBoolean("connect_on_network_disconnect", true);
|
||||||
this.pingInterval = componentSettings.getAsTime("ping_interval", timeValueSeconds(1));
|
this.pingInterval = componentSettings.getAsTime("ping_interval", timeValueSeconds(1));
|
||||||
this.pingRetryTimeout = componentSettings.getAsTime("ping_timeout", timeValueSeconds(30));
|
this.pingRetryTimeout = componentSettings.getAsTime("ping_timeout", timeValueSeconds(30));
|
||||||
this.pingRetryCount = componentSettings.getAsInt("ping_retries", 3);
|
this.pingRetryCount = componentSettings.getAsInt("ping_retries", 3);
|
||||||
|
@ -196,6 +196,12 @@ public class MasterFaultDetection extends AbstractComponent {
|
||||||
if (connectOnNetworkDisconnect) {
|
if (connectOnNetworkDisconnect) {
|
||||||
try {
|
try {
|
||||||
transportService.connectToNode(node);
|
transportService.connectToNode(node);
|
||||||
|
// if all is well, make sure we restart the pinger
|
||||||
|
if (masterPinger != null) {
|
||||||
|
masterPinger.stop();
|
||||||
|
}
|
||||||
|
this.masterPinger = new MasterPinger();
|
||||||
|
threadPool.schedule(masterPinger, pingInterval);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.trace("[master] [{}] transport disconnected (with verified connect)", masterNode);
|
logger.trace("[master] [{}] transport disconnected (with verified connect)", masterNode);
|
||||||
notifyMasterFailure(masterNode, "transport disconnected (with verified connect)");
|
notifyMasterFailure(masterNode, "transport disconnected (with verified connect)");
|
||||||
|
@ -285,6 +291,10 @@ public class MasterFaultDetection extends AbstractComponent {
|
||||||
if (!running) {
|
if (!running) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (exp instanceof ConnectTransportException) {
|
||||||
|
// ignore this one, we already handle it by registering a connection listener
|
||||||
|
return;
|
||||||
|
}
|
||||||
synchronized (masterNodeMutex) {
|
synchronized (masterNodeMutex) {
|
||||||
// check if the master node did not get switched on us...
|
// check if the master node did not get switched on us...
|
||||||
if (masterToPing.equals(MasterFaultDetection.this.masterNode())) {
|
if (masterToPing.equals(MasterFaultDetection.this.masterNode())) {
|
||||||
|
|
|
@ -84,7 +84,7 @@ public class NodesFaultDetection extends AbstractComponent {
|
||||||
this.threadPool = threadPool;
|
this.threadPool = threadPool;
|
||||||
this.transportService = transportService;
|
this.transportService = transportService;
|
||||||
|
|
||||||
this.connectOnNetworkDisconnect = componentSettings.getAsBoolean("connect_on_network_disconnect", false);
|
this.connectOnNetworkDisconnect = componentSettings.getAsBoolean("connect_on_network_disconnect", true);
|
||||||
this.pingInterval = componentSettings.getAsTime("ping_interval", timeValueSeconds(1));
|
this.pingInterval = componentSettings.getAsTime("ping_interval", timeValueSeconds(1));
|
||||||
this.pingRetryTimeout = componentSettings.getAsTime("ping_timeout", timeValueSeconds(30));
|
this.pingRetryTimeout = componentSettings.getAsTime("ping_timeout", timeValueSeconds(30));
|
||||||
this.pingRetryCount = componentSettings.getAsInt("ping_retries", 3);
|
this.pingRetryCount = componentSettings.getAsInt("ping_retries", 3);
|
||||||
|
@ -163,9 +163,12 @@ public class NodesFaultDetection extends AbstractComponent {
|
||||||
if (!running) {
|
if (!running) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
nodeFD.running = false;
|
||||||
if (connectOnNetworkDisconnect) {
|
if (connectOnNetworkDisconnect) {
|
||||||
try {
|
try {
|
||||||
transportService.connectToNode(node);
|
transportService.connectToNode(node);
|
||||||
|
nodesFD.put(node, new NodeFD());
|
||||||
|
threadPool.schedule(new SendPingRequest(node), pingInterval);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
logger.trace("[node ] [{}] transport disconnected (with verified connect)", node);
|
logger.trace("[node ] [{}] transport disconnected (with verified connect)", node);
|
||||||
notifyNodeFailure(node, "transport disconnected (with verified connect)");
|
notifyNodeFailure(node, "transport disconnected (with verified connect)");
|
||||||
|
@ -210,6 +213,9 @@ public class NodesFaultDetection extends AbstractComponent {
|
||||||
}
|
}
|
||||||
NodeFD nodeFD = nodesFD.get(node);
|
NodeFD nodeFD = nodesFD.get(node);
|
||||||
if (nodeFD != null) {
|
if (nodeFD != null) {
|
||||||
|
if (!nodeFD.running) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
nodeFD.retryCount = 0;
|
nodeFD.retryCount = 0;
|
||||||
threadPool.schedule(SendPingRequest.this, pingInterval);
|
threadPool.schedule(SendPingRequest.this, pingInterval);
|
||||||
}
|
}
|
||||||
|
@ -220,8 +226,15 @@ public class NodesFaultDetection extends AbstractComponent {
|
||||||
if (!running) {
|
if (!running) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
if (exp instanceof ConnectTransportException) {
|
||||||
|
// ignore this one, we already handle it by registering a connection listener
|
||||||
|
return;
|
||||||
|
}
|
||||||
NodeFD nodeFD = nodesFD.get(node);
|
NodeFD nodeFD = nodesFD.get(node);
|
||||||
if (nodeFD != null) {
|
if (nodeFD != null) {
|
||||||
|
if (!nodeFD.running) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
int retryCount = ++nodeFD.retryCount;
|
int retryCount = ++nodeFD.retryCount;
|
||||||
logger.trace("[node ] failed to ping [{}], retry [{}] out of [{}]", exp, node, retryCount, pingRetryCount);
|
logger.trace("[node ] failed to ping [{}], retry [{}] out of [{}]", exp, node, retryCount, pingRetryCount);
|
||||||
if (retryCount >= pingRetryCount) {
|
if (retryCount >= pingRetryCount) {
|
||||||
|
@ -247,6 +260,7 @@ public class NodesFaultDetection extends AbstractComponent {
|
||||||
|
|
||||||
static class NodeFD {
|
static class NodeFD {
|
||||||
volatile int retryCount;
|
volatile int retryCount;
|
||||||
|
volatile boolean running = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
private class FDConnectionListener implements TransportConnectionListener {
|
private class FDConnectionListener implements TransportConnectionListener {
|
||||||
|
|
Loading…
Reference in New Issue