Have a dedicated join timeout that is higher than ping.timeout for node join

Using ping.timeout, which defaults to 3s, as the timeout for the join request a node makes to the master once it is discovered can be too small, specifically when there is a large cluster state involved (and, by definition, all the buffers and such on the nio layer will be "cold"). Introduce a dedicated join.timeout setting that by default is 10x the ping.timeout (so 30s by default).
closes #6342
This commit is contained in:
Shay Banon 2014-05-30 00:49:46 +02:00
parent 0e2d33b4a4
commit 9c98bb3554
2 changed files with 8 additions and 4 deletions

View File

@ -72,7 +72,9 @@ As part of the initial ping process a master of the cluster is either
elected or joined to. This is done automatically. The
`discovery.zen.ping_timeout` (which defaults to `3s`) allows to
configure the election to handle cases of slow or congested networks
(higher values assure less chance of failure).
(higher values assure less chance of failure). Once a node joins, it
will send a join request to the master (`discovery.zen.join_timeout`)
with a timeout defaulting to 10 times the ping timeout.
Nodes can be excluded from becoming a master by setting `node.master` to
`false`. Note, once a node is a client node (`node.client` set to

View File

@ -93,6 +93,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
private final TimeValue pingTimeout;
private final TimeValue joinTimeout;
// a flag that should be used only for testing
private final boolean sendLeaveRequest;
@ -134,12 +135,13 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
// also support direct discovery.zen settings, for cases when it gets extended
this.pingTimeout = settings.getAsTime("discovery.zen.ping.timeout", settings.getAsTime("discovery.zen.ping_timeout", componentSettings.getAsTime("ping_timeout", componentSettings.getAsTime("initial_ping_timeout", timeValueSeconds(3)))));
this.joinTimeout = settings.getAsTime("discovery.zen.join_timeout", TimeValue.timeValueMillis(pingTimeout.millis() * 10));
this.sendLeaveRequest = componentSettings.getAsBoolean("send_leave_request", true);
this.masterElectionFilterClientNodes = settings.getAsBoolean("discovery.zen.master_election.filter_client", true);
this.masterElectionFilterDataNodes = settings.getAsBoolean("discovery.zen.master_election.filter_data", false);
logger.debug("using ping.timeout [{}], master_election.filter_client [{}], master_election.filter_data [{}]", pingTimeout, masterElectionFilterClientNodes, masterElectionFilterDataNodes);
logger.debug("using ping.timeout [{}], join.timeout [{}], master_election.filter_client [{}], master_election.filter_data [{}]", pingTimeout, joinTimeout, masterElectionFilterClientNodes, masterElectionFilterDataNodes);
this.electMaster = new ElectMasterService(settings);
nodeSettingsService.addListener(new ApplySettings());
@ -343,7 +345,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
}
// send join request
try {
membership.sendJoinRequestBlocking(masterNode, localNode, pingTimeout);
membership.sendJoinRequestBlocking(masterNode, localNode, joinTimeout);
} catch (Exception e) {
if (e instanceof ElasticsearchException) {
logger.info("failed to send join request to master [{}], reason [{}]", masterNode, ((ElasticsearchException) e).getDetailedMessage());
@ -713,7 +715,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
// validate the join request, will throw a failure if it fails, which will get back to the
// node calling the join request
membership.sendValidateJoinRequestBlocking(node, state, pingTimeout);
membership.sendValidateJoinRequestBlocking(node, state, joinTimeout);
clusterService.submitStateUpdateTask("zen-disco-receive(join from node[" + node + "])", Priority.IMMEDIATE, new ClusterStateUpdateTask() {
@Override