Have a dedicated join timeout that is higher than ping.timeout for node join
Using ping.timeout (which defaults to 3s) as the timeout for the join request a node sends to the master once the master is discovered can be too small, especially when a large cluster state is involved (and, by definition, all the buffers and such on the nio layer will be "cold"). Introduce a dedicated join.timeout setting that by default is 10x the ping.timeout (so 30s by default). closes #6342
This commit is contained in:
parent
0e2d33b4a4
commit
9c98bb3554
|
@ -72,7 +72,9 @@ As part of the initial ping process a master of the cluster is either
|
||||||
elected or joined to. This is done automatically. The
|
elected or joined to. This is done automatically. The
|
||||||
`discovery.zen.ping_timeout` (which defaults to `3s`) allows to
|
`discovery.zen.ping_timeout` (which defaults to `3s`) allows to
|
||||||
configure the election to handle cases of slow or congested networks
|
configure the election to handle cases of slow or congested networks
|
||||||
(higher values assure less chance of failure).
|
(higher values assure less chance of failure). Once a node joins, it
|
||||||
|
will send a join request to the master (`discovery.zen.join_timeout`)
|
||||||
|
with a timeout defaulting to 10 times the ping timeout.
|
||||||
|
|
||||||
Nodes can be excluded from becoming a master by setting `node.master` to
|
Nodes can be excluded from becoming a master by setting `node.master` to
|
||||||
`false`. Note, once a node is a client node (`node.client` set to
|
`false`. Note, once a node is a client node (`node.client` set to
|
||||||
|
|
|
@ -93,6 +93,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
|
||||||
|
|
||||||
|
|
||||||
private final TimeValue pingTimeout;
|
private final TimeValue pingTimeout;
|
||||||
|
private final TimeValue joinTimeout;
|
||||||
|
|
||||||
// a flag that should be used only for testing
|
// a flag that should be used only for testing
|
||||||
private final boolean sendLeaveRequest;
|
private final boolean sendLeaveRequest;
|
||||||
|
@ -134,12 +135,13 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
|
||||||
|
|
||||||
// also support direct discovery.zen settings, for cases when it gets extended
|
// also support direct discovery.zen settings, for cases when it gets extended
|
||||||
this.pingTimeout = settings.getAsTime("discovery.zen.ping.timeout", settings.getAsTime("discovery.zen.ping_timeout", componentSettings.getAsTime("ping_timeout", componentSettings.getAsTime("initial_ping_timeout", timeValueSeconds(3)))));
|
this.pingTimeout = settings.getAsTime("discovery.zen.ping.timeout", settings.getAsTime("discovery.zen.ping_timeout", componentSettings.getAsTime("ping_timeout", componentSettings.getAsTime("initial_ping_timeout", timeValueSeconds(3)))));
|
||||||
|
this.joinTimeout = settings.getAsTime("discovery.zen.join_timeout", TimeValue.timeValueMillis(pingTimeout.millis() * 10));
|
||||||
this.sendLeaveRequest = componentSettings.getAsBoolean("send_leave_request", true);
|
this.sendLeaveRequest = componentSettings.getAsBoolean("send_leave_request", true);
|
||||||
|
|
||||||
this.masterElectionFilterClientNodes = settings.getAsBoolean("discovery.zen.master_election.filter_client", true);
|
this.masterElectionFilterClientNodes = settings.getAsBoolean("discovery.zen.master_election.filter_client", true);
|
||||||
this.masterElectionFilterDataNodes = settings.getAsBoolean("discovery.zen.master_election.filter_data", false);
|
this.masterElectionFilterDataNodes = settings.getAsBoolean("discovery.zen.master_election.filter_data", false);
|
||||||
|
|
||||||
logger.debug("using ping.timeout [{}], master_election.filter_client [{}], master_election.filter_data [{}]", pingTimeout, masterElectionFilterClientNodes, masterElectionFilterDataNodes);
|
logger.debug("using ping.timeout [{}], join.timeout [{}], master_election.filter_client [{}], master_election.filter_data [{}]", pingTimeout, joinTimeout, masterElectionFilterClientNodes, masterElectionFilterDataNodes);
|
||||||
|
|
||||||
this.electMaster = new ElectMasterService(settings);
|
this.electMaster = new ElectMasterService(settings);
|
||||||
nodeSettingsService.addListener(new ApplySettings());
|
nodeSettingsService.addListener(new ApplySettings());
|
||||||
|
@ -343,7 +345,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
|
||||||
}
|
}
|
||||||
// send join request
|
// send join request
|
||||||
try {
|
try {
|
||||||
membership.sendJoinRequestBlocking(masterNode, localNode, pingTimeout);
|
membership.sendJoinRequestBlocking(masterNode, localNode, joinTimeout);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
if (e instanceof ElasticsearchException) {
|
if (e instanceof ElasticsearchException) {
|
||||||
logger.info("failed to send join request to master [{}], reason [{}]", masterNode, ((ElasticsearchException) e).getDetailedMessage());
|
logger.info("failed to send join request to master [{}], reason [{}]", masterNode, ((ElasticsearchException) e).getDetailedMessage());
|
||||||
|
@ -713,7 +715,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent<Discovery> implemen
|
||||||
|
|
||||||
// validate the join request, will throw a failure if it fails, which will get back to the
|
// validate the join request, will throw a failure if it fails, which will get back to the
|
||||||
// node calling the join request
|
// node calling the join request
|
||||||
membership.sendValidateJoinRequestBlocking(node, state, pingTimeout);
|
membership.sendValidateJoinRequestBlocking(node, state, joinTimeout);
|
||||||
|
|
||||||
clusterService.submitStateUpdateTask("zen-disco-receive(join from node[" + node + "])", Priority.IMMEDIATE, new ClusterStateUpdateTask() {
|
clusterService.submitStateUpdateTask("zen-disco-receive(join from node[" + node + "])", Priority.IMMEDIATE, new ClusterStateUpdateTask() {
|
||||||
@Override
|
@Override
|
||||||
|
|
Loading…
Reference in New Issue