From 9c98bb3554367c7a09aa6b01c0b7859d8ed3c018 Mon Sep 17 00:00:00 2001 From: Shay Banon Date: Fri, 30 May 2014 00:49:46 +0200 Subject: [PATCH] Have a dedicated join timeout that is higher than ping.timeout for node join Using ping.timeout, which defaults to 3s, as the timeout value on the join request a node makes to the master once it's discovered can be too small, specifically when there is a large cluster state involved (and by definition, all the buffers and such on the nio layer will be "cold"). Introduce a dedicated join.timeout setting that by default is 10x the ping.timeout (so 30s by default). closes #6342 --- docs/reference/modules/discovery/zen.asciidoc | 4 +++- .../org/elasticsearch/discovery/zen/ZenDiscovery.java | 8 +++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/docs/reference/modules/discovery/zen.asciidoc b/docs/reference/modules/discovery/zen.asciidoc index 5edc33fea5d..a019e7a6f48 100644 --- a/docs/reference/modules/discovery/zen.asciidoc +++ b/docs/reference/modules/discovery/zen.asciidoc @@ -72,7 +72,9 @@ As part of the initial ping process a master of the cluster is either elected or joined to. This is done automatically. The `discovery.zen.ping_timeout` (which defaults to `3s`) allows to configure the election to handle cases of slow or congested networks -(higher values assure less chance of failure). +(higher values assure less chance of failure). Once a node joins, it + will send a join request to the master (`discovery.zen.join_timeout`) + with a timeout defaulting at 10 times the ping timeout. Nodes can be excluded from becoming a master by setting `node.master` to `false`. 
Note, once a node is a client node (`node.client` set to diff --git a/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java b/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java index b72436cf212..1c8f8667d41 100644 --- a/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java +++ b/src/main/java/org/elasticsearch/discovery/zen/ZenDiscovery.java @@ -93,6 +93,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen private final TimeValue pingTimeout; + private final TimeValue joinTimeout; // a flag that should be used only for testing private final boolean sendLeaveRequest; @@ -134,12 +135,13 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen // also support direct discovery.zen settings, for cases when it gets extended this.pingTimeout = settings.getAsTime("discovery.zen.ping.timeout", settings.getAsTime("discovery.zen.ping_timeout", componentSettings.getAsTime("ping_timeout", componentSettings.getAsTime("initial_ping_timeout", timeValueSeconds(3))))); + this.joinTimeout = settings.getAsTime("discovery.zen.join_timeout", TimeValue.timeValueMillis(pingTimeout.millis() * 10)); this.sendLeaveRequest = componentSettings.getAsBoolean("send_leave_request", true); this.masterElectionFilterClientNodes = settings.getAsBoolean("discovery.zen.master_election.filter_client", true); this.masterElectionFilterDataNodes = settings.getAsBoolean("discovery.zen.master_election.filter_data", false); - logger.debug("using ping.timeout [{}], master_election.filter_client [{}], master_election.filter_data [{}]", pingTimeout, masterElectionFilterClientNodes, masterElectionFilterDataNodes); + logger.debug("using ping.timeout [{}], join.timeout [{}], master_election.filter_client [{}], master_election.filter_data [{}]", pingTimeout, joinTimeout, masterElectionFilterClientNodes, masterElectionFilterDataNodes); this.electMaster = new ElectMasterService(settings); nodeSettingsService.addListener(new ApplySettings()); @@ 
-343,7 +345,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen } // send join request try { - membership.sendJoinRequestBlocking(masterNode, localNode, pingTimeout); + membership.sendJoinRequestBlocking(masterNode, localNode, joinTimeout); } catch (Exception e) { if (e instanceof ElasticsearchException) { logger.info("failed to send join request to master [{}], reason [{}]", masterNode, ((ElasticsearchException) e).getDetailedMessage()); @@ -713,7 +715,7 @@ public class ZenDiscovery extends AbstractLifecycleComponent implemen // validate the join request, will throw a failure if it fails, which will get back to the // node calling the join request - membership.sendValidateJoinRequestBlocking(node, state, pingTimeout); + membership.sendValidateJoinRequestBlocking(node, state, joinTimeout); clusterService.submitStateUpdateTask("zen-disco-receive(join from node[" + node + "])", Priority.IMMEDIATE, new ClusterStateUpdateTask() { @Override