Decrease leader and follower check timeout (#38298)

Reduces the default timeout of each leader and follower check from 30s to 10s. With a retry count of 3, an unresponsive node is now considered failed after 3 * 10 = 30s instead of 3 * 30 = 90s; 30s is still a very long time for a node to be completely unresponsive.
2019-02-04 15:11:12 +01:00 · 2019-02-04 15:11:12 +01:00 · ece8c659c5
parent 9b64558efb
commit ece8c659c5
3 changed files with 4 additions and 4 deletions
--- a/docs/reference/modules/discovery/discovery-settings.asciidoc
+++ b/docs/reference/modules/discovery/discovery-settings.asciidoc
@ -44,7 +44,7 @@ Discovery and cluster formation are affected by the following settings:
 `cluster.fault_detection.follower_check.timeout`::

    Sets how long the elected master waits for a response to a follower check
-    before considering it to have failed. Defaults to `30s`.
+    before considering it to have failed. Defaults to `10s`.

 `cluster.fault_detection.follower_check.retry_count`::

@ -60,7 +60,7 @@ Discovery and cluster formation are affected by the following settings:
 `cluster.fault_detection.leader_check.timeout`::

    Sets how long each node waits for a response to a leader check from the
-    elected master before considering it to have failed. Defaults to `30s`.
+    elected master before considering it to have failed. Defaults to `10s`.

 `cluster.fault_detection.leader_check.retry_count`::

--- a/server/src/main/java/org/elasticsearch/cluster/coordination/FollowersChecker.java
+++ b/server/src/main/java/org/elasticsearch/cluster/coordination/FollowersChecker.java
@ -77,7 +77,7 @@ public class FollowersChecker {
    // the timeout for each check sent to each node
    public static final Setting<TimeValue> FOLLOWER_CHECK_TIMEOUT_SETTING =
        Setting.timeSetting("cluster.fault_detection.follower_check.timeout",
-            TimeValue.timeValueMillis(30000), TimeValue.timeValueMillis(1), Setting.Property.NodeScope);
+            TimeValue.timeValueMillis(10000), TimeValue.timeValueMillis(1), Setting.Property.NodeScope);

    // the number of failed checks that must happen before the follower is considered to have failed.
    public static final Setting<Integer> FOLLOWER_CHECK_RETRY_COUNT_SETTING =
--- a/server/src/main/java/org/elasticsearch/cluster/coordination/LeaderChecker.java
+++ b/server/src/main/java/org/elasticsearch/cluster/coordination/LeaderChecker.java
@ -71,7 +71,7 @@ public class LeaderChecker {
    // the timeout for each check sent to the leader
    public static final Setting<TimeValue> LEADER_CHECK_TIMEOUT_SETTING =
        Setting.timeSetting("cluster.fault_detection.leader_check.timeout",
-            TimeValue.timeValueMillis(30000), TimeValue.timeValueMillis(1), Setting.Property.NodeScope);
+            TimeValue.timeValueMillis(10000), TimeValue.timeValueMillis(1), Setting.Property.NodeScope);

    // the number of failed checks that must happen before the leader is considered to have failed.
    public static final Setting<Integer> LEADER_CHECK_RETRY_COUNT_SETTING =