From 54b514d62a148c120ed902fb4f66d454a25bd1c4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E6=AF=9B=E8=9B=A4=E4=B8=9D?= Date: Wed, 17 Jul 2019 22:08:52 +0800 Subject: [PATCH] HBASE-22700:incorrect timeout in recommended ZooKeeper configuration Signed-off-by: Guanghao Zhang --- hbase-common/src/main/resources/hbase-default.xml | 2 +- src/main/asciidoc/_chapters/configuration.adoc | 2 +- src/main/asciidoc/_chapters/hbase-default.adoc | 2 +- src/main/asciidoc/_chapters/schema_design.adoc | 1 + 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/hbase-common/src/main/resources/hbase-default.xml b/hbase-common/src/main/resources/hbase-default.xml index a1bba0a02c7..1a76626ca34 100644 --- a/hbase-common/src/main/resources/hbase-default.xml +++ b/hbase-common/src/main/resources/hbase-default.xml @@ -380,7 +380,7 @@ possible configurations would overwhelm and obscure the important. But, a region server that connects to an ensemble managed with a different configuration will be subjected that ensemble's maxSessionTimeout. So, even though HBase might propose using 90 seconds, the ensemble can have a max timeout lower than this and it will take - precedence. The current default that ZK ships with is 40 seconds, which is lower than + precedence. The current default maxSessionTimeout that ZK ships with is 40 seconds, which is lower than HBase's. diff --git a/src/main/asciidoc/_chapters/configuration.adoc b/src/main/asciidoc/_chapters/configuration.adoc index dc1ccbb0f75..d091ec57a4b 100644 --- a/src/main/asciidoc/_chapters/configuration.adoc +++ b/src/main/asciidoc/_chapters/configuration.adoc @@ -749,7 +749,7 @@ See link:https://issues.apache.org/jira/browse/HBASE-6389[HBASE-6389 Modify the [[sect.zookeeper.session.timeout]] ===== `zookeeper.session.timeout` -The default timeout is three minutes (specified in milliseconds). This means that if a server crashes, it will be three minutes before the Master notices the crash and starts recovery. 
+The default timeout is 90 seconds (specified in milliseconds). This means that if a server crashes, it will be 90 seconds before the Master notices the crash and starts recovery. You might need to tune the timeout down to a minute or even less so the Master notices failures sooner. Before changing this value, be sure you have your JVM garbage collection configuration under control, otherwise, a long garbage collection that lasts beyond the ZooKeeper session timeout will take out your RegionServer. (You might be fine with this -- you probably want recovery to start on the server if a RegionServer has been in GC for a long period of time). diff --git a/src/main/asciidoc/_chapters/hbase-default.adoc b/src/main/asciidoc/_chapters/hbase-default.adoc index 0e806e13c9f..9ef177541f2 100644 --- a/src/main/asciidoc/_chapters/hbase-default.adoc +++ b/src/main/asciidoc/_chapters/hbase-default.adoc @@ -465,7 +465,7 @@ ZooKeeper session timeout in milliseconds. It is used in two different ways. session timeout will be the one specified by this configuration. But, a region server that connects to an ensemble managed with a different configuration will be subjected that ensemble's maxSessionTimeout. So, even though HBase might propose using 90 seconds, the ensemble can have a max timeout lower than this and - it will take precedence. The current default that ZK ships with is 40 seconds, which is lower than HBase's. + it will take precedence. The current default maxSessionTimeout that ZK ships with is 40 seconds, which is lower than HBase's. + .Default diff --git a/src/main/asciidoc/_chapters/schema_design.adoc b/src/main/asciidoc/_chapters/schema_design.adoc index fdbd18468c2..f76dd75994b 100644 --- a/src/main/asciidoc/_chapters/schema_design.adoc +++ b/src/main/asciidoc/_chapters/schema_design.adoc @@ -1142,6 +1142,7 @@ Disable Nagle’s algorithm. Delayed ACKs can add up to ~200ms to RPC round trip Detect regionserver failure as fast as reasonable. 
Set the following parameters: * In `hbase-site.xml`, set `zookeeper.session.timeout` to 30 seconds or less to bound failure detection (20-30 seconds is a good start). +- Notice: the `sessionTimeout` of ZooKeeper is limited to between 2 times and 20 times the `tickTime` (the basic time unit, in milliseconds, used by ZooKeeper. The default value is 2000ms. It is used to do heartbeats, and the minimum session timeout will be twice the tickTime). * Detect and avoid unhealthy or failed HDFS DataNodes: in `hdfs-site.xml` and `hbase-site.xml`, set the following parameters: - `dfs.namenode.avoid.read.stale.datanode = true` - `dfs.namenode.avoid.write.stale.datanode = true`