From f31bd00974f526b5a0b244679b9c5f66ee8d2c8c Mon Sep 17 00:00:00 2001 From: Jean-Daniel Cryans Date: Wed, 24 Nov 2010 06:04:55 +0000 Subject: [PATCH] HBASE-3273 Set the ZK default timeout to 3 minutes git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1038489 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 1 + src/docbkx/book.xml | 27 ++++++++++++++++++- .../apache/hadoop/hbase/master/HMaster.java | 2 +- .../hbase/master/HMasterCommandLine.java | 1 - .../hadoop/hbase/zookeeper/HQuorumPeer.java | 4 +++ .../apache/hadoop/hbase/zookeeper/ZKUtil.java | 2 +- src/main/resources/hbase-default.xml | 2 +- 7 files changed, 34 insertions(+), 5 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 394ef893dda..8be01b43308 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1186,6 +1186,7 @@ Release 0.90.0 - Unreleased HBASE-3230 Refresh our hadoop jar and update zookeeper to just-released 3.3.2 HBASE-3231 Update to zookeeper 3.3.2. + HBASE-3273 Set the ZK default timeout to 3 minutes NEW FEATURES diff --git a/src/docbkx/book.xml b/src/docbkx/book.xml index e4c01514516..4cd0f9c6403 100644 --- a/src/docbkx/book.xml +++ b/src/docbkx/book.xml @@ -299,7 +299,10 @@ Usually you'll want to use the latest version available except the problematic u in branch-0.20-append to see list of patches involved. HBase bundles the Apache branch-0.20-append Hadoop. Replace the Hadoop jar bundled with HBase with that you have - installed on your cluster to avoid version mismatch issues. + installed on your cluster to avoid version mismatch issues; + for example, versions of CDH do not have HDFS-724 whereas + Hadoop's branch-0.20-append branch does have HDFS-724. This + patch changes the RPC version because the protocol was changed.
ssh @@ -984,6 +987,28 @@ to ensure well-formedness of your document after an edit session.
Recommended Configurations +
<varname>zookeeper.session.timeout</varname> + The default timeout is three minutes (specified in milliseconds). This means + that if a server crashes, it will be three minutes before the Master notices + the crash and starts recovery. You might like to tune the timeout down to + a minute or even less so the Master notices failures sooner. + Before changing this value, be sure you have your JVM garbage collection + configuration under control; otherwise, a long garbage collection that lasts + beyond the ZooKeeper session timeout will take out + your RegionServer (You might be fine with this -- you probably want recovery to start + on the server if a RegionServer has been in GC for a long period of time). + + To change this configuration, edit hbase-site.xml, + copy the changed file around the cluster and restart. + + We set this value high to save us from having to field newcomer questions on the mailing lists asking + why a RegionServer went down during a massive import. The usual cause is that their JVM is untuned and + they are running into long GC pauses. Our thinking is that + while users are getting familiar with HBase, we'd save them having to know all of its + intricacies. Later, once they've built some confidence, they can play + with configuration such as this. + +
Configuration for large memory machines diff --git a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 99fa77ee5b4..cc2da4d942e 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -239,7 +239,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { while (!amm.isActiveMaster()) { LOG.debug("Waiting for master address ZNode to be written " + "(Also watching cluster state node)"); - Thread.sleep(c.getInt("zookeeper.session.timeout", 60 * 1000)); + Thread.sleep(c.getInt("zookeeper.session.timeout", 180 * 1000)); } } diff --git a/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java b/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java index 56fab0bbb06..f0f85e2e8e6 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java +++ b/src/main/java/org/apache/hadoop/hbase/master/HMasterCommandLine.java @@ -121,7 +121,6 @@ public class HMasterCommandLine extends ServerCommandLine { if (zkClientPort == 0) { throw new IOException("No config value for hbase.zookeeper.property.clientPort"); } - zooKeeperCluster.setTickTime(conf.getInt("hbase.zookeeper.property.tickTime", 3000)); zooKeeperCluster.setClientPort(zkClientPort); int clientPort = zooKeeperCluster.startup(zkDataPath); if (clientPort != zkClientPort) { diff --git a/src/main/java/org/apache/hadoop/hbase/zookeeper/HQuorumPeer.java b/src/main/java/org/apache/hadoop/hbase/zookeeper/HQuorumPeer.java index d0a0dbcac1d..d551c6fc74a 100644 --- a/src/main/java/org/apache/hadoop/hbase/zookeeper/HQuorumPeer.java +++ b/src/main/java/org/apache/hadoop/hbase/zookeeper/HQuorumPeer.java @@ -125,6 +125,10 @@ public class HQuorumPeer { } } + // Set the max session timeout from the provided client-side timeout + properties.setProperty("maxSessionTimeout", + conf.get("zookeeper.session.timeout", 
"180000")); + if (myId == -1) { throw new IOException("Could not find my address: " + myAddress + " in list of ZooKeeper quorum servers"); diff --git a/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java b/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java index 1807fe03d53..1b0430ba16e 100644 --- a/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java +++ b/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java @@ -91,7 +91,7 @@ public class ZKUtil { if(ensemble == null) { throw new IOException("Unable to determine ZooKeeper ensemble"); } - int timeout = conf.getInt("zookeeper.session.timeout", 60 * 1000); + int timeout = conf.getInt("zookeeper.session.timeout", 180 * 1000); LOG.debug(descriptor + " opening connection to ZooKeeper with ensemble (" + ensemble + ")"); return new ZooKeeper(ensemble, timeout, watcher); diff --git a/src/main/resources/hbase-default.xml b/src/main/resources/hbase-default.xml index c55b7b734d4..0889566b2f4 100644 --- a/src/main/resources/hbase-default.xml +++ b/src/main/resources/hbase-default.xml @@ -451,7 +451,7 @@ zookeeper.session.timeout - 60000 + 180000 ZooKeeper session timeout. HBase passes this to the zk quorum as suggested maximum time for a session. See http://hadoop.apache.org/zookeeper/docs/current/zookeeperProgrammers.html#ch_zkSessions