HBASE-3273 Set the ZK default timeout to 3 minutes
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1038489 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
700b396a3b
commit
f31bd00974
|
@ -1186,6 +1186,7 @@ Release 0.90.0 - Unreleased
|
|||
HBASE-3230 Refresh our hadoop jar and update zookeeper to
|
||||
just-released 3.3.2
|
||||
HBASE-3231 Update to zookeeper 3.3.2.
|
||||
HBASE-3273 Set the ZK default timeout to 3 minutes
|
||||
|
||||
|
||||
NEW FEATURES
|
||||
|
|
|
@ -299,7 +299,10 @@ Usually you'll want to use the latest version available except the problematic u
|
|||
in branch-0.20-append to see list of patches involved.</para>
|
||||
<para>HBase bundles the Apache branch-0.20-append Hadoop.
|
||||
Replace the Hadoop jar bundled with HBase with the one you have
|
||||
installed on your cluster to avoid version mismatch issues.
|
||||
installed on your cluster to avoid version mismatch issues;
|
||||
for example, versions of CDH do not have HDFS-724 whereas
|
||||
Hadoop's branch-0.20-append branch does have HDFS-724. This
|
||||
patch changes the RPC version because protocol was changed.
|
||||
</para>
|
||||
</section>
|
||||
<section xml:id="ssh"> <title>ssh</title>
|
||||
|
@ -984,6 +987,28 @@ to ensure well-formedness of your document after an edit session.
|
|||
</section>
|
||||
|
||||
<section xml:id="recommended_configurations"><title>Recommended Configurations</title>
|
||||
<section xml:id="zookeeper.session.timeout"><title><varname>zookeeper.session.timeout</varname></title>
|
||||
<para>The default timeout is three minutes (specified in milliseconds). This means
|
||||
that if a server crashes, it will be three minutes before the Master notices
|
||||
the crash and starts recovery. You might like to tune the timeout down to
|
||||
a minute or even less so the Master notices failures sooner.
|
||||
Before changing this value, be sure you have your JVM garbage collection
|
||||
configuration under control; otherwise, a long garbage collection that lasts
|
||||
beyond the zookeeper session timeout will take out
|
||||
your RegionServer (You might be fine with this -- you probably want recovery to start
|
||||
on the server if a RegionServer has been in GC for a long period of time).</para>
|
||||
|
||||
<para>To change this configuration, edit <filename>hbase-site.xml</filename>,
|
||||
copy the changed file around the cluster and restart.</para>
|
||||
|
||||
<para>We set this value high to save our having to field noob questions up on the mailing lists asking
|
||||
why a RegionServer went down during a massive import. The usual cause is that their JVM is untuned and
|
||||
they are running into long GC pauses. Our thinking is that
|
||||
while users are getting familiar with HBase, we'd save them having to know all of its
|
||||
intricacies. Later when they've built some confidence, then they can play
|
||||
with configuration such as this.
|
||||
</para>
|
||||
</section>
|
||||
<section xml:id="big_memory">
|
||||
<title>Configuration for large memory machines</title>
|
||||
<para>
|
||||
|
|
|
@ -239,7 +239,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server {
|
|||
while (!amm.isActiveMaster()) {
|
||||
LOG.debug("Waiting for master address ZNode to be written " +
|
||||
"(Also watching cluster state node)");
|
||||
Thread.sleep(c.getInt("zookeeper.session.timeout", 60 * 1000));
|
||||
Thread.sleep(c.getInt("zookeeper.session.timeout", 180 * 1000));
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -121,7 +121,6 @@ public class HMasterCommandLine extends ServerCommandLine {
|
|||
if (zkClientPort == 0) {
|
||||
throw new IOException("No config value for hbase.zookeeper.property.clientPort");
|
||||
}
|
||||
zooKeeperCluster.setTickTime(conf.getInt("hbase.zookeeper.property.tickTime", 3000));
|
||||
zooKeeperCluster.setClientPort(zkClientPort);
|
||||
int clientPort = zooKeeperCluster.startup(zkDataPath);
|
||||
if (clientPort != zkClientPort) {
|
||||
|
|
|
@ -125,6 +125,10 @@ public class HQuorumPeer {
|
|||
}
|
||||
}
|
||||
|
||||
// Set the max session timeout from the provided client-side timeout
|
||||
properties.setProperty("maxSessionTimeout",
|
||||
conf.get("zookeeper.session.timeout", "180000"));
|
||||
|
||||
if (myId == -1) {
|
||||
throw new IOException("Could not find my address: " + myAddress +
|
||||
" in list of ZooKeeper quorum servers");
|
||||
|
|
|
@ -91,7 +91,7 @@ public class ZKUtil {
|
|||
if(ensemble == null) {
|
||||
throw new IOException("Unable to determine ZooKeeper ensemble");
|
||||
}
|
||||
int timeout = conf.getInt("zookeeper.session.timeout", 60 * 1000);
|
||||
int timeout = conf.getInt("zookeeper.session.timeout", 180 * 1000);
|
||||
LOG.debug(descriptor + " opening connection to ZooKeeper with ensemble (" +
|
||||
ensemble + ")");
|
||||
return new ZooKeeper(ensemble, timeout, watcher);
|
||||
|
|
|
@ -451,7 +451,7 @@
|
|||
</property>
|
||||
<property>
|
||||
<name>zookeeper.session.timeout</name>
|
||||
<value>60000</value>
|
||||
<value>180000</value>
|
||||
<description>ZooKeeper session timeout.
|
||||
HBase passes this to the zk quorum as suggested maximum time for a
|
||||
session. See http://hadoop.apache.org/zookeeper/docs/current/zookeeperProgrammers.html#ch_zkSessions
|
||||
|
|
Loading…
Reference in New Issue