HBASE-3095 Client needs to reconnect if it expires its zk session

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1031176 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jean-Daniel Cryans 2010-11-04 19:37:26 +00:00
parent 0ea21f78bc
commit 03b45ecae9
4 changed files with 56 additions and 9 deletions

View File

@ -647,6 +647,7 @@ Release 0.21.0 - Unreleased
(Stefan Seelmann via Stack)
HBASE-2471 Splitting logs, we'll make an output file though the
region no longer exists
HBASE-3095 Client needs to reconnect if it expires its zk session
IMPROVEMENTS

View File

@ -235,18 +235,18 @@ public class HConnectionManager {
private ZooKeeperWatcher zooKeeper;
// ZooKeeper-based master address tracker
private MasterAddressTracker masterAddressTracker;
private RootRegionTracker rootRegionTracker;
private final Object metaRegionLock = new Object();
private final Object userRegionLock = new Object();
private final Configuration conf;
// Known region HServerAddress.toString() -> HRegionInterface
private final Map<String, HRegionInterface> servers =
new ConcurrentHashMap<String, HRegionInterface>();
private final RootRegionTracker rootRegionTracker;
/**
* Map of table to table {@link HRegionLocation}s. The table key is made
* by doing a {@link Bytes#mapKey(byte[])} of the table's name.
@ -289,6 +289,14 @@ public class HConnectionManager {
this.prefetchRegionLimit = conf.getInt("hbase.client.prefetch.limit",
10);
setupZookeeperTrackers();
this.master = null;
this.masterChecked = false;
}
private synchronized void setupZookeeperTrackers()
throws ZooKeeperConnectionException{
// initialize zookeeper and master address manager
this.zooKeeper = getZooKeeperWatcher();
masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this);
@ -297,9 +305,17 @@ public class HConnectionManager {
this.rootRegionTracker = new RootRegionTracker(this.zooKeeper, this);
this.rootRegionTracker.start();
}
this.master = null;
this.masterChecked = false;
/**
 * Tears down the current ZooKeeper trackers and sets them up again through
 * {@link #setupZookeeperTrackers()}.
 *
 * The master address tracker and the root region tracker are stopped and
 * dropped, the reference to the current watcher is cleared, and the tracker
 * setup is re-run. Each tracker is null-checked before being stopped: if a
 * previous call failed inside setupZookeeperTrackers(), the fields were
 * already nulled out here and calling stop() on them again would throw a
 * NullPointerException instead of retrying the reconnect.
 *
 * @throws ZooKeeperConnectionException if the tracker setup fails
 */
private synchronized void resetZooKeeperTrackers()
    throws ZooKeeperConnectionException {
  LOG.info("Trying to reconnect to zookeeper");
  if (masterAddressTracker != null) {
    masterAddressTracker.stop();
    masterAddressTracker = null;
  }
  if (rootRegionTracker != null) {
    rootRegionTracker.stop();
    rootRegionTracker = null;
  }
  // Clear the old watcher reference before the setup runs again.
  // NOTE(review): presumably getZooKeeperWatcher() builds a new watcher when
  // this field is null -- confirm against its implementation.
  this.zooKeeper = null;
  setupZookeeperTrackers();
}
public Configuration getConfiguration() {
@ -1322,6 +1338,21 @@ public class HConnectionManager {
@Override
public void abort(final String msg, Throwable t) {
if (t instanceof KeeperException.SessionExpiredException) {
try {
LOG.info("This client just lost it's session with ZooKeeper, trying" +
" to reconnect.");
resetZooKeeperTrackers();
LOG.info("Reconnected successfully. This disconnect could have been" +
" caused by a network partition or a long-running GC pause," +
" either way it's recommended that you verify your environment.");
return;
} catch (ZooKeeperConnectionException e) {
LOG.error("Could not reconnect to ZooKeeper after session" +
" expiration, aborting");
t = e;
}
}
if (t != null) LOG.fatal(msg, t);
else LOG.fatal(msg);
this.closed = true;

View File

@ -284,7 +284,10 @@ public class ZooKeeperWatcher implements Watcher, Abortable {
/**
* Called when there is a connection-related event via the Watcher callback.
*
* If Disconnected or Expired, this should shutdown the cluster.
* If Disconnected or Expired, this should shut down the cluster. However,
* because we pass a KeeperException.SessionExpiredException along with the
* abort call, the Abortable can catch it and try to create a new session
* with ZooKeeper. This is what the client does in HCM.
*
* @param event
*/
@ -322,7 +325,8 @@ public class ZooKeeperWatcher implements Watcher, Abortable {
"ZooKeeper, aborting");
// TODO: One thought is to add a call to ZooKeeperListener so that, say,
// ZooKeeperNodeTracker can zero out its data values.
if (this.abortable != null) this.abortable.abort(msg, null);
if (this.abortable != null) this.abortable.abort(msg,
new KeeperException.SessionExpiredException());
break;
}
}

View File

@ -105,9 +105,20 @@ public class TestZooKeeper {
Thread.sleep(sessionTimeout * 3L);
// provoke session expiration by doing something with ZK
ZKUtil.dump(connectionZK);
// Check that the old ZK connection is closed, which means we did expire
System.err.println("ZooKeeper should have timed out");
LOG.info("state=" + connectionZK.getZooKeeper().getState());
Assert.assertTrue(connectionZK.getZooKeeper().getState().equals(States.CLOSED));
Assert.assertTrue(connectionZK.getZooKeeper().getState().equals(
States.CLOSED));
// Check that the client recovered
ZooKeeperWatcher newConnectionZK = connection.getZooKeeperWatcher();
LOG.info("state=" + newConnectionZK.getZooKeeper().getState());
Assert.assertTrue(newConnectionZK.getZooKeeper().getState().equals(
States.CONNECTED));
}
@Test