HBASE-3095 Client needs to reconnect if it expires its zk session
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1031176 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0ea21f78bc
commit
03b45ecae9
|
@ -647,6 +647,7 @@ Release 0.21.0 - Unreleased
|
|||
(Stefan Seelmann via Stack)
|
||||
HBASE-2471 Splitting logs, we'll make an output file though the
|
||||
region no longer exists
|
||||
HBASE-3095 Client needs to reconnect if it expires its zk session
|
||||
|
||||
|
||||
IMPROVEMENTS
|
||||
|
|
|
@ -235,18 +235,18 @@ public class HConnectionManager {
|
|||
private ZooKeeperWatcher zooKeeper;
|
||||
// ZooKeeper-based master address tracker
|
||||
private MasterAddressTracker masterAddressTracker;
|
||||
|
||||
private RootRegionTracker rootRegionTracker;
|
||||
|
||||
private final Object metaRegionLock = new Object();
|
||||
|
||||
private final Object userRegionLock = new Object();
|
||||
|
||||
private final Configuration conf;
|
||||
|
||||
// Known region HServerAddress.toString() -> HRegionInterface
|
||||
|
||||
private final Map<String, HRegionInterface> servers =
|
||||
new ConcurrentHashMap<String, HRegionInterface>();
|
||||
|
||||
private final RootRegionTracker rootRegionTracker;
|
||||
|
||||
/**
|
||||
* Map of table to table {@link HRegionLocation}s. The table key is made
|
||||
* by doing a {@link Bytes#mapKey(byte[])} of the table's name.
|
||||
|
@ -289,6 +289,14 @@ public class HConnectionManager {
|
|||
this.prefetchRegionLimit = conf.getInt("hbase.client.prefetch.limit",
|
||||
10);
|
||||
|
||||
setupZookeeperTrackers();
|
||||
|
||||
this.master = null;
|
||||
this.masterChecked = false;
|
||||
}
|
||||
|
||||
private synchronized void setupZookeeperTrackers()
|
||||
throws ZooKeeperConnectionException{
|
||||
// initialize zookeeper and master address manager
|
||||
this.zooKeeper = getZooKeeperWatcher();
|
||||
masterAddressTracker = new MasterAddressTracker(this.zooKeeper, this);
|
||||
|
@ -297,9 +305,17 @@ public class HConnectionManager {
|
|||
|
||||
this.rootRegionTracker = new RootRegionTracker(this.zooKeeper, this);
|
||||
this.rootRegionTracker.start();
|
||||
}
|
||||
|
||||
this.master = null;
|
||||
this.masterChecked = false;
|
||||
private synchronized void resetZooKeeperTrackers()
|
||||
throws ZooKeeperConnectionException {
|
||||
LOG.info("Trying to reconnect to zookeeper");
|
||||
masterAddressTracker.stop();
|
||||
masterAddressTracker = null;
|
||||
rootRegionTracker.stop();
|
||||
rootRegionTracker = null;
|
||||
this.zooKeeper = null;
|
||||
setupZookeeperTrackers();
|
||||
}
|
||||
|
||||
public Configuration getConfiguration() {
|
||||
|
@ -1322,6 +1338,21 @@ public class HConnectionManager {
|
|||
|
||||
@Override
|
||||
public void abort(final String msg, Throwable t) {
|
||||
if (t instanceof KeeperException.SessionExpiredException) {
|
||||
try {
|
||||
LOG.info("This client just lost it's session with ZooKeeper, trying" +
|
||||
" to reconnect.");
|
||||
resetZooKeeperTrackers();
|
||||
LOG.info("Reconnected successfully. This disconnect could have been" +
|
||||
" caused by a network partition or a long-running GC pause," +
|
||||
" either way it's recommended that you verify your environment.");
|
||||
return;
|
||||
} catch (ZooKeeperConnectionException e) {
|
||||
LOG.error("Could not reconnect to ZooKeeper after session" +
|
||||
" expiration, aborting");
|
||||
t = e;
|
||||
}
|
||||
}
|
||||
if (t != null) LOG.fatal(msg, t);
|
||||
else LOG.fatal(msg);
|
||||
this.closed = true;
|
||||
|
|
|
@ -284,7 +284,10 @@ public class ZooKeeperWatcher implements Watcher, Abortable {
|
|||
/**
|
||||
* Called when there is a connection-related event via the Watcher callback.
|
||||
*
|
||||
* If Disconnected or Expired, this should shutdown the cluster.
|
||||
* If Disconnected or Expired, this should shutdown the cluster. But, since
|
||||
* we send a KeeperException.SessionExpiredException along with the abort
|
||||
* call, it's possible for the Abortable to catch it and try to create a new
|
||||
* session with ZooKeeper. This is what the client does in HCM.
|
||||
*
|
||||
* @param event
|
||||
*/
|
||||
|
@ -322,7 +325,8 @@ public class ZooKeeperWatcher implements Watcher, Abortable {
|
|||
"ZooKeeper, aborting");
|
||||
// TODO: One thought is to add call to ZooKeeperListener so say,
|
||||
// ZooKeeperNodeTracker can zero out its data values.
|
||||
if (this.abortable != null) this.abortable.abort(msg, null);
|
||||
if (this.abortable != null) this.abortable.abort(msg,
|
||||
new KeeperException.SessionExpiredException());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -105,9 +105,20 @@ public class TestZooKeeper {
|
|||
|
||||
Thread.sleep(sessionTimeout * 3L);
|
||||
|
||||
// provoke session expiration by doing something with ZK
|
||||
ZKUtil.dump(connectionZK);
|
||||
|
||||
// Check that the old ZK connection is closed, which means we did expire
|
||||
System.err.println("ZooKeeper should have timed out");
|
||||
LOG.info("state=" + connectionZK.getZooKeeper().getState());
|
||||
Assert.assertTrue(connectionZK.getZooKeeper().getState().equals(States.CLOSED));
|
||||
Assert.assertTrue(connectionZK.getZooKeeper().getState().equals(
|
||||
States.CLOSED));
|
||||
|
||||
// Check that the client recovered
|
||||
ZooKeeperWatcher newConnectionZK = connection.getZooKeeperWatcher();
|
||||
LOG.info("state=" + newConnectionZK.getZooKeeper().getState());
|
||||
Assert.assertTrue(newConnectionZK.getZooKeeper().getState().equals(
|
||||
States.CONNECTED));
|
||||
}
|
||||
|
||||
@Test
|
||||
|
|
Loading…
Reference in New Issue