HBASE-8435 Test for zk leak does not account for unsynchronized access to zk watcher: ADDENDUM

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1483117 13f79535-47bb-0310-9956-ffa450edef68
Author: Michael Stack
Date:   2013-05-15 22:52:19 +00:00
Commit: 86aad7e82c
Parent: 6ebd8e3f2a
3 changed files with 43 additions and 8 deletions

HConnectionManager.java

@@ -604,9 +604,8 @@ public class HConnectionManager {
      this.clusterId = this.registry.getClusterId();
      if (clusterId == null) {
        clusterId = HConstants.CLUSTER_ID_DEFAULT;
        LOG.debug("clusterid came back null, using default " + clusterId);
      }
      LOG.info("ClusterId is " + clusterId);
    }

    @Override

ZooKeeperRegistry.java

@@ -26,6 +26,7 @@ import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.ServerName;
 import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
+import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
 import org.apache.hadoop.hbase.zookeeper.ZKTableReadOnly;
 import org.apache.hadoop.hbase.zookeeper.ZKUtil;
 import org.apache.zookeeper.KeeperException;
@@ -69,10 +70,28 @@ class ZooKeeperRegistry implements Registry {
     }
   }

   private String clusterId = null;

   @Override
   public String getClusterId() {
-    // TODO Auto-generated method stub
-    return null;
+    if (this.clusterId != null) return this.clusterId;
+    // No synchronized here, worse case we will retrieve it twice, that's
+    // not an issue.
+    ZooKeeperKeepAliveConnection zkw = null;
+    try {
+      zkw = hci.getKeepAliveZooKeeperWatcher();
+      this.clusterId = ZKClusterId.readClusterIdZNode(zkw);
+      if (this.clusterId == null) {
+        LOG.info("ClusterId read in ZooKeeper is null");
+      }
+    } catch (KeeperException e) {
+      LOG.warn("Can't retrieve clusterId from Zookeeper", e);
+    } catch (IOException e) {
+      LOG.warn("Can't retrieve clusterId from Zookeeper", e);
+    } finally {
+      if (zkw != null) zkw.close();
+    }
+    return this.clusterId;
   }

   @Override
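The getClusterId() implementation above combines two idioms worth calling out: lazy initialization behind a deliberately unsynchronized null check (the worst case, as the comment says, is fetching the id twice), and acquiring the keep-alive watcher inside try/finally so it is always closed, which is the leak this change guards against. Below is a minimal, self-contained sketch of the same shape; Handle, acquire() and readId() are hypothetical stand-ins for ZooKeeperKeepAliveConnection, getKeepAliveZooKeeperWatcher() and ZKClusterId.readClusterIdZNode(), not HBase API.

public class LazyClusterId {
  // Unsynchronized on purpose: if two threads race past the null check, both
  // read the same value and both writes are harmless.
  private String clusterId = null;

  public String getClusterId() {
    if (this.clusterId != null) return this.clusterId;
    Handle h = null;
    try {
      h = Handle.acquire();         // like getKeepAliveZooKeeperWatcher()
      this.clusterId = h.readId();  // like ZKClusterId.readClusterIdZNode(zkw)
    } catch (Exception e) {
      // Warn and fall through; callers tolerate a null id, as in the patch.
    } finally {
      if (h != null) h.close();     // the point of the fix: the handle never leaks
    }
    return this.clusterId;
  }

  // Hypothetical stand-in for the keep-alive ZooKeeper connection.
  static final class Handle implements AutoCloseable {
    static Handle acquire() { return new Handle(); }
    String readId() { return "test-cluster-id"; }
    @Override public void close() { /* release the underlying connection */ }
  }
}

Since clusterId is not volatile, a thread may still see null after another thread has cached the id and will simply re-read it; String's immutability makes the duplicated work safe, which is exactly the trade the in-code comment accepts.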

TestHCM.java

@@ -803,13 +803,21 @@ public class TestHCM {
   }

   /**
-   * Tests that a closed connection does not have a live zookeeper
+   * Tests that a destroyed connection does not have a live zookeeper.
+   * Below is timing based. We put up a connection to a table and then close the connection while
+   * having a background thread running that is forcing close of the connection to try and
+   * provoke a close catastrophe; we are hoping for a car crash so we can see if we are leaking
+   * zk connections.
    * @throws Exception
    */
   @Test
   public void testDeleteForZKConnLeak() throws Exception {
     TEST_UTIL.createTable(TABLE_NAME4, FAM_NAM);
-    final Configuration config = TEST_UTIL.getConfiguration();
+    final Configuration config = HBaseConfiguration.create(TEST_UTIL.getConfiguration());
+    config.setInt("zookeeper.recovery.retry", 1);
+    config.setInt("zookeeper.recovery.retry.intervalmill", 1000);
+    config.setInt("hbase.rpc.timeout", 2000);
+    config.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2);

     ThreadPoolExecutor pool = new ThreadPoolExecutor(1, 10,
       5, TimeUnit.SECONDS,
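The next hunk implements the strategy the rewritten javadoc describes: a background thread keeps force-deleting the shared connection while the foreground loop keeps opening and closing tables on it, hoping to collide. A distilled, self-contained sketch of that provocation pattern follows; SharedResource and RaceProvoker are illustrative names, not HBase API, and the 30/10 numbers mirror the test's loop count and sleep.

import java.util.concurrent.atomic.AtomicReference;

public class RaceProvoker {
  // Stand-in for an HConnection: using it after close() fails.
  static final class SharedResource {
    volatile boolean closed;
    void use() { if (closed) throw new IllegalStateException("used after close"); }
    void close() { closed = true; }
  }

  public static void main(String[] args) throws InterruptedException {
    AtomicReference<SharedResource> ref = new AtomicReference<>(new SharedResource());
    // Background closer, like the pool task calling deleteStaleConnection(conn).
    Thread closer = new Thread(() -> {
      while (!Thread.interrupted()) {
        ref.getAndSet(new SharedResource()).close();
        try { Thread.sleep(10); } catch (InterruptedException e) { return; }
      }
    });
    closer.start();
    for (int i = 0; i < 30; i++) {    // like the test's 30 HTable open/close cycles
      try {
        ref.get().use();              // may grab a resource the closer just killed
      } catch (Exception e) {
        // Expected occasionally: the test WANTS these collisions, then checks
        // afterwards that no ZooKeeper connections leaked.
      }
    }
    closer.interrupt();
    closer.join();
  }
}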
@@ -822,21 +830,30 @@ public class TestHCM {
         while (!Thread.interrupted()) {
           try {
             HConnection conn = HConnectionManager.getConnection(config);
+            LOG.info("Connection " + conn);
             HConnectionManager.deleteStaleConnection(conn);
+            LOG.info("Connection closed " + conn);
+            // TODO: This sleep time should be less than the time that it takes to open and close
+            // a table. Ideally we would do a few runs first to measure. For now this is
+            // timing based; hopefully we hit the bad condition.
+            Threads.sleep(10);
           } catch (Exception e) {
           }
         }
       }
     });

-    // use connection multiple times
-    for (int i = 0; i < 50; i++) {
+    // Use connection multiple times.
+    for (int i = 0; i < 30; i++) {
       HConnection c1 = null;
       try {
         c1 = HConnectionManager.getConnection(config);
+        LOG.info("HTable connection " + i + " " + c1);
         HTable table = new HTable(TABLE_NAME4, c1, pool);
         table.close();
+        LOG.info("HTable connection " + i + " closed " + c1);
       } catch (Exception e) {
+        LOG.info("We actually want this to happen!!!! So we can see if we are leaking zk", e);
       } finally {
         if (c1 != null) {
           if (c1.isClosed()) {