From 86aad7e82c81173bd62f30f603425790baa2f9b6 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Wed, 15 May 2013 22:52:19 +0000 Subject: [PATCH] HBASE-8435 Test for zk leak does not account for unsynchronized access to zk watcher: ADDENDUM git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1483117 13f79535-47bb-0310-9956-ffa450edef68 --- .../hbase/client/HConnectionManager.java | 3 +-- .../hbase/client/ZooKeeperRegistry.java | 23 +++++++++++++++-- .../apache/hadoop/hbase/client/TestHCM.java | 25 ++++++++++++++++--- 3 files changed, 43 insertions(+), 8 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java index 763b79fcc7c..6455c07b0d4 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java @@ -604,9 +604,8 @@ public class HConnectionManager { this.clusterId = this.registry.getClusterId(); if (clusterId == null) { clusterId = HConstants.CLUSTER_ID_DEFAULT; + LOG.debug("clusterid came back null, using default " + clusterId); } - - LOG.info("ClusterId is " + clusterId); } @Override diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ZooKeeperRegistry.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ZooKeeperRegistry.java index dde074093f7..5f6f45cb2b8 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ZooKeeperRegistry.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ZooKeeperRegistry.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker; +import org.apache.hadoop.hbase.zookeeper.ZKClusterId; import org.apache.hadoop.hbase.zookeeper.ZKTableReadOnly; import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.zookeeper.KeeperException; @@ -69,10 +70,28 @@ class ZooKeeperRegistry implements Registry { } } + private String clusterId = null; + @Override public String getClusterId() { - // TODO Auto-generated method stub - return null; + if (this.clusterId != null) return this.clusterId; + // No synchronized here, worse case we will retrieve it twice, that's + // not an issue. + ZooKeeperKeepAliveConnection zkw = null; + try { + zkw = hci.getKeepAliveZooKeeperWatcher(); + this.clusterId = ZKClusterId.readClusterIdZNode(zkw); + if (this.clusterId == null) { + LOG.info("ClusterId read in ZooKeeper is null"); + } + } catch (KeeperException e) { + LOG.warn("Can't retrieve clusterId from Zookeeper", e); + } catch (IOException e) { + LOG.warn("Can't retrieve clusterId from Zookeeper", e); + } finally { + if (zkw != null) zkw.close(); + } + return this.clusterId; } @Override diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHCM.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHCM.java index 554e45ffe64..a3ef681513a 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHCM.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestHCM.java @@ -803,13 +803,21 @@ public class TestHCM { } /** - * Tests that a closed connection does not have a live zookeeper + * Tests that a destroyed connection does not have a live zookeeper. + * Below is timing based. We put up a connection to a table and then close the connection while + * having a background thread running that is forcing close of the connection to try and + * provoke a close catastrophe; we are hoping for a car crash so we can see if we are leaking + * zk connections. * @throws Exception */ @Test public void testDeleteForZKConnLeak() throws Exception { TEST_UTIL.createTable(TABLE_NAME4, FAM_NAM); - final Configuration config = TEST_UTIL.getConfiguration(); + final Configuration config = HBaseConfiguration.create(TEST_UTIL.getConfiguration()); + config.setInt("zookeeper.recovery.retry", 1); + config.setInt("zookeeper.recovery.retry.intervalmill", 1000); + config.setInt("hbase.rpc.timeout", 2000); + config.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2); ThreadPoolExecutor pool = new ThreadPoolExecutor(1, 10, 5, TimeUnit.SECONDS, @@ -822,21 +830,30 @@ public class TestHCM { while (!Thread.interrupted()) { try { HConnection conn = HConnectionManager.getConnection(config); + LOG.info("Connection " + conn); HConnectionManager.deleteStaleConnection(conn); + LOG.info("Connection closed " + conn); + // TODO: This sleep time should be less than the time that it takes to open and close + // a table. Ideally we would do a few runs first to measure. For now this is + // timing based; hopefully we hit the bad condition. + Threads.sleep(10); } catch (Exception e) { } } } }); - // use connection multiple times - for (int i = 0; i < 50; i++) { + // Use connection multiple times. + for (int i = 0; i < 30; i++) { HConnection c1 = null; try { c1 = HConnectionManager.getConnection(config); + LOG.info("HTable connection " + i + " " + c1); HTable table = new HTable(TABLE_NAME4, c1, pool); table.close(); + LOG.info("HTable connection " + i + " closed " + c1); } catch (Exception e) { + LOG.info("We actually want this to happen!!!! So we can see if we are leaking zk", e); } finally { if (c1 != null) { if (c1.isClosed()) {