HBASE-8435 Test for zk leak does not account for unsynchronized access to zk watcher: ADDENDUM
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1483117 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
6ebd8e3f2a
commit
86aad7e82c
|
@ -604,9 +604,8 @@ public class HConnectionManager {
|
||||||
this.clusterId = this.registry.getClusterId();
|
this.clusterId = this.registry.getClusterId();
|
||||||
if (clusterId == null) {
|
if (clusterId == null) {
|
||||||
clusterId = HConstants.CLUSTER_ID_DEFAULT;
|
clusterId = HConstants.CLUSTER_ID_DEFAULT;
|
||||||
|
LOG.debug("clusterid came back null, using default " + clusterId);
|
||||||
}
|
}
|
||||||
|
|
||||||
LOG.info("ClusterId is " + clusterId);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -26,6 +26,7 @@ import org.apache.hadoop.hbase.HRegionLocation;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.util.Bytes;
|
import org.apache.hadoop.hbase.util.Bytes;
|
||||||
import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
|
import org.apache.hadoop.hbase.zookeeper.MetaRegionTracker;
|
||||||
|
import org.apache.hadoop.hbase.zookeeper.ZKClusterId;
|
||||||
import org.apache.hadoop.hbase.zookeeper.ZKTableReadOnly;
|
import org.apache.hadoop.hbase.zookeeper.ZKTableReadOnly;
|
||||||
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
|
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
|
||||||
import org.apache.zookeeper.KeeperException;
|
import org.apache.zookeeper.KeeperException;
|
||||||
|
@ -69,10 +70,28 @@ class ZooKeeperRegistry implements Registry {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private String clusterId = null;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getClusterId() {
|
public String getClusterId() {
|
||||||
// TODO Auto-generated method stub
|
if (this.clusterId != null) return this.clusterId;
|
||||||
return null;
|
// No synchronized here, worse case we will retrieve it twice, that's
|
||||||
|
// not an issue.
|
||||||
|
ZooKeeperKeepAliveConnection zkw = null;
|
||||||
|
try {
|
||||||
|
zkw = hci.getKeepAliveZooKeeperWatcher();
|
||||||
|
this.clusterId = ZKClusterId.readClusterIdZNode(zkw);
|
||||||
|
if (this.clusterId == null) {
|
||||||
|
LOG.info("ClusterId read in ZooKeeper is null");
|
||||||
|
}
|
||||||
|
} catch (KeeperException e) {
|
||||||
|
LOG.warn("Can't retrieve clusterId from Zookeeper", e);
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.warn("Can't retrieve clusterId from Zookeeper", e);
|
||||||
|
} finally {
|
||||||
|
if (zkw != null) zkw.close();
|
||||||
|
}
|
||||||
|
return this.clusterId;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -803,13 +803,21 @@ public class TestHCM {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Tests that a closed connection does not have a live zookeeper
|
* Tests that a destroyed connection does not have a live zookeeper.
|
||||||
|
* Below is timing based. We put up a connection to a table and then close the connection while
|
||||||
|
* having a background thread running that is forcing close of the connection to try and
|
||||||
|
* provoke a close catastrophe; we are hoping for a car crash so we can see if we are leaking
|
||||||
|
* zk connections.
|
||||||
* @throws Exception
|
* @throws Exception
|
||||||
*/
|
*/
|
||||||
@Test
|
@Test
|
||||||
public void testDeleteForZKConnLeak() throws Exception {
|
public void testDeleteForZKConnLeak() throws Exception {
|
||||||
TEST_UTIL.createTable(TABLE_NAME4, FAM_NAM);
|
TEST_UTIL.createTable(TABLE_NAME4, FAM_NAM);
|
||||||
final Configuration config = TEST_UTIL.getConfiguration();
|
final Configuration config = HBaseConfiguration.create(TEST_UTIL.getConfiguration());
|
||||||
|
config.setInt("zookeeper.recovery.retry", 1);
|
||||||
|
config.setInt("zookeeper.recovery.retry.intervalmill", 1000);
|
||||||
|
config.setInt("hbase.rpc.timeout", 2000);
|
||||||
|
config.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, 2);
|
||||||
|
|
||||||
ThreadPoolExecutor pool = new ThreadPoolExecutor(1, 10,
|
ThreadPoolExecutor pool = new ThreadPoolExecutor(1, 10,
|
||||||
5, TimeUnit.SECONDS,
|
5, TimeUnit.SECONDS,
|
||||||
|
@ -822,21 +830,30 @@ public class TestHCM {
|
||||||
while (!Thread.interrupted()) {
|
while (!Thread.interrupted()) {
|
||||||
try {
|
try {
|
||||||
HConnection conn = HConnectionManager.getConnection(config);
|
HConnection conn = HConnectionManager.getConnection(config);
|
||||||
|
LOG.info("Connection " + conn);
|
||||||
HConnectionManager.deleteStaleConnection(conn);
|
HConnectionManager.deleteStaleConnection(conn);
|
||||||
|
LOG.info("Connection closed " + conn);
|
||||||
|
// TODO: This sleep time should be less than the time that it takes to open and close
|
||||||
|
// a table. Ideally we would do a few runs first to measure. For now this is
|
||||||
|
// timing based; hopefully we hit the bad condition.
|
||||||
|
Threads.sleep(10);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// use connection multiple times
|
// Use connection multiple times.
|
||||||
for (int i = 0; i < 50; i++) {
|
for (int i = 0; i < 30; i++) {
|
||||||
HConnection c1 = null;
|
HConnection c1 = null;
|
||||||
try {
|
try {
|
||||||
c1 = HConnectionManager.getConnection(config);
|
c1 = HConnectionManager.getConnection(config);
|
||||||
|
LOG.info("HTable connection " + i + " " + c1);
|
||||||
HTable table = new HTable(TABLE_NAME4, c1, pool);
|
HTable table = new HTable(TABLE_NAME4, c1, pool);
|
||||||
table.close();
|
table.close();
|
||||||
|
LOG.info("HTable connection " + i + " closed " + c1);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
LOG.info("We actually want this to happen!!!! So we can see if we are leaking zk", e);
|
||||||
} finally {
|
} finally {
|
||||||
if (c1 != null) {
|
if (c1 != null) {
|
||||||
if (c1.isClosed()) {
|
if (c1.isClosed()) {
|
||||||
|
|
Loading…
Reference in New Issue