From 5e4f28c2892052325498c48024445bc9fb329462 Mon Sep 17 00:00:00 2001 From: Zhihong Yu Date: Fri, 29 Jul 2011 13:46:11 +0000 Subject: [PATCH] HBASE-4138 If zookeeper.znode.parent is not specified explicitly in Client code then HTable object loops continuously waiting for the root region by using /hbase as the base node.(ramkrishna.s.vasudevan) git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1152220 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 3 +++ .../hbase/client/HConnectionManager.java | 11 ++++++++- .../apache/hadoop/hbase/master/HMaster.java | 4 ++-- .../hbase/regionserver/HRegionServer.java | 6 +++++ .../hbase/zookeeper/RootRegionTracker.java | 8 ++++++- .../hbase/zookeeper/ZooKeeperNodeTracker.java | 24 +++++++++++++++++++ .../hbase/zookeeper/ZooKeeperWatcher.java | 21 ++++++++++++++-- .../hbase/catalog/TestCatalogTracker.java | 4 ++-- .../hbase/master/TestRestartCluster.java | 4 ++-- .../handler/TestOpenRegionHandler.java | 2 +- .../hbase/replication/TestReplication.java | 4 ++-- .../hadoop/hbase/zookeeper/TestZKTable.java | 2 +- 12 files changed, 79 insertions(+), 14 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index d1ed19766c3..5bcadfbf3a7 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -178,6 +178,9 @@ Release 0.91.0 - Unreleased HBASE-4083 If Enable table is not completed and is partial, then scanning of the table is not working (ramkrishna.s.vasudevan) HBASE-4144 RS does not abort if the initialization of RS fails (ramkrishna.s.vasudevan) + HBASE-4138 If zookeeper.znode.parent is not specified explicitly in Client + code then HTable object loops continuously waiting for the root region + by using /hbase as the base node.(ramkrishna.s.vasudevan) IMPROVEMENTS HBASE-3290 Max Compaction Size (Nicolas Spiegelberg via Stack) diff --git a/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java b/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java index 9e4b0218973..6fb53817ce6 100644 --- 
a/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java +++ b/src/main/java/org/apache/hadoop/hbase/client/HConnectionManager.java @@ -570,7 +570,7 @@ public class HConnectionManager { return master; } } - + checkIfBaseNodeAvailable(); ServerName sn = null; synchronized (this.masterLock) { for (int tries = 0; @@ -632,6 +632,15 @@ public class HConnectionManager { return this.master; } + private void checkIfBaseNodeAvailable() throws MasterNotRunningException { + if (false == masterAddressTracker.checkIfBaseNodeAvailable()) { + String errorMsg = "Check the value configured in 'zookeeper.znode.parent'. " + + "There could be a mismatch with the one configured in the master."; + LOG.error(errorMsg); + throw new MasterNotRunningException(errorMsg); + } + } + public boolean isMasterRunning() throws MasterNotRunningException, ZooKeeperConnectionException { if (this.master == null) { diff --git a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index b727c7c8b57..8beeb68d8cb 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -238,7 +238,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { if (this.conf.get("mapred.task.id") == null) { this.conf.set("mapred.task.id", "hb_m_" + this.serverName.toString()); } - this.zooKeeper = new ZooKeeperWatcher(conf, MASTER + ":" + isa.getPort(), this); + this.zooKeeper = new ZooKeeperWatcher(conf, MASTER + ":" + isa.getPort(), this, true); this.metrics = new MasterMetrics(getServerName().toString()); } @@ -1218,7 +1218,7 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { private boolean tryRecoveringExpiredZKSession() throws InterruptedException, IOException, KeeperException { this.zooKeeper = new ZooKeeperWatcher(conf, MASTER + ":" - + this.serverName.getPort(), this); + + this.serverName.getPort(), this, 
true); MonitoredTask status = TaskMonitor.get().createStatus("Recovering expired ZK session"); diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 34ca38040b3..e87eb3ef3fa 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -555,6 +555,12 @@ public class HRegionServer implements HRegionInterface, HBaseRPCErrorHandler, */ private void blockAndCheckIfStopped(ZooKeeperNodeTracker tracker) throws IOException, InterruptedException { + if (false == tracker.checkIfBaseNodeAvailable()) { + String errorMsg = "Check the value configured in 'zookeeper.znode.parent'. " + + "There could be a mismatch with the one configured in the master."; + LOG.error(errorMsg); + abort(errorMsg); + } while (tracker.blockUntilAvailable(this.msgInterval) == null) { if (this.stopped) { throw new IOException("Received the shutdown message while waiting."); diff --git a/src/main/java/org/apache/hadoop/hbase/zookeeper/RootRegionTracker.java b/src/main/java/org/apache/hadoop/hbase/zookeeper/RootRegionTracker.java index 1b1226454cb..51f77258fc0 100644 --- a/src/main/java/org/apache/hadoop/hbase/zookeeper/RootRegionTracker.java +++ b/src/main/java/org/apache/hadoop/hbase/zookeeper/RootRegionTracker.java @@ -71,6 +71,12 @@ public class RootRegionTracker extends ZooKeeperNodeTracker { */ public ServerName waitRootRegionLocation(long timeout) throws InterruptedException { + if (false == checkIfBaseNodeAvailable()) { + String errorMsg = "Check the value configured in 'zookeeper.znode.parent'. 
" + + "There could be a mismatch with the one configured in the master."; + LOG.error(errorMsg); + throw new IllegalArgumentException(errorMsg); + } return dataToServerName(super.blockUntilAvailable(timeout)); } @@ -95,4 +101,4 @@ public class RootRegionTracker extends ZooKeeperNodeTracker { int port = Addressing.parsePort(str); return new ServerName(hostname, port, -1L); } -} \ No newline at end of file +} diff --git a/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java b/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java index 131aba33aeb..a88be2981a1 100644 --- a/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java +++ b/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperNodeTracker.java @@ -19,6 +19,8 @@ */ package org.apache.hadoop.hbase.zookeeper; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.Abortable; import org.apache.zookeeper.KeeperException; @@ -32,6 +34,8 @@ import org.apache.zookeeper.KeeperException; * RegionServers. */ public abstract class ZooKeeperNodeTracker extends ZooKeeperListener { + + static final Log LOG = LogFactory.getLog(ZooKeeperNodeTracker.class); /** Path of node being tracked */ protected final String node; @@ -179,4 +183,24 @@ public abstract class ZooKeeperNodeTracker extends ZooKeeperListener { nodeCreated(path); } } + + /** + * Checks if the baseznode set as per the property 'zookeeper.znode.parent' + * exists. + * @return true if baseznode exists, or if the check threw KeeperException + * (abort is invoked first); false if it does not exist. 
+ */ + public boolean checkIfBaseNodeAvailable() { + try { + if (ZKUtil.checkExists(watcher, watcher.baseZNode) == -1) { + return false; + } + } catch (KeeperException e) { + abortable + .abort( + "Exception while checking if basenode exists.", + e); + } + return true; + } } diff --git a/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java b/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java index 30b83173a85..c00a9b1e024 100644 --- a/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java +++ b/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java @@ -90,6 +90,18 @@ public class ZooKeeperWatcher implements Watcher, Abortable { private final Exception constructorCaller; + + /** + * Instantiate a ZooKeeper connection and watcher; creates no base znodes. + * @param descriptor Descriptive string that is added to zookeeper sessionid + * and used as identifier for this instance. + * @throws IOException + * @throws ZooKeeperConnectionException + */ + public ZooKeeperWatcher(Configuration conf, String descriptor, + Abortable abortable) throws ZooKeeperConnectionException, IOException { + this(conf, descriptor, abortable, false); + } /** * Instantiate a ZooKeeper connection and watcher. * @param descriptor Descriptive string that is added to zookeeper sessionid @@ -98,7 +110,7 @@ public class ZooKeeperWatcher implements Watcher, Abortable { * @throws ZooKeeperConnectionException */ public ZooKeeperWatcher(Configuration conf, String descriptor, - Abortable abortable) + Abortable abortable, boolean canCreateBaseZNode) throws IOException, ZooKeeperConnectionException { this.conf = conf; // Capture a stack trace now. 
Will print it out later if problem so we can @@ -115,9 +127,14 @@ public class ZooKeeperWatcher implements Watcher, Abortable { this.abortable = abortable; setNodeNames(conf); this.recoverableZooKeeper = ZKUtil.connect(conf, quorum, this, descriptor); + if (canCreateBaseZNode) { + createBaseZNodes(); + } + } + + private void createBaseZNodes() throws ZooKeeperConnectionException { try { // Create all the necessary "directories" of znodes - // TODO: Move this to an init method somewhere so not everyone calls it? ZKUtil.createAndFailSilent(this, baseZNode); ZKUtil.createAndFailSilent(this, assignmentZNode); ZKUtil.createAndFailSilent(this, rsZNode); diff --git a/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java b/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java index 8e53fd82735..632f14759f4 100644 --- a/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java +++ b/src/test/java/org/apache/hadoop/hbase/catalog/TestCatalogTracker.java @@ -84,7 +84,7 @@ public class TestCatalogTracker { } }; this.watcher = new ZooKeeperWatcher(UTIL.getConfiguration(), - this.getClass().getSimpleName(), this.abortable); + this.getClass().getSimpleName(), this.abortable, true); } @After public void after() { @@ -356,4 +356,4 @@ public class TestCatalogTracker { this.ct.waitForRoot(); } } -} \ No newline at end of file +} diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java b/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java index c0ea6495378..88e50496726 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java +++ b/src/test/java/org/apache/hadoop/hbase/master/TestRestartCluster.java @@ -67,7 +67,7 @@ public class TestRestartCluster { @Test (timeout=300000) public void testRestartClusterAfterKill() throws Exception { UTIL.startMiniZKCluster(); - zooKeeper = new ZooKeeperWatcher(UTIL.getConfiguration(), "cluster1", null); + zooKeeper = new 
ZooKeeperWatcher(UTIL.getConfiguration(), "cluster1", null, true); // create the unassigned region, throw up a region opened state for META String unassignedZNode = zooKeeper.assignmentZNode; @@ -133,4 +133,4 @@ public class TestRestartCluster { UTIL.waitTableAvailable(TABLE, 30000); } } -} \ No newline at end of file +} diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java b/src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java index aa48c227834..ab129684726 100644 --- a/src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java +++ b/src/test/java/org/apache/hadoop/hbase/regionserver/handler/TestOpenRegionHandler.java @@ -76,7 +76,7 @@ public class TestOpenRegionHandler { final ZooKeeperWatcher zk; MockServer() throws ZooKeeperConnectionException, IOException { - this.zk = new ZooKeeperWatcher(HTU.getConfiguration(), NAME.toString(), this); + this.zk = new ZooKeeperWatcher(HTU.getConfiguration(), NAME.toString(), this, true); } @Override diff --git a/src/test/java/org/apache/hadoop/hbase/replication/TestReplication.java b/src/test/java/org/apache/hadoop/hbase/replication/TestReplication.java index 325fcb0085c..9fe797c5750 100644 --- a/src/test/java/org/apache/hadoop/hbase/replication/TestReplication.java +++ b/src/test/java/org/apache/hadoop/hbase/replication/TestReplication.java @@ -104,7 +104,7 @@ public class TestReplication { utility1 = new HBaseTestingUtility(conf1); utility1.startMiniZKCluster(); MiniZooKeeperCluster miniZK = utility1.getZkCluster(); - zkw1 = new ZooKeeperWatcher(conf1, "cluster1", null); + zkw1 = new ZooKeeperWatcher(conf1, "cluster1", null, true); admin = new ReplicationAdmin(conf1); LOG.info("Setup first Zk"); @@ -116,7 +116,7 @@ public class TestReplication { utility2 = new HBaseTestingUtility(conf2); utility2.setZkCluster(miniZK); - zkw2 = new ZooKeeperWatcher(conf2, "cluster2", null); + zkw2 = new ZooKeeperWatcher(conf2, "cluster2", 
null, true); slaveClusterKey = conf2.get(HConstants.ZOOKEEPER_QUORUM)+":" + conf2.get("hbase.zookeeper.property.clientPort")+":/2"; diff --git a/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKTable.java b/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKTable.java index 32095eab2f5..a8bc94915cd 100644 --- a/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKTable.java +++ b/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKTable.java @@ -60,7 +60,7 @@ public class TestZKTable { } }; ZooKeeperWatcher zkw = new ZooKeeperWatcher(TEST_UTIL.getConfiguration(), - name, abortable); + name, abortable, true); ZKTable zkt = new ZKTable(zkw); assertTrue(zkt.isEnabledTable(name)); assertFalse(zkt.isDisablingTable(name));