From 225383d32105ff9893c4275543f693c21e86a852 Mon Sep 17 00:00:00 2001 From: tedyu Date: Mon, 8 Aug 2016 08:42:05 -0700 Subject: [PATCH] HBASE-16367 Race between master and region server initialization may lead to premature server abort --- .../java/org/apache/hadoop/hbase/master/HMaster.java | 3 +++ .../hadoop/hbase/regionserver/HRegionServer.java | 12 ++++++++++++ 2 files changed, 15 insertions(+) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index dcbf1c8e2fb..7dc5df21867 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -35,6 +35,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; +import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; import java.util.regex.Pattern; @@ -448,6 +449,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server { // Some unit tests don't need a cluster, so no zookeeper at all if (!conf.getBoolean("hbase.testing.nocluster", false)) { + setInitLatch(new CountDownLatch(1)); activeMasterManager = new ActiveMasterManager(zooKeeper, this.serverName, this); int infoPort = putUpJettyServer(); startActiveMasterManager(infoPort); @@ -695,6 +697,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server { // publish cluster ID status.setStatus("Publishing Cluster ID in ZooKeeper"); ZKClusterId.setClusterId(this.zooKeeper, fileSystemManager.getClusterId()); + this.initLatch.countDown(); this.serverManager = createServerManager(this, this); setupClusterConnection(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 2b8f84027fb..5643a836e8c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -44,6 +44,8 @@ import java.util.TreeSet; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ConcurrentSkipListMap; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.locks.ReentrantReadWriteLock; @@ -227,6 +229,7 @@ public class HRegionServer extends HasThread implements protected MemStoreFlusher cacheFlusher; protected HeapMemoryManager hMemManager; + protected CountDownLatch initLatch = null; /** * Cluster connection to be shared by services. @@ -640,6 +643,10 @@ public class HRegionServer extends HasThread implements choreService.scheduleChore(compactedFileDischarger); } + protected void setInitLatch(CountDownLatch latch) { + this.initLatch = latch; + } + /* * Returns true if configured hostname should be used */ @@ -784,6 +791,8 @@ public class HRegionServer extends HasThread implements * @throws IOException * @throws InterruptedException */ + @edu.umd.cs.findbugs.annotations.SuppressWarnings(value="RV_RETURN_VALUE_IGNORED_BAD_PRACTICE", + justification="cluster Id znode read would give us correct response") private void initializeZooKeeper() throws IOException, InterruptedException { // Create the master address tracker, register with zk, and start it. Then // block until a master is available. No point in starting up if no master @@ -794,6 +803,9 @@ public class HRegionServer extends HasThread implements // when ready. blockAndCheckIfStopped(this.clusterStatusTracker); + if (this.initLatch != null) { + this.initLatch.await(50, TimeUnit.SECONDS); + } // Retrieve clusterId // Since cluster status is now up // ID should have already been set by HMaster