diff --git a/CHANGES.txt b/CHANGES.txt index 10bd2da1d05..8013b0e4bcf 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -451,6 +451,8 @@ Release 0.92.0 - Unreleased HBASE-4553 The update of .tableinfo is not atomic; we remove then rename HBASE-4725 NPE in AM#updateTimers HBASE-4745 LRU statistics thread should be a daemon + HBASE-4749 TestMasterFailover#testMasterFailoverWithMockedRITOnDeadRS + occasionally fails TESTS HBASE-4450 test for number of blocks read: to serve as baseline for expected diff --git a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 0d0e4c56694..7abe5cb3e86 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -1319,8 +1319,10 @@ implements HMasterInterface, HMasterRegionInterface, MasterServices, Server { LOG.info(why); this.stopped = true; // If we are a backup master, we need to interrupt wait - synchronized (this.activeMasterManager.clusterHasActiveMaster) { - this.activeMasterManager.clusterHasActiveMaster.notifyAll(); + if (this.activeMasterManager != null) { + synchronized (this.activeMasterManager.clusterHasActiveMaster) { + this.activeMasterManager.clusterHasActiveMaster.notifyAll(); + } } } diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java index 722c086d3e2..ac1b01c8301 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java +++ b/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java @@ -50,6 +50,7 @@ import org.apache.hadoop.hbase.regionserver.HRegionServer; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSTableDescriptors; import org.apache.hadoop.hbase.util.JVMClusterUtil; +import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread; import 
org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread; import org.apache.hadoop.hbase.zookeeper.ZKAssign; @@ -706,10 +707,13 @@ public class TestMasterFailover { assertEquals(2, cluster.countServedRegions()); // The first RS will stay online - HRegionServer hrs = cluster.getRegionServer(0); + List<RegionServerThread> regionservers = + cluster.getRegionServerThreads(); + HRegionServer hrs = regionservers.get(0).getRegionServer(); // The second RS is going to be hard-killed - HRegionServer hrsDead = cluster.getRegionServer(1); + RegionServerThread hrsDeadThread = regionservers.get(1); + HRegionServer hrsDead = hrsDeadThread.getRegionServer(); ServerName deadServerName = hrsDead.getServerName(); // we'll need some regions to already be assigned out properly on live RS @@ -916,7 +920,11 @@ public class TestMasterFailover { hrsDead.abort("Killing for unit test"); log("RS " + deadServerName + " killed"); - // Start up a new master + // Start up a new master. Wait until regionserver is completely down + // before starting new master because of hbase-4511. + while (hrsDeadThread.isAlive()) { + Threads.sleep(10); + } log("Starting up a new master"); master = cluster.startMaster().getMaster(); log("Waiting for master to be ready");