From 64c6a071d7d2ea1dd1827b0241f30615c0e8f082 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Sat, 5 Jun 2010 05:22:33 +0000 Subject: [PATCH] HBASE-2614 killing server in TestMasterTransitions causes NPEs and test deadlock git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@951652 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 1 + .../apache/hadoop/hbase/master/BaseScanner.java | 1 + .../org/apache/hadoop/hbase/master/HMaster.java | 3 +++ .../hadoop/hbase/master/RegionManager.java | 16 ++++------------ .../hadoop/hbase/regionserver/HRegionServer.java | 4 +++- .../apache/hadoop/hbase/util/JVMClusterUtil.java | 7 ++++++- .../hbase/master/TestMasterTransitions.java | 4 ++++ 7 files changed, 22 insertions(+), 14 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 331c690f00c..ab2232bdbc1 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -373,6 +373,7 @@ Release 0.21.0 - Unreleased HBASE-2657 TestTableResource is broken in trunk HBASE-2662 TestScannerResource.testScannerResource broke in trunk HBASE-2667 TestHLog.testSplit failing in trunk + HBASE-2614 killing server in TestMasterTransitions causes NPEs and test deadlock IMPROVEMENTS HBASE-1760 Cleanup TODOs in HTable diff --git a/src/main/java/org/apache/hadoop/hbase/master/BaseScanner.java b/src/main/java/org/apache/hadoop/hbase/master/BaseScanner.java index ef349afe66b..c3935accd49 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/BaseScanner.java +++ b/src/main/java/org/apache/hadoop/hbase/master/BaseScanner.java @@ -589,6 +589,7 @@ abstract class BaseScanner extends Chore implements HConstants { synchronized(scannerLock){ if (isAlive()) { super.interrupt(); + LOG.info("Interrupted"); } } } diff --git a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index bac0efa4235..5946eee5a65 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ 
-447,6 +447,9 @@ public class HMaster extends Thread implements HConstants, HMasterInterface, if (this.serverManager.numServers() == 0) { startShutdown(); break; + } else { + LOG.debug("Waiting on " + + this.serverManager.getServersToServerInfo().keySet().toString()); } } final HServerAddress root = this.regionManager.getRootRegionLocation(); diff --git a/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java b/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java index d33d25e6a91..71597afcb47 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java +++ b/src/main/java/org/apache/hadoop/hbase/master/RegionManager.java @@ -592,17 +592,8 @@ public class RegionManager implements HConstants { * regions can shut down. */ public void stopScanners() { - if (LOG.isDebugEnabled()) { - LOG.debug("telling root scanner to stop"); - } - rootScannerThread.interruptAndStop(); - if (LOG.isDebugEnabled()) { - LOG.debug("telling meta scanner to stop"); - } - metaScannerThread.interruptAndStop(); - if (LOG.isDebugEnabled()) { - LOG.debug("meta and root scanners notified"); - } + this.rootScannerThread.interruptAndStop(); + this.metaScannerThread.interruptAndStop(); } /** Stop the region assigner */ @@ -1152,7 +1143,8 @@ public class RegionManager implements HConstants { */ public void waitForRootRegionLocation() { synchronized (rootRegionLocation) { - while (!master.isClosed() && rootRegionLocation.get() == null) { + while (!master.getShutdownRequested().get() && + !master.isClosed() && rootRegionLocation.get() == null) { // rootRegionLocation will be filled in when we get an 'open region' // regionServerReport message from the HRegionServer that has been // allocated the ROOT region below. 
diff --git a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index a44663d483c..9909f2c10a7 100644 --- a/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -1121,7 +1121,9 @@ public class HRegionServer implements HConstants, HRegionInterface, public void abort() { this.abortRequested = true; this.reservedSpace.clear(); - LOG.info("Dump of metrics: " + this.metrics.toString()); + if (this.metrics != null) { + LOG.info("Dump of metrics: " + this.metrics.toString()); + } stop(); } diff --git a/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java b/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java index baabc76b356..280b91da4a9 100644 --- a/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java +++ b/src/main/java/org/apache/hadoop/hbase/util/JVMClusterUtil.java @@ -55,7 +55,12 @@ public class JVMClusterUtil { * to be used. */ public void waitForServerOnline() { - while (!regionServer.isOnline()) { + // The server is marked online after the init method completes inside of + // the HRS#run method. HRS#init can fail for whatever reason. In those + // cases, we'll jump out of the run without setting online flag. Check + // stopRequested so we don't wait here on a flag that will never be flipped. 
+ while (!this.regionServer.isOnline() && + !this.regionServer.isStopRequested()) { try { Thread.sleep(1000); } catch (InterruptedException e) { diff --git a/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java b/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java index 56b2007e7ef..30333cde654 100644 --- a/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java +++ b/src/test/java/org/apache/hadoop/hbase/master/TestMasterTransitions.java @@ -74,6 +74,10 @@ public class TestMasterTransitions { */ @BeforeClass public static void beforeAllTests() throws Exception { TEST_UTIL.getConfiguration().setBoolean("dfs.support.append", true); + // Parcel out the regions, don't give them out in big lumps. We've only + // a few in this test. Letting a couple of cycles pass is more realistic and + // gives stuff a chance to work. + TEST_UTIL.getConfiguration().setInt("hbase.regions.percheckin", 2); // Start a cluster of two regionservers. TEST_UTIL.startMiniCluster(2); // Create a table of three families. This will assign a region.