diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 8d27ec95e1b..0ae24c7d76c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -1530,6 +1530,11 @@ public class HMaster extends HRegionServer implements MasterServices { LOG.debug("Stopping service threads"); + // stop procedure executor prior to other services such as server manager and assignment + // manager, as these services are important for some running procedures. See HBASE-24117 for + // example. + stopProcedureExecutor(); + if (this.quotaManager != null) { this.quotaManager.stop(); } @@ -1544,8 +1549,6 @@ public class HMaster extends HRegionServer implements MasterServices { this.assignmentManager.stop(); } - stopProcedureExecutor(); - if (masterRegion != null) { masterRegion.close(isAborted()); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java index 076c2668d2e..6ca8c0cd520 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java @@ -24,6 +24,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionInfoBuilder; @@ -485,6 +486,12 @@ public class ServerCrashProcedure // UPDATE: HBCKServerCrashProcedure overrides isMatchingRegionLocation; this check can get // in the way of our clearing out 'Unknown Servers'. if (!isMatchingRegionLocation(regionNode)) { + // See HBASE-24117, though we have already changed the shutdown order, it is still worth + // double checking here to confirm that we do not skip assignment incorrectly. + if (!am.isRunning()) { + throw new DoNotRetryIOException( + "AssignmentManager has been stopped, can not process assignment any more"); + } LOG.info("{} found {} whose regionLocation no longer matches {}, skipping assign...", this, regionNode, serverName); continue; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestCloseRegionWhileRSCrash.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestCloseRegionWhileRSCrash.java index 4761991e75b..75f73e5087b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestCloseRegionWhileRSCrash.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/assignment/TestCloseRegionWhileRSCrash.java @@ -163,7 +163,7 @@ public class TestCloseRegionWhileRSCrash { UTIL.shutdownMiniCluster(); } - @org.junit.Ignore @Test // Until root-cause of flakeyness, HBASE-24117, is addressed. + @Test public void testRetryBackoff() throws IOException, InterruptedException { HRegionServer srcRs = UTIL.getRSForFirstRegionInTable(TABLE_NAME); RegionInfo region = srcRs.getRegions(TABLE_NAME).get(0).getRegionInfo();