HBASE-24117 Shutdown AssignmentManager before ProcedureExecutor may cause SCP to accidentally skip assigning a region (#1865)
Signed-off-by: Michael Stack <stack@apache.org>
This commit is contained in:
parent
6d96694a25
commit
c5dacfbbea
|
@@ -1502,6 +1502,11 @@ public class HMaster extends HRegionServer implements MasterServices {
|
|||
|
||||
LOG.debug("Stopping service threads");
|
||||
|
||||
// stop procedure executor prior to other services such as server manager and assignment
|
||||
// manager, as these services are important for some running procedures. See HBASE-24117 for
|
||||
// an example.
|
||||
stopProcedureExecutor();
|
||||
|
||||
if (this.quotaManager != null) {
|
||||
this.quotaManager.stop();
|
||||
}
|
||||
|
@@ -1516,8 +1521,6 @@ public class HMaster extends HRegionServer implements MasterServices {
|
|||
this.assignmentManager.stop();
|
||||
}
|
||||
|
||||
stopProcedureExecutor();
|
||||
|
||||
if (masterRegion != null) {
|
||||
masterRegion.close(isAborted());
|
||||
}
|
||||
|
|
|
@@ -24,6 +24,7 @@ import java.io.IOException;
|
|||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.List;
|
||||
import org.apache.hadoop.hbase.DoNotRetryIOException;
|
||||
import org.apache.hadoop.hbase.ServerName;
|
||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
|
||||
|
@@ -485,6 +486,12 @@ public class ServerCrashProcedure
|
|||
// UPDATE: HBCKServerCrashProcedure overrides isMatchingRegionLocation; this check can get
|
||||
// in the way of our clearing out 'Unknown Servers'.
|
||||
if (!isMatchingRegionLocation(regionNode)) {
|
||||
// See HBASE-24117. Although we have already changed the shutdown order, it is still worth
|
||||
// double checking here to confirm that we do not skip assignment incorrectly.
|
||||
if (!am.isRunning()) {
|
||||
throw new DoNotRetryIOException(
|
||||
"AssignmentManager has been stopped, can not process assignment any more");
|
||||
}
|
||||
LOG.info("{} found {} whose regionLocation no longer matches {}, skipping assign...",
|
||||
this, regionNode, serverName);
|
||||
continue;
|
||||
|
|
|
@@ -163,7 +163,7 @@ public class TestCloseRegionWhileRSCrash {
|
|||
UTIL.shutdownMiniCluster();
|
||||
}
|
||||
|
||||
@org.junit.Ignore @Test // Until root-cause of flakeyness, HBASE-24117, is addressed.
|
||||
@Test
|
||||
public void testRetryBackoff() throws IOException, InterruptedException {
|
||||
HRegionServer srcRs = UTIL.getRSForFirstRegionInTable(TABLE_NAME);
|
||||
RegionInfo region = srcRs.getRegions(TABLE_NAME).get(0).getRegionInfo();
|
||||
|
|
Loading…
Reference in New Issue