HBASE-24117 Shutdown AssignmentManager before ProcedureExecutor may cause SCP to accidentally skip assigning a region (#1865)
Signed-off-by: Michael Stack <stack@apache.org>
This commit is contained in:
parent
2131456e54
commit
3d50e73871
|
@@ -1530,6 +1530,11 @@ public class HMaster extends HRegionServer implements MasterServices {
|
||||||
|
|
||||||
LOG.debug("Stopping service threads");
|
LOG.debug("Stopping service threads");
|
||||||
|
|
||||||
|
// stop procedure executor prior to other services such as server manager and assignment
|
||||||
|
// manager, as these services are important for some running procedures. See HBASE-24117 for
|
||||||
|
// example.
|
||||||
|
stopProcedureExecutor();
|
||||||
|
|
||||||
if (this.quotaManager != null) {
|
if (this.quotaManager != null) {
|
||||||
this.quotaManager.stop();
|
this.quotaManager.stop();
|
||||||
}
|
}
|
||||||
|
@@ -1544,8 +1549,6 @@ public class HMaster extends HRegionServer implements MasterServices {
|
||||||
this.assignmentManager.stop();
|
this.assignmentManager.stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
stopProcedureExecutor();
|
|
||||||
|
|
||||||
if (masterRegion != null) {
|
if (masterRegion != null) {
|
||||||
masterRegion.close(isAborted());
|
masterRegion.close(isAborted());
|
||||||
}
|
}
|
||||||
|
|
|
@@ -24,6 +24,7 @@ import java.io.IOException;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
import org.apache.hadoop.hbase.DoNotRetryIOException;
|
||||||
import org.apache.hadoop.hbase.ServerName;
|
import org.apache.hadoop.hbase.ServerName;
|
||||||
import org.apache.hadoop.hbase.client.RegionInfo;
|
import org.apache.hadoop.hbase.client.RegionInfo;
|
||||||
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
|
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
|
||||||
|
@@ -485,6 +486,12 @@ public class ServerCrashProcedure
|
||||||
// UPDATE: HBCKServerCrashProcedure overrides isMatchingRegionLocation; this check can get
|
// UPDATE: HBCKServerCrashProcedure overrides isMatchingRegionLocation; this check can get
|
||||||
// in the way of our clearing out 'Unknown Servers'.
|
// in the way of our clearing out 'Unknown Servers'.
|
||||||
if (!isMatchingRegionLocation(regionNode)) {
|
if (!isMatchingRegionLocation(regionNode)) {
|
||||||
|
// See HBASE-24117. Although we have already changed the shutdown order, it is still worth
|
||||||
|
// double checking here to confirm that we do not skip assignment incorrectly.
|
||||||
|
if (!am.isRunning()) {
|
||||||
|
throw new DoNotRetryIOException(
|
||||||
|
"AssignmentManager has been stopped, can not process assignment any more");
|
||||||
|
}
|
||||||
LOG.info("{} found {} whose regionLocation no longer matches {}, skipping assign...",
|
LOG.info("{} found {} whose regionLocation no longer matches {}, skipping assign...",
|
||||||
this, regionNode, serverName);
|
this, regionNode, serverName);
|
||||||
continue;
|
continue;
|
||||||
|
|
|
@@ -163,7 +163,7 @@ public class TestCloseRegionWhileRSCrash {
|
||||||
UTIL.shutdownMiniCluster();
|
UTIL.shutdownMiniCluster();
|
||||||
}
|
}
|
||||||
|
|
||||||
@org.junit.Ignore @Test // Until root-cause of flakeyness, HBASE-24117, is addressed.
|
@Test
|
||||||
public void testRetryBackoff() throws IOException, InterruptedException {
|
public void testRetryBackoff() throws IOException, InterruptedException {
|
||||||
HRegionServer srcRs = UTIL.getRSForFirstRegionInTable(TABLE_NAME);
|
HRegionServer srcRs = UTIL.getRSForFirstRegionInTable(TABLE_NAME);
|
||||||
RegionInfo region = srcRs.getRegions(TABLE_NAME).get(0).getRegionInfo();
|
RegionInfo region = srcRs.getRegions(TABLE_NAME).get(0).getRegionInfo();
|
||||||
|
|
Loading…
Reference in New Issue