HBASE-12480 Regions in FAILED_OPEN/FAILED_CLOSE should be processed on master failover
This commit is contained in:
parent
72a6a670ac
commit
4ac457a7bc
|
@ -450,8 +450,9 @@ public class AssignmentManager {
|
|||
Map<String, RegionState> regionsInTransition = regionStates.getRegionsInTransition();
|
||||
if (!regionsInTransition.isEmpty()) {
|
||||
for (RegionState regionState: regionsInTransition.values()) {
|
||||
ServerName serverName = regionState.getServerName();
|
||||
if (!regionState.getRegion().isMetaRegion()
|
||||
&& onlineServers.contains(regionState.getServerName())) {
|
||||
&& serverName != null && onlineServers.contains(serverName)) {
|
||||
LOG.debug("Found " + regionState + " in RITs");
|
||||
failover = true;
|
||||
break;
|
||||
|
@ -1694,18 +1695,23 @@ public class AssignmentManager {
|
|||
/**
|
||||
* Processes list of regions in transition at startup
|
||||
*/
|
||||
void processRegionsInTransition(Collection<RegionState> regionStates) {
|
||||
void processRegionsInTransition(Collection<RegionState> regionsInTransition) {
|
||||
// We need to send RPC call again for PENDING_OPEN/PENDING_CLOSE regions
|
||||
// in case the RPC call had not yet been sent out before the master was shut down,
|
||||
// since we update the state before we send the RPC call. We can't update
|
||||
// the state after the RPC call. Otherwise, we don't know what's happened
|
||||
// to the region if the master dies right after the RPC call is out.
|
||||
for (RegionState regionState: regionStates) {
|
||||
if (!serverManager.isServerOnline(regionState.getServerName())) {
|
||||
for (RegionState regionState: regionsInTransition) {
|
||||
LOG.info("Processing " + regionState);
|
||||
ServerName serverName = regionState.getServerName();
|
||||
// Server could be null in case of FAILED_OPEN when master cannot find a region plan. In that
|
||||
// case, try assigning it here.
|
||||
if (serverName != null && !serverManager.getOnlineServers().containsKey(serverName)) {
|
||||
LOG.info("Server " + serverName + " isn't online. SSH will handle this");
|
||||
continue; // SSH will handle it
|
||||
}
|
||||
HRegionInfo regionInfo = regionState.getRegion();
|
||||
RegionState.State state = regionState.getState();
|
||||
LOG.info("Processing " + regionState);
|
||||
switch (state) {
|
||||
case CLOSED:
|
||||
invokeAssign(regionState.getRegion());
|
||||
|
@ -1716,6 +1722,10 @@ public class AssignmentManager {
|
|||
case PENDING_CLOSE:
|
||||
retrySendRegionClose(regionState);
|
||||
break;
|
||||
case FAILED_CLOSE:
|
||||
case FAILED_OPEN:
|
||||
invokeUnAssign(regionInfo);
|
||||
break;
|
||||
default:
|
||||
// No process for other states
|
||||
}
|
||||
|
|
|
@ -217,7 +217,7 @@ public class TestMasterFailover {
|
|||
HMaster master = masterThreads.get(0).getMaster();
|
||||
assertTrue(master.isActiveMaster());
|
||||
assertTrue(master.isInitialized());
|
||||
|
||||
|
||||
// Create a table with a region online
|
||||
Table onlineTable = TEST_UTIL.createTable(TableName.valueOf("onlineTable"), "family");
|
||||
onlineTable.close();
|
||||
|
@ -260,7 +260,36 @@ public class TestMasterFailover {
|
|||
oldState = new RegionState(hriOffline, State.OFFLINE);
|
||||
newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
|
||||
stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
|
||||
|
||||
|
||||
HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
|
||||
createRegion(failedClose, rootdir, conf, offlineTable);
|
||||
MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
|
||||
|
||||
oldState = new RegionState(failedClose, State.PENDING_CLOSE);
|
||||
newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
|
||||
stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
|
||||
|
||||
HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
|
||||
createRegion(failedOpen, rootdir, conf, offlineTable);
|
||||
MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
|
||||
|
||||
// Simulate a region transitioning to failed open when the region server reports the
|
||||
// transition as FAILED_OPEN
|
||||
oldState = new RegionState(failedOpen, State.PENDING_OPEN);
|
||||
newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
|
||||
stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
|
||||
|
||||
HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
|
||||
LOG.info("Failed open NUll server " + failedOpenNullServer.getEncodedName());
|
||||
createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
|
||||
MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);
|
||||
|
||||
// Simulate a region transitioning to failed open when the master couldn't find a plan for
|
||||
// the region
|
||||
oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
|
||||
newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
|
||||
stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
|
||||
|
||||
// Stop the master
|
||||
log("Aborting master");
|
||||
cluster.abortMaster(0);
|
||||
|
@ -283,6 +312,9 @@ public class TestMasterFailover {
|
|||
// Both pending_open regions (RPC already sent / RPC not yet sent) should be online
|
||||
assertTrue(regionStates.isRegionOnline(hriOffline));
|
||||
assertTrue(regionStates.isRegionOnline(hriOnline));
|
||||
assertTrue(regionStates.isRegionOnline(failedClose));
|
||||
assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
|
||||
assertTrue(regionStates.isRegionOnline(failedOpen));
|
||||
|
||||
log("Done with verification, shutting down cluster");
|
||||
|
||||
|
|
Loading…
Reference in New Issue