HBASE-12480 Regions in FAILED_OPEN/FAILED_CLOSE should be processed on master failover
This commit is contained in:
parent
908779b887
commit
4ff742742b
|
@ -550,8 +550,9 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
if (!regionsInTransition.isEmpty()) {
|
if (!regionsInTransition.isEmpty()) {
|
||||||
Set<ServerName> onlineServers = serverManager.getOnlineServers().keySet();
|
Set<ServerName> onlineServers = serverManager.getOnlineServers().keySet();
|
||||||
for (RegionState regionState: regionsInTransition.values()) {
|
for (RegionState regionState: regionsInTransition.values()) {
|
||||||
|
ServerName serverName = regionState.getServerName();
|
||||||
if (!regionState.getRegion().isMetaRegion()
|
if (!regionState.getRegion().isMetaRegion()
|
||||||
&& onlineServers.contains(regionState.getServerName())) {
|
&& serverName != null && onlineServers.contains(serverName)) {
|
||||||
LOG.debug("Found " + regionState + " in RITs");
|
LOG.debug("Found " + regionState + " in RITs");
|
||||||
failover = true;
|
failover = true;
|
||||||
break;
|
break;
|
||||||
|
@ -2986,15 +2987,22 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
// the state after the RPC call. Otherwise, we don't know what's happened
|
// the state after the RPC call. Otherwise, we don't know what's happened
|
||||||
// to the region if the master dies right after the RPC call is out.
|
// to the region if the master dies right after the RPC call is out.
|
||||||
Map<String, RegionState> rits = regionStates.getRegionsInTransition();
|
Map<String, RegionState> rits = regionStates.getRegionsInTransition();
|
||||||
for (RegionState regionState: rits.values()) {
|
for (RegionState regionState : rits.values()) {
|
||||||
if (!serverManager.isServerOnline(regionState.getServerName())) {
|
|
||||||
continue; // SSH will handle it
|
|
||||||
}
|
|
||||||
State state = regionState.getState();
|
|
||||||
LOG.info("Processing " + regionState);
|
LOG.info("Processing " + regionState);
|
||||||
|
ServerName serverName = regionState.getServerName();
|
||||||
|
// Server could be null in case of FAILED_OPEN when master cannot find a region plan. In that
|
||||||
|
// case, try assigning it here.
|
||||||
|
if (serverName != null
|
||||||
|
&& !serverManager.getOnlineServers().containsKey(serverName)) {
|
||||||
|
LOG.info("Server " + serverName + " isn't online. SSH will handle this");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
HRegionInfo regionInfo = regionState.getRegion();
|
||||||
|
State state = regionState.getState();
|
||||||
|
|
||||||
switch (state) {
|
switch (state) {
|
||||||
case CLOSED:
|
case CLOSED:
|
||||||
invokeAssign(regionState.getRegion());
|
invokeAssign(regionInfo);
|
||||||
break;
|
break;
|
||||||
case PENDING_OPEN:
|
case PENDING_OPEN:
|
||||||
retrySendRegionOpen(regionState);
|
retrySendRegionOpen(regionState);
|
||||||
|
@ -3002,6 +3010,10 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
case PENDING_CLOSE:
|
case PENDING_CLOSE:
|
||||||
retrySendRegionClose(regionState);
|
retrySendRegionClose(regionState);
|
||||||
break;
|
break;
|
||||||
|
case FAILED_CLOSE:
|
||||||
|
case FAILED_OPEN:
|
||||||
|
invokeUnAssign(regionInfo);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
// No process for other states
|
// No process for other states
|
||||||
}
|
}
|
||||||
|
|
|
@ -1176,7 +1176,7 @@ public class TestMasterFailover {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Test region in pending_open/close when master failover
|
* Test regions in pending_open/close and failed_open/close states during master failover
|
||||||
*/
|
*/
|
||||||
@Test (timeout=180000)
|
@Test (timeout=180000)
|
||||||
@SuppressWarnings("deprecation")
|
@SuppressWarnings("deprecation")
|
||||||
|
@ -1247,6 +1247,37 @@ public class TestMasterFailover {
|
||||||
newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
|
newState = new RegionState(hriOffline, State.PENDING_OPEN, newState.getServerName());
|
||||||
stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
|
stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
|
||||||
|
|
||||||
|
HRegionInfo failedClose = new HRegionInfo(offlineTable.getTableName(), null, null);
|
||||||
|
createRegion(failedClose, rootdir, conf, offlineTable);
|
||||||
|
MetaTableAccessor.addRegionToMeta(master.getConnection(), failedClose);
|
||||||
|
|
||||||
|
oldState = new RegionState(failedClose, State.PENDING_CLOSE);
|
||||||
|
newState = new RegionState(failedClose, State.FAILED_CLOSE, newState.getServerName());
|
||||||
|
stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
|
||||||
|
|
||||||
|
|
||||||
|
HRegionInfo failedOpen = new HRegionInfo(offlineTable.getTableName(), null, null);
|
||||||
|
createRegion(failedOpen, rootdir, conf, offlineTable);
|
||||||
|
MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpen);
|
||||||
|
|
||||||
|
// Simulate a region transitioning to failed open when the region server reports the
|
||||||
|
// transition as FAILED_OPEN
|
||||||
|
oldState = new RegionState(failedOpen, State.PENDING_OPEN);
|
||||||
|
newState = new RegionState(failedOpen, State.FAILED_OPEN, newState.getServerName());
|
||||||
|
stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
|
||||||
|
|
||||||
|
HRegionInfo failedOpenNullServer = new HRegionInfo(offlineTable.getTableName(), null, null);
|
||||||
|
createRegion(failedOpenNullServer, rootdir, conf, offlineTable);
|
||||||
|
MetaTableAccessor.addRegionToMeta(master.getConnection(), failedOpenNullServer);
|
||||||
|
|
||||||
|
// Simulate a region transitioning to failed open when the master couldn't find a plan for
|
||||||
|
// the region
|
||||||
|
oldState = new RegionState(failedOpenNullServer, State.OFFLINE);
|
||||||
|
newState = new RegionState(failedOpenNullServer, State.FAILED_OPEN, null);
|
||||||
|
stateStore.updateRegionState(HConstants.NO_SEQNUM, newState, oldState);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Stop the master
|
// Stop the master
|
||||||
log("Aborting master");
|
log("Aborting master");
|
||||||
cluster.abortMaster(0);
|
cluster.abortMaster(0);
|
||||||
|
@ -1269,6 +1300,9 @@ public class TestMasterFailover {
|
||||||
// Both pending_open (RPC sent/not yet) regions should be online
|
// Both pending_open regions (RPC sent or not yet sent) and the failed_open/failed_close regions should be online
|
||||||
assertTrue(regionStates.isRegionOnline(hriOffline));
|
assertTrue(regionStates.isRegionOnline(hriOffline));
|
||||||
assertTrue(regionStates.isRegionOnline(hriOnline));
|
assertTrue(regionStates.isRegionOnline(hriOnline));
|
||||||
|
assertTrue(regionStates.isRegionOnline(failedClose));
|
||||||
|
assertTrue(regionStates.isRegionOnline(failedOpenNullServer));
|
||||||
|
assertTrue(regionStates.isRegionOnline(failedOpen));
|
||||||
|
|
||||||
log("Done with verification, shutting down cluster");
|
log("Done with verification, shutting down cluster");
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue