HBASE-10895: unassigning a region fails because the hosting region server is in the FailedServerList

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1584947 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
jeffreyz 2014-04-05 00:58:55 +00:00
parent b2ef1ce01c
commit e524b5b330
2 changed files with 33 additions and 19 deletions

View File

@ -1698,10 +1698,10 @@ public class AssignmentManager extends ZooKeeperListener {
if (t instanceof RemoteException) {
t = ((RemoteException)t).unwrapRemoteException();
}
boolean logRetries = true;
if (t instanceof NotServingRegionException
|| t instanceof RegionServerStoppedException
|| t instanceof ServerNotRunningYetException
|| t instanceof FailedServerException) {
|| t instanceof ServerNotRunningYetException) {
LOG.debug("Offline " + region.getRegionNameAsString()
+ ", it's not any more on " + server, t);
if (transitionInZK) {
@ -1711,34 +1711,48 @@ public class AssignmentManager extends ZooKeeperListener {
regionOffline(region);
}
return;
} else if (state != null
&& t instanceof RegionAlreadyInTransitionException) {
} else if ((t instanceof FailedServerException) || (state != null &&
t instanceof RegionAlreadyInTransitionException)) {
long sleepTime = 0;
Configuration conf = this.server.getConfiguration();
if(t instanceof FailedServerException) {
sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
} else {
// RS is already processing this region, only need to update the timestamp
LOG.debug("update " + state + " the timestamp.");
state.updateTimestampToNow();
if (maxWaitTime < 0) {
maxWaitTime = EnvironmentEdgeManager.currentTimeMillis()
+ this.server.getConfiguration().getLong(ALREADY_IN_TRANSITION_WAITTIME,
maxWaitTime =
EnvironmentEdgeManager.currentTimeMillis()
+ conf.getLong(ALREADY_IN_TRANSITION_WAITTIME,
DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
}
try {
long now = EnvironmentEdgeManager.currentTimeMillis();
if (now < maxWaitTime) {
LOG.debug("Region is already in transition; "
+ "waiting up to " + (maxWaitTime - now) + "ms", t);
Thread.sleep(100);
sleepTime = 100;
i--; // reset the try count
logRetries = false;
}
}
try {
if (sleepTime > 0) {
Thread.sleep(sleepTime);
}
} catch (InterruptedException ie) {
LOG.warn("Failed to unassign "
+ region.getRegionNameAsString() + " since interrupted", ie);
Thread.currentThread().interrupt();
if (!tomActivated) {
if (!tomActivated && state != null) {
regionStates.updateRegionState(region, State.FAILED_CLOSE);
}
return;
}
} else {
}
if (logRetries) {
LOG.info("Server " + server + " returned " + t + " for "
+ region.getRegionNameAsString() + ", try=" + i
+ " of " + this.maximumAttempts, t);

View File

@ -141,7 +141,7 @@ public class TestAssignmentManagerOnCluster {
@Test (timeout=120000)
public void testAssignRegionOnRestartedServer() throws Exception {
String table = "testAssignRegionOnRestartedServer";
TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 40);
TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 20);
TEST_UTIL.getMiniHBaseCluster().stopMaster(0);
TEST_UTIL.getMiniHBaseCluster().startMaster(); // restart the master so that the conf change takes effect
@ -754,11 +754,11 @@ public class TestAssignmentManagerOnCluster {
// You can't assign a dead region before SSH
am.assign(hri, true, true);
RegionState state = regionStates.getRegionState(hri);
assertTrue(state.isOffline());
assertTrue(state.isFailedClose());
// You can't unassign a dead region before SSH either
am.unassign(hri, true);
assertTrue(state.isOffline());
assertTrue(state.isFailedClose());
// Enable SSH so that log can be split
master.enableSSH(true);