HBASE-10895: unassigning a region fails because the hosting region server is in the FailedServerList

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1584947 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
jeffreyz 2014-04-05 00:58:55 +00:00
parent b2ef1ce01c
commit e524b5b330
2 changed files with 33 additions and 19 deletions

View File

@ -1698,10 +1698,10 @@ public class AssignmentManager extends ZooKeeperListener {
if (t instanceof RemoteException) { if (t instanceof RemoteException) {
t = ((RemoteException)t).unwrapRemoteException(); t = ((RemoteException)t).unwrapRemoteException();
} }
boolean logRetries = true;
if (t instanceof NotServingRegionException if (t instanceof NotServingRegionException
|| t instanceof RegionServerStoppedException || t instanceof RegionServerStoppedException
|| t instanceof ServerNotRunningYetException || t instanceof ServerNotRunningYetException) {
|| t instanceof FailedServerException) {
LOG.debug("Offline " + region.getRegionNameAsString() LOG.debug("Offline " + region.getRegionNameAsString()
+ ", it's not any more on " + server, t); + ", it's not any more on " + server, t);
if (transitionInZK) { if (transitionInZK) {
@ -1711,34 +1711,48 @@ public class AssignmentManager extends ZooKeeperListener {
regionOffline(region); regionOffline(region);
} }
return; return;
} else if (state != null } else if ((t instanceof FailedServerException) || (state != null &&
&& t instanceof RegionAlreadyInTransitionException) { t instanceof RegionAlreadyInTransitionException)) {
// RS is already processing this region, only need to update the timestamp long sleepTime = 0;
LOG.debug("update " + state + " the timestamp."); Configuration conf = this.server.getConfiguration();
state.updateTimestampToNow(); if(t instanceof FailedServerException) {
if (maxWaitTime < 0) { sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
maxWaitTime = EnvironmentEdgeManager.currentTimeMillis() RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
+ this.server.getConfiguration().getLong(ALREADY_IN_TRANSITION_WAITTIME, } else {
DEFAULT_ALREADY_IN_TRANSITION_WAITTIME); // RS is already processing this region, only need to update the timestamp
} LOG.debug("update " + state + " the timestamp.");
try { state.updateTimestampToNow();
if (maxWaitTime < 0) {
maxWaitTime =
EnvironmentEdgeManager.currentTimeMillis()
+ conf.getLong(ALREADY_IN_TRANSITION_WAITTIME,
DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
}
long now = EnvironmentEdgeManager.currentTimeMillis(); long now = EnvironmentEdgeManager.currentTimeMillis();
if (now < maxWaitTime) { if (now < maxWaitTime) {
LOG.debug("Region is already in transition; " LOG.debug("Region is already in transition; "
+ "waiting up to " + (maxWaitTime - now) + "ms", t); + "waiting up to " + (maxWaitTime - now) + "ms", t);
Thread.sleep(100); sleepTime = 100;
i--; // reset the try count i--; // reset the try count
logRetries = false;
}
}
try {
if (sleepTime > 0) {
Thread.sleep(sleepTime);
} }
} catch (InterruptedException ie) { } catch (InterruptedException ie) {
LOG.warn("Failed to unassign " LOG.warn("Failed to unassign "
+ region.getRegionNameAsString() + " since interrupted", ie); + region.getRegionNameAsString() + " since interrupted", ie);
Thread.currentThread().interrupt(); Thread.currentThread().interrupt();
if (!tomActivated) { if (!tomActivated && state != null) {
regionStates.updateRegionState(region, State.FAILED_CLOSE); regionStates.updateRegionState(region, State.FAILED_CLOSE);
} }
return; return;
} }
} else { }
if (logRetries) {
LOG.info("Server " + server + " returned " + t + " for " LOG.info("Server " + server + " returned " + t + " for "
+ region.getRegionNameAsString() + ", try=" + i + region.getRegionNameAsString() + ", try=" + i
+ " of " + this.maximumAttempts, t); + " of " + this.maximumAttempts, t);

View File

@ -141,7 +141,7 @@ public class TestAssignmentManagerOnCluster {
@Test (timeout=120000) @Test (timeout=120000)
public void testAssignRegionOnRestartedServer() throws Exception { public void testAssignRegionOnRestartedServer() throws Exception {
String table = "testAssignRegionOnRestartedServer"; String table = "testAssignRegionOnRestartedServer";
TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 40); TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 20);
TEST_UTIL.getMiniHBaseCluster().stopMaster(0); TEST_UTIL.getMiniHBaseCluster().stopMaster(0);
TEST_UTIL.getMiniHBaseCluster().startMaster(); //restart the master so that conf take into affect TEST_UTIL.getMiniHBaseCluster().startMaster(); //restart the master so that conf take into affect
@ -754,11 +754,11 @@ public class TestAssignmentManagerOnCluster {
// You can't assign a dead region before SSH // You can't assign a dead region before SSH
am.assign(hri, true, true); am.assign(hri, true, true);
RegionState state = regionStates.getRegionState(hri); RegionState state = regionStates.getRegionState(hri);
assertTrue(state.isOffline()); assertTrue(state.isFailedClose());
// You can't unassign a dead region before SSH either // You can't unassign a dead region before SSH either
am.unassign(hri, true); am.unassign(hri, true);
assertTrue(state.isOffline()); assertTrue(state.isFailedClose());
// Enable SSH so that log can be split // Enable SSH so that log can be split
master.enableSSH(true); master.enableSSH(true);