HBASE-10895: unassigning a region fails because the hosting region server is in the FailedServerList
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1584947 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
b2ef1ce01c
commit
e524b5b330
|
@ -1698,10 +1698,10 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
if (t instanceof RemoteException) {
|
if (t instanceof RemoteException) {
|
||||||
t = ((RemoteException)t).unwrapRemoteException();
|
t = ((RemoteException)t).unwrapRemoteException();
|
||||||
}
|
}
|
||||||
|
boolean logRetries = true;
|
||||||
if (t instanceof NotServingRegionException
|
if (t instanceof NotServingRegionException
|
||||||
|| t instanceof RegionServerStoppedException
|
|| t instanceof RegionServerStoppedException
|
||||||
|| t instanceof ServerNotRunningYetException
|
|| t instanceof ServerNotRunningYetException) {
|
||||||
|| t instanceof FailedServerException) {
|
|
||||||
LOG.debug("Offline " + region.getRegionNameAsString()
|
LOG.debug("Offline " + region.getRegionNameAsString()
|
||||||
+ ", it's not any more on " + server, t);
|
+ ", it's not any more on " + server, t);
|
||||||
if (transitionInZK) {
|
if (transitionInZK) {
|
||||||
|
@ -1711,34 +1711,48 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
regionOffline(region);
|
regionOffline(region);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
} else if (state != null
|
} else if ((t instanceof FailedServerException) || (state != null &&
|
||||||
&& t instanceof RegionAlreadyInTransitionException) {
|
t instanceof RegionAlreadyInTransitionException)) {
|
||||||
// RS is already processing this region, only need to update the timestamp
|
long sleepTime = 0;
|
||||||
LOG.debug("update " + state + " the timestamp.");
|
Configuration conf = this.server.getConfiguration();
|
||||||
state.updateTimestampToNow();
|
if(t instanceof FailedServerException) {
|
||||||
if (maxWaitTime < 0) {
|
sleepTime = 1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
|
||||||
maxWaitTime = EnvironmentEdgeManager.currentTimeMillis()
|
RpcClient.FAILED_SERVER_EXPIRY_DEFAULT);
|
||||||
+ this.server.getConfiguration().getLong(ALREADY_IN_TRANSITION_WAITTIME,
|
} else {
|
||||||
DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
|
// RS is already processing this region, only need to update the timestamp
|
||||||
}
|
LOG.debug("update " + state + " the timestamp.");
|
||||||
try {
|
state.updateTimestampToNow();
|
||||||
|
if (maxWaitTime < 0) {
|
||||||
|
maxWaitTime =
|
||||||
|
EnvironmentEdgeManager.currentTimeMillis()
|
||||||
|
+ conf.getLong(ALREADY_IN_TRANSITION_WAITTIME,
|
||||||
|
DEFAULT_ALREADY_IN_TRANSITION_WAITTIME);
|
||||||
|
}
|
||||||
long now = EnvironmentEdgeManager.currentTimeMillis();
|
long now = EnvironmentEdgeManager.currentTimeMillis();
|
||||||
if (now < maxWaitTime) {
|
if (now < maxWaitTime) {
|
||||||
LOG.debug("Region is already in transition; "
|
LOG.debug("Region is already in transition; "
|
||||||
+ "waiting up to " + (maxWaitTime - now) + "ms", t);
|
+ "waiting up to " + (maxWaitTime - now) + "ms", t);
|
||||||
Thread.sleep(100);
|
sleepTime = 100;
|
||||||
i--; // reset the try count
|
i--; // reset the try count
|
||||||
|
logRetries = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
if (sleepTime > 0) {
|
||||||
|
Thread.sleep(sleepTime);
|
||||||
}
|
}
|
||||||
} catch (InterruptedException ie) {
|
} catch (InterruptedException ie) {
|
||||||
LOG.warn("Failed to unassign "
|
LOG.warn("Failed to unassign "
|
||||||
+ region.getRegionNameAsString() + " since interrupted", ie);
|
+ region.getRegionNameAsString() + " since interrupted", ie);
|
||||||
Thread.currentThread().interrupt();
|
Thread.currentThread().interrupt();
|
||||||
if (!tomActivated) {
|
if (!tomActivated && state != null) {
|
||||||
regionStates.updateRegionState(region, State.FAILED_CLOSE);
|
regionStates.updateRegionState(region, State.FAILED_CLOSE);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} else {
|
}
|
||||||
|
|
||||||
|
if (logRetries) {
|
||||||
LOG.info("Server " + server + " returned " + t + " for "
|
LOG.info("Server " + server + " returned " + t + " for "
|
||||||
+ region.getRegionNameAsString() + ", try=" + i
|
+ region.getRegionNameAsString() + ", try=" + i
|
||||||
+ " of " + this.maximumAttempts, t);
|
+ " of " + this.maximumAttempts, t);
|
||||||
|
|
|
@ -141,7 +141,7 @@ public class TestAssignmentManagerOnCluster {
|
||||||
@Test (timeout=120000)
|
@Test (timeout=120000)
|
||||||
public void testAssignRegionOnRestartedServer() throws Exception {
|
public void testAssignRegionOnRestartedServer() throws Exception {
|
||||||
String table = "testAssignRegionOnRestartedServer";
|
String table = "testAssignRegionOnRestartedServer";
|
||||||
TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 40);
|
TEST_UTIL.getMiniHBaseCluster().getConf().setInt("hbase.assignment.maximum.attempts", 20);
|
||||||
TEST_UTIL.getMiniHBaseCluster().stopMaster(0);
|
TEST_UTIL.getMiniHBaseCluster().stopMaster(0);
|
||||||
TEST_UTIL.getMiniHBaseCluster().startMaster(); // restart the master so that the conf takes effect
|
TEST_UTIL.getMiniHBaseCluster().startMaster(); // restart the master so that the conf takes effect
|
||||||
|
|
||||||
|
@ -754,11 +754,11 @@ public class TestAssignmentManagerOnCluster {
|
||||||
// You can't assign a dead region before SSH
|
// You can't assign a dead region before SSH
|
||||||
am.assign(hri, true, true);
|
am.assign(hri, true, true);
|
||||||
RegionState state = regionStates.getRegionState(hri);
|
RegionState state = regionStates.getRegionState(hri);
|
||||||
assertTrue(state.isOffline());
|
assertTrue(state.isFailedClose());
|
||||||
|
|
||||||
// You can't unassign a dead region before SSH either
|
// You can't unassign a dead region before SSH either
|
||||||
am.unassign(hri, true);
|
am.unassign(hri, true);
|
||||||
assertTrue(state.isOffline());
|
assertTrue(state.isFailedClose());
|
||||||
|
|
||||||
// Enable SSH so that log can be split
|
// Enable SSH so that log can be split
|
||||||
master.enableSSH(true);
|
master.enableSSH(true);
|
||||||
|
|
Loading…
Reference in New Issue