HBASE-10833: Region assignment may fail during cluster start up
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1582110 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
28efa14cc3
commit
26eee4c501
|
@ -62,6 +62,8 @@ import org.apache.hadoop.hbase.exceptions.DeserializationException;
|
||||||
import org.apache.hadoop.hbase.executor.EventHandler;
|
import org.apache.hadoop.hbase.executor.EventHandler;
|
||||||
import org.apache.hadoop.hbase.executor.EventType;
|
import org.apache.hadoop.hbase.executor.EventType;
|
||||||
import org.apache.hadoop.hbase.executor.ExecutorService;
|
import org.apache.hadoop.hbase.executor.ExecutorService;
|
||||||
|
import org.apache.hadoop.hbase.ipc.RpcClient;
|
||||||
|
import org.apache.hadoop.hbase.ipc.RpcClient.FailedServerException;
|
||||||
import org.apache.hadoop.hbase.ipc.RpcClient.FailedServerException;
|
import org.apache.hadoop.hbase.ipc.RpcClient.FailedServerException;
|
||||||
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
|
import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException;
|
||||||
import org.apache.hadoop.hbase.master.RegionState.State;
|
import org.apache.hadoop.hbase.master.RegionState.State;
|
||||||
|
@ -1848,12 +1850,14 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
final boolean setOfflineInZK, final boolean forceNewPlan) {
|
final boolean setOfflineInZK, final boolean forceNewPlan) {
|
||||||
long startTime = EnvironmentEdgeManager.currentTimeMillis();
|
long startTime = EnvironmentEdgeManager.currentTimeMillis();
|
||||||
try {
|
try {
|
||||||
|
Configuration conf = server.getConfiguration();
|
||||||
RegionState currentState = state;
|
RegionState currentState = state;
|
||||||
int versionOfOfflineNode = -1;
|
int versionOfOfflineNode = -1;
|
||||||
RegionPlan plan = null;
|
RegionPlan plan = null;
|
||||||
long maxWaitTime = -1;
|
long maxWaitTime = -1;
|
||||||
HRegionInfo region = state.getRegion();
|
HRegionInfo region = state.getRegion();
|
||||||
RegionOpeningState regionOpenState;
|
RegionOpeningState regionOpenState;
|
||||||
|
Throwable previousException = null;
|
||||||
for (int i = 1; i <= maximumAttempts; i++) {
|
for (int i = 1; i <= maximumAttempts; i++) {
|
||||||
if (server.isStopped() || server.isAborted()) {
|
if (server.isStopped() || server.isAborted()) {
|
||||||
LOG.info("Skip assigning " + region.getRegionNameAsString()
|
LOG.info("Skip assigning " + region.getRegionNameAsString()
|
||||||
|
@ -1952,6 +1956,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
if (t instanceof RemoteException) {
|
if (t instanceof RemoteException) {
|
||||||
t = ((RemoteException) t).unwrapRemoteException();
|
t = ((RemoteException) t).unwrapRemoteException();
|
||||||
}
|
}
|
||||||
|
previousException = t;
|
||||||
|
|
||||||
// Should we wait a little before retrying? If the server is starting it's yes.
|
// Should we wait a little before retrying? If the server is starting it's yes.
|
||||||
// If the region is already in transition, it's yes as well: we want to be sure that
|
// If the region is already in transition, it's yes as well: we want to be sure that
|
||||||
|
@ -2052,6 +2057,22 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
currentState = regionStates.updateRegionState(region, State.OFFLINE);
|
currentState = regionStates.updateRegionState(region, State.OFFLINE);
|
||||||
versionOfOfflineNode = -1;
|
versionOfOfflineNode = -1;
|
||||||
plan = newPlan;
|
plan = newPlan;
|
||||||
|
} else if(plan.getDestination().equals(newPlan.getDestination()) &&
|
||||||
|
previousException instanceof FailedServerException) {
|
||||||
|
try {
|
||||||
|
LOG.info("Trying to re-assign " + region.getRegionNameAsString() +
|
||||||
|
" to the same failed server.");
|
||||||
|
Thread.sleep(1 + conf.getInt(RpcClient.FAILED_SERVER_EXPIRY_KEY,
|
||||||
|
RpcClient.FAILED_SERVER_EXPIRY_DEFAULT));
|
||||||
|
} catch (InterruptedException ie) {
|
||||||
|
LOG.warn("Failed to assign "
|
||||||
|
+ region.getRegionNameAsString() + " since interrupted", ie);
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
if (!tomActivated) {
|
||||||
|
regionStates.updateRegionState(region, State.FAILED_OPEN);
|
||||||
|
}
|
||||||
|
return;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue