HBASE-12464 meta table region assignment stuck in the FAILED_OPEN state due to region server not fully ready to serve (Stephen Jiang)
This commit is contained in:
parent
84ed9f6ba4
commit
deacb117f6
|
@ -168,7 +168,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
|
||||
/**
|
||||
* The sleep time for which the assignment will wait before retrying in case of hbase:meta assignment
|
||||
* failure due to lack of availability of region plan
|
||||
* failure due to lack of availability of region plan or bad region plan
|
||||
*/
|
||||
private final long sleepTimeBeforeRetryingMetaAssignment;
|
||||
|
||||
|
@ -1996,6 +1996,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
+ ", the server is stopped/aborted");
|
||||
return;
|
||||
}
|
||||
|
||||
if (plan == null) { // Get a server for the region at first
|
||||
try {
|
||||
plan = getRegionPlan(region, forceNewPlan);
|
||||
|
@ -2003,18 +2004,23 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
LOG.warn("Failed to get region plan", e);
|
||||
}
|
||||
}
|
||||
|
||||
if (plan == null) {
|
||||
LOG.warn("Unable to determine a plan to assign " + region);
|
||||
|
||||
// For meta region, we have to keep retrying until succeeding
|
||||
if (region.isMetaRegion()) {
|
||||
try {
|
||||
Thread.sleep(this.sleepTimeBeforeRetryingMetaAssignment);
|
||||
if (i == maximumAttempts) i = 1;
|
||||
if (i == maximumAttempts) {
|
||||
i = 0; // re-set attempt count to 0 for at least 1 retry
|
||||
|
||||
LOG.warn("Unable to determine a plan to assign a hbase:meta region " + region +
|
||||
" after maximumAttempts (" + this.maximumAttempts +
|
||||
"). Reset attempts count and continue retrying.");
|
||||
}
|
||||
waitForRetryingMetaAssignment();
|
||||
continue;
|
||||
} catch (InterruptedException e) {
|
||||
LOG.error("Got exception while waiting for hbase:meta assignment");
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
}
|
||||
|
||||
regionStates.updateRegionState(region, State.FAILED_OPEN);
|
||||
return;
|
||||
}
|
||||
|
@ -2148,10 +2154,20 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
}
|
||||
|
||||
if (i == this.maximumAttempts) {
|
||||
// For meta region, we have to keep retrying until succeeding
|
||||
if (region.isMetaRegion()) {
|
||||
i = 0; // re-set attempt count to 0 for at least 1 retry
|
||||
LOG.warn(assignMsg +
|
||||
", trying to assign a hbase:meta region reached to maximumAttempts (" +
|
||||
this.maximumAttempts + "). Reset attempt counts and continue retrying.");
|
||||
waitForRetryingMetaAssignment();
|
||||
}
|
||||
else {
|
||||
// Don't reset the region state or get a new plan any more.
|
||||
// This is the last try.
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// If region opened on destination of present plan, reassigning to new
|
||||
// RS may cause double assignments. In case of RegionAlreadyInTransitionException
|
||||
|
@ -2341,6 +2357,18 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
return existingPlan;
|
||||
}
|
||||
|
||||
/**
|
||||
* Wait for some time before retrying meta table region assignment
|
||||
*/
|
||||
private void waitForRetryingMetaAssignment() {
|
||||
try {
|
||||
Thread.sleep(this.sleepTimeBeforeRetryingMetaAssignment);
|
||||
} catch (InterruptedException e) {
|
||||
LOG.error("Got exception while waiting for hbase:meta assignment");
|
||||
Thread.currentThread().interrupt();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Unassigns the specified region.
|
||||
* <p>
|
||||
|
@ -3397,12 +3425,20 @@ public class AssignmentManager extends ZooKeeperListener {
|
|||
// name, and failedOpenTracker is updated only in this block
|
||||
failedOpenTracker.put(encodedName, failedOpenCount);
|
||||
}
|
||||
if (failedOpenCount.incrementAndGet() >= maximumAttempts) {
|
||||
if (failedOpenCount.incrementAndGet() >= maximumAttempts && !hri.isMetaRegion()) {
|
||||
regionStates.updateRegionState(hri, State.FAILED_OPEN);
|
||||
// remove the tracking info to save memory, also reset
|
||||
// the count for next open initiative
|
||||
failedOpenTracker.remove(encodedName);
|
||||
} else {
|
||||
if (hri.isMetaRegion() && failedOpenCount.get() >= maximumAttempts) {
|
||||
// Log a warning message if a meta region failedOpenCount exceeds maximumAttempts
|
||||
// so that we are aware of potential problem if it persists for a long time.
|
||||
LOG.warn("Failed to open the hbase:meta region " +
|
||||
hri.getRegionNameAsString() + " after" +
|
||||
failedOpenCount.get() + " retries. Continue retrying.");
|
||||
}
|
||||
|
||||
// Handle this the same as if it were opened and then closed.
|
||||
RegionState regionState = regionStates.updateRegionState(hri, State.CLOSED);
|
||||
if (regionState != null) {
|
||||
|
|
Loading…
Reference in New Issue