HBASE-12464 meta table region assignment stuck in the FAILED_OPEN state due to region server not fully ready to serve (Stephen Jiang)
This commit is contained in:
parent
84ed9f6ba4
commit
deacb117f6
|
@ -168,7 +168,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The sleep time for which the assignment will wait before retrying in case of hbase:meta assignment
|
* The sleep time for which the assignment will wait before retrying in case of hbase:meta assignment
|
||||||
* failure due to lack of availability of region plan
|
* failure due to lack of availability of region plan or bad region plan
|
||||||
*/
|
*/
|
||||||
private final long sleepTimeBeforeRetryingMetaAssignment;
|
private final long sleepTimeBeforeRetryingMetaAssignment;
|
||||||
|
|
||||||
|
@ -1996,6 +1996,7 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
+ ", the server is stopped/aborted");
|
+ ", the server is stopped/aborted");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (plan == null) { // Get a server for the region at first
|
if (plan == null) { // Get a server for the region at first
|
||||||
try {
|
try {
|
||||||
plan = getRegionPlan(region, forceNewPlan);
|
plan = getRegionPlan(region, forceNewPlan);
|
||||||
|
@ -2003,18 +2004,23 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
LOG.warn("Failed to get region plan", e);
|
LOG.warn("Failed to get region plan", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (plan == null) {
|
if (plan == null) {
|
||||||
LOG.warn("Unable to determine a plan to assign " + region);
|
LOG.warn("Unable to determine a plan to assign " + region);
|
||||||
|
|
||||||
|
// For meta region, we have to keep retrying until succeeding
|
||||||
if (region.isMetaRegion()) {
|
if (region.isMetaRegion()) {
|
||||||
try {
|
if (i == maximumAttempts) {
|
||||||
Thread.sleep(this.sleepTimeBeforeRetryingMetaAssignment);
|
i = 0; // re-set attempt count to 0 for at least 1 retry
|
||||||
if (i == maximumAttempts) i = 1;
|
|
||||||
|
LOG.warn("Unable to determine a plan to assign a hbase:meta region " + region +
|
||||||
|
" after maximumAttempts (" + this.maximumAttempts +
|
||||||
|
"). Reset attempts count and continue retrying.");
|
||||||
|
}
|
||||||
|
waitForRetryingMetaAssignment();
|
||||||
continue;
|
continue;
|
||||||
} catch (InterruptedException e) {
|
|
||||||
LOG.error("Got exception while waiting for hbase:meta assignment");
|
|
||||||
Thread.currentThread().interrupt();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
regionStates.updateRegionState(region, State.FAILED_OPEN);
|
regionStates.updateRegionState(region, State.FAILED_OPEN);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -2148,10 +2154,20 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i == this.maximumAttempts) {
|
if (i == this.maximumAttempts) {
|
||||||
|
// For meta region, we have to keep retrying until succeeding
|
||||||
|
if (region.isMetaRegion()) {
|
||||||
|
i = 0; // re-set attempt count to 0 for at least 1 retry
|
||||||
|
LOG.warn(assignMsg +
|
||||||
|
", trying to assign a hbase:meta region reached to maximumAttempts (" +
|
||||||
|
this.maximumAttempts + "). Reset attempt counts and continue retrying.");
|
||||||
|
waitForRetryingMetaAssignment();
|
||||||
|
}
|
||||||
|
else {
|
||||||
// Don't reset the region state or get a new plan any more.
|
// Don't reset the region state or get a new plan any more.
|
||||||
// This is the last try.
|
// This is the last try.
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// If region opened on destination of present plan, reassigning to new
|
// If region opened on destination of present plan, reassigning to new
|
||||||
// RS may cause double assignments. In case of RegionAlreadyInTransitionException
|
// RS may cause double assignments. In case of RegionAlreadyInTransitionException
|
||||||
|
@ -2341,6 +2357,18 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
return existingPlan;
|
return existingPlan;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Wait for some time before retrying meta table region assignment
|
||||||
|
*/
|
||||||
|
private void waitForRetryingMetaAssignment() {
|
||||||
|
try {
|
||||||
|
Thread.sleep(this.sleepTimeBeforeRetryingMetaAssignment);
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
LOG.error("Got exception while waiting for hbase:meta assignment");
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unassigns the specified region.
|
* Unassigns the specified region.
|
||||||
* <p>
|
* <p>
|
||||||
|
@ -3397,12 +3425,20 @@ public class AssignmentManager extends ZooKeeperListener {
|
||||||
// name, and failedOpenTracker is updated only in this block
|
// name, and failedOpenTracker is updated only in this block
|
||||||
failedOpenTracker.put(encodedName, failedOpenCount);
|
failedOpenTracker.put(encodedName, failedOpenCount);
|
||||||
}
|
}
|
||||||
if (failedOpenCount.incrementAndGet() >= maximumAttempts) {
|
if (failedOpenCount.incrementAndGet() >= maximumAttempts && !hri.isMetaRegion()) {
|
||||||
regionStates.updateRegionState(hri, State.FAILED_OPEN);
|
regionStates.updateRegionState(hri, State.FAILED_OPEN);
|
||||||
// remove the tracking info to save memory, also reset
|
// remove the tracking info to save memory, also reset
|
||||||
// the count for next open initiative
|
// the count for next open initiative
|
||||||
failedOpenTracker.remove(encodedName);
|
failedOpenTracker.remove(encodedName);
|
||||||
} else {
|
} else {
|
||||||
|
if (hri.isMetaRegion() && failedOpenCount.get() >= maximumAttempts) {
|
||||||
|
// Log a warning message if a meta region failedOpenCount exceeds maximumAttempts
|
||||||
|
// so that we are aware of potential problem if it persists for a long time.
|
||||||
|
LOG.warn("Failed to open the hbase:meta region " +
|
||||||
|
hri.getRegionNameAsString() + " after" +
|
||||||
|
failedOpenCount.get() + " retries. Continue retrying.");
|
||||||
|
}
|
||||||
|
|
||||||
// Handle this the same as if it were opened and then closed.
|
// Handle this the same as if it were opened and then closed.
|
||||||
RegionState regionState = regionStates.updateRegionState(hri, State.CLOSED);
|
RegionState regionState = regionStates.updateRegionState(hri, State.CLOSED);
|
||||||
if (regionState != null) {
|
if (regionState != null) {
|
||||||
|
|
Loading…
Reference in New Issue