From deacb117f63acd92bd567db92596c38dda430c90 Mon Sep 17 00:00:00 2001 From: tedyu Date: Thu, 20 Nov 2014 12:01:40 -0800 Subject: [PATCH] HBASE-12464 meta table region assignment stuck in the FAILED_OPEN state due to region server not fully ready to serve (Stephen Jiang) --- .../hbase/master/AssignmentManager.java | 60 +++++++++++++++---- 1 file changed, 48 insertions(+), 12 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index 7ac3d588607..034c4f615e9 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -168,7 +168,7 @@ public class AssignmentManager extends ZooKeeperListener { /** * The sleep time for which the assignment will wait before retrying in case of hbase:meta assignment - * failure due to lack of availability of region plan + * failure due to lack of availability of region plan or bad region plan */ private final long sleepTimeBeforeRetryingMetaAssignment; @@ -1996,6 +1996,7 @@ public class AssignmentManager extends ZooKeeperListener { + ", the server is stopped/aborted"); return; } + if (plan == null) { // Get a server for the region at first try { plan = getRegionPlan(region, forceNewPlan); @@ -2003,18 +2004,23 @@ public class AssignmentManager extends ZooKeeperListener { LOG.warn("Failed to get region plan", e); } } + if (plan == null) { LOG.warn("Unable to determine a plan to assign " + region); + + // For meta region, we have to keep retrying until succeeding if (region.isMetaRegion()) { - try { - Thread.sleep(this.sleepTimeBeforeRetryingMetaAssignment); - if (i == maximumAttempts) i = 1; - continue; - } catch (InterruptedException e) { - LOG.error("Got exception while waiting for hbase:meta assignment"); - Thread.currentThread().interrupt(); + if (i == maximumAttempts) { + i = 0; // re-set attempt count to 0 for at least 1 retry + + LOG.warn("Unable to determine a plan to assign a hbase:meta region " + region + + " after maximumAttempts (" + this.maximumAttempts + + "). Reset attempts count and continue retrying."); } + waitForRetryingMetaAssignment(); + continue; } + regionStates.updateRegionState(region, State.FAILED_OPEN); return; } @@ -2148,9 +2154,19 @@ public class AssignmentManager extends ZooKeeperListener { } if (i == this.maximumAttempts) { - // Don't reset the region state or get a new plan any more. - // This is the last try. - continue; + // For meta region, we have to keep retrying until succeeding + if (region.isMetaRegion()) { + i = 0; // re-set attempt count to 0 for at least 1 retry + LOG.warn(assignMsg + + ", trying to assign a hbase:meta region reached to maximumAttempts (" + + this.maximumAttempts + "). Reset attempt counts and continue retrying."); + waitForRetryingMetaAssignment(); + } + else { + // Don't reset the region state or get a new plan any more. + // This is the last try. + continue; + } } // If region opened on destination of present plan, reassigning to new @@ -2341,6 +2357,18 @@ public class AssignmentManager extends ZooKeeperListener { return existingPlan; } + /** + * Wait for some time before retrying meta table region assignment + */ + private void waitForRetryingMetaAssignment() { + try { + Thread.sleep(this.sleepTimeBeforeRetryingMetaAssignment); + } catch (InterruptedException e) { + LOG.error("Got exception while waiting for hbase:meta assignment"); + Thread.currentThread().interrupt(); + } + } + /** * Unassigns the specified region. *

@@ -3397,12 +3425,20 @@ public class AssignmentManager extends ZooKeeperListener { // name, and failedOpenTracker is updated only in this block failedOpenTracker.put(encodedName, failedOpenCount); } - if (failedOpenCount.incrementAndGet() >= maximumAttempts) { + if (failedOpenCount.incrementAndGet() >= maximumAttempts && !hri.isMetaRegion()) { regionStates.updateRegionState(hri, State.FAILED_OPEN); // remove the tracking info to save memory, also reset // the count for next open initiative failedOpenTracker.remove(encodedName); } else { + if (hri.isMetaRegion() && failedOpenCount.get() >= maximumAttempts) { + // Log a warning message if a meta region failedOpenCount exceeds maximumAttempts + // so that we are aware of potential problem if it persists for a long time. + LOG.warn("Failed to open the hbase:meta region " + + hri.getRegionNameAsString() + " after" + + failedOpenCount.get() + " retries. Continue retrying."); + } + // Handle this the same as if it were opened and then closed. RegionState regionState = regionStates.updateRegionState(hri, State.CLOSED); if (regionState != null) {