From 929c09a9ba262d015770721f8d7e0ca41c632a95 Mon Sep 17 00:00:00 2001 From: Zhihong Yu Date: Sun, 16 Sep 2012 02:53:25 +0000 Subject: [PATCH] HBASE-6438 RegionAlreadyInTransitionException needs to give more info to avoid assignment inconsistencies (Rajesh) git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1385210 13f79535-47bb-0310-9956-ffa450edef68 --- .../hbase/master/AssignmentManager.java | 55 +++++++++++++------ 1 file changed, 38 insertions(+), 17 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index bbdd84e3da0..0a7316fee15 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -1537,12 +1537,14 @@ public class AssignmentManager extends ZooKeeperListener { private void assign(final HRegionInfo region, final RegionState state, final boolean setOfflineInZK, final boolean forceNewPlan, boolean hijack) { + boolean regionAlreadyInTransitionException = false; for (int i = 0; i < this.maximumAssignmentAttempts; i++) { int versionOfOfflineNode = -1; if (setOfflineInZK) { // get the version of the znode after setting it to OFFLINE. // versionOfOfflineNode will be -1 if the znode was not set to OFFLINE - versionOfOfflineNode = setOfflineInZooKeeper(state, hijack); + versionOfOfflineNode = setOfflineInZooKeeper(state, hijack, + regionAlreadyInTransitionException); if (versionOfOfflineNode != -1) { if (isDisabledorDisablingRegionInRIT(region)) { return; @@ -1595,23 +1597,35 @@ public class AssignmentManager extends ZooKeeperListener { if (t instanceof RemoteException) { t = ((RemoteException) t).unwrapRemoteException(); if (t instanceof RegionAlreadyInTransitionException) { - String errorMsg = "Failed assignment in: " + plan.getDestination() - + " due to " + t.getMessage(); - LOG.error(errorMsg, t); - return; + regionAlreadyInTransitionException = true; + if (LOG.isDebugEnabled()) { + LOG.debug("Failed assignment in: " + plan.getDestination() + " due to " + + t.getMessage()); + } } } - LOG.warn("Failed assignment of " + - state.getRegion().getRegionNameAsString() + " to " + - plan.getDestination() + ", trying to assign elsewhere instead; " + - "retry=" + i, t); + LOG.warn("Failed assignment of " + + state.getRegion().getRegionNameAsString() + + " to " + + plan.getDestination() + + ", trying to assign " + + (regionAlreadyInTransitionException ? "to the same region server" + + " because of RegionAlreadyInTransitionException;" : "elsewhere instead; ") + + "retry=" + i, t); // Clean out plan we failed execute and one that doesn't look like it'll // succeed anyways; we need a new plan! // Transition back to OFFLINE regionStates.updateRegionState( state.getRegion(), RegionState.State.OFFLINE); - // Force a new plan and reassign. Will return null if no servers. - if (getRegionPlan(state, plan.getDestination(), true) == null) { + // If region opened on destination of present plan, reassigning to new + // RS may cause double assignments. In case of RegionAlreadyInTransitionException + // reassigning to same RS. + RegionPlan newPlan = plan; + if (!regionAlreadyInTransitionException) { + // Force a new plan and reassign. Will return null if no servers. + newPlan = getRegionPlan(state, plan.getDestination(), true); + } + if (newPlan == null) { this.timeoutMonitor.setAllRegionServersOffline(true); LOG.warn("Unable to find a viable location to assign region " + state.getRegion().getRegionNameAsString()); @@ -1662,17 +1676,24 @@ public class AssignmentManager extends ZooKeeperListener { * @param state * @param hijack * - true if needs to be hijacked and reassigned, false otherwise. + * @param regionAlreadyInTransitionException + * - true if we need to retry assignment because of RegionAlreadyInTransitionException. * @return the version of the offline node if setting of the OFFLINE node was * successful, -1 otherwise. */ - int setOfflineInZooKeeper(final RegionState state, - boolean hijack) { + int setOfflineInZooKeeper(final RegionState state, boolean hijack, + boolean regionAlreadyInTransitionException) { // In case of reassignment the current state in memory need not be - // OFFLINE. + // OFFLINE. if (!hijack && !state.isClosed() && !state.isOffline()) { - String msg = "Unexpected state : " + state + " .. Cannot transit it to OFFLINE."; - this.master.abort(msg, new IllegalStateException(msg)); - return -1; + if (!regionAlreadyInTransitionException ) { + String msg = "Unexpected state : " + state + " .. Cannot transit it to OFFLINE."; + this.master.abort(msg, new IllegalStateException(msg)); + return -1; + } else { + LOG.debug("Unexpected state : " + state + + " but retrying to assign because RegionAlreadyInTransitionException."); + } } boolean allowZNodeCreation = false; // Under reassignment if the current state is PENDING_OPEN