From cbb6aac2a7ee941bdaa87b7f9b141e875f4568f8 Mon Sep 17 00:00:00 2001 From: Michael Stack Date: Mon, 29 Aug 2011 17:51:44 +0000 Subject: [PATCH] HBASE-4124 ZK restarted while a region is being assigned, new active HM re-assigns it but the RS warns 'already online on this server' git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1162919 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 3 +++ .../hbase/master/AssignmentManager.java | 27 ++++++++++++++----- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index 211654e30c3..53f885ca864 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -496,6 +496,9 @@ Release 0.90.5 - Unreleased HBASE-4253 TestScannerTimeOut.test3686a and TestHTablePool. testReturnDifferentTable() failure because of using new HTable(tablename) (ramkrishna.s.vasudevan) + HBASE-4124 ZK restarted while a region is being assigned, new active HM + re-assigns it but the RS warns 'already online on this server' + (Gaojinchao) IMPROVEMENT HBASE-4205 Enhance HTable javadoc (Eric Charles) diff --git a/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index 29858b2c13b..90e1a3c8b75 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -369,8 +369,11 @@ public class AssignmentManager extends ZooKeeperListener { synchronized (regionsInTransition) { switch (data.getEventType()) { case RS_ZK_REGION_CLOSING: - if (isOnDeadServer(regionInfo, deadServers)) { - // If was on dead server, its closed now. Force to OFFLINE and this + // If zk node of the region was updated by a live server skip this + // region and just add it into RIT. + if (isOnDeadServer(regionInfo, deadServers) && + (data.getOrigin() == null || !serverManager.isServerOnline(data.getOrigin()))) { + // If was on dead server, its closed now. Force to OFFLINE and this // will get it reassigned if appropriate forceOffline(regionInfo, data); } else { @@ -416,10 +419,10 @@ public class AssignmentManager extends ZooKeeperListener { LOG.warn("Region in transition " + regionInfo.getEncodedName() + " references a null server; letting RIT timeout so will be " + "assigned elsewhere"); - break; - } - if (isOnDeadServer(regionInfo, deadServers)) { - // If was on a dead server, then its not open any more; needs handling. + } else if (isOnDeadServer(regionInfo, deadServers) && + !serverManager.isServerOnline(sn)) { + // If was on a dead server, then its not open any more; needs + // handling. forceOffline(regionInfo, data); } else { new OpenedRegionHandler(master, this, regionInfo, sn).process(); @@ -1957,6 +1960,18 @@ public class AssignmentManager extends ZooKeeperListener { Result result = region.getSecond(); // If region was in transition (was in zk) force it offline for reassign try { + RegionTransitionData data = ZKAssign.getData(watcher, + regionInfo.getEncodedName()); + + // If zk node of this region has been updated by a live server, + // we consider that this region is being handled. + // So we should skip it and process it in processRegionsInTransition. + if (data != null && data.getOrigin() != null && + serverManager.isServerOnline(data.getOrigin())) { + LOG.info("The region " + regionInfo.getEncodedName() + + "is being handled on " + data.getOrigin()); + continue; + } // Process with existing RS shutdown code boolean assign = ServerShutdownHandler.processDeadRegion(regionInfo, result, this,