From 1415130b41be49186b0aa4a8ae4e5855a5c35721 Mon Sep 17 00:00:00 2001 From: Jean-Daniel Cryans Date: Fri, 23 Jul 2010 22:44:21 +0000 Subject: [PATCH] HBASE-2866 Region permanently offlined git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@967290 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 1 + .../hbase/master/ZKUnassignedWatcher.java | 1 + .../hbase/zookeeper/ZooKeeperWrapper.java | 44 ++++++++++++------- 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index a5ce08295a7..d3e7d5f9553 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -449,6 +449,7 @@ Release 0.21.0 - Unreleased HBASE-2859 Cleanup deprecated stuff in TestHLog (Alex Newman via Stack) HBASE-2858 TestReplication.queueFailover fails half the time HBASE-2863 HBASE-2553 removed an important edge case + HBASE-2866 Region permanently offlined IMPROVEMENTS HBASE-1760 Cleanup TODOs in HTable diff --git a/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java b/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java index 75eb21e421d..acd23d11458 100644 --- a/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java +++ b/src/main/java/org/apache/hadoop/hbase/master/ZKUnassignedWatcher.java @@ -169,6 +169,7 @@ public class ZKUnassignedWatcher implements Watcher { String region = zNodePath.substring( zNodePath.indexOf(rgnInTransitNode) + rgnInTransitNode.length() + 1); HBaseEventType rsEvent = HBaseEventType.fromByte(data[0]); + LOG.debug("Got event type [ " + rsEvent + " ] for region " + region); // if the node was CLOSED then handle it if(rsEvent == HBaseEventType.RS2ZK_REGION_CLOSED) { diff --git a/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java b/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java index 82251780e5b..d19c881fae8 100644 --- a/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java +++ b/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWrapper.java @@ -1133,22 +1133,36 @@ public class ZooKeeperWrapper implements Watcher { return; } - if(LOG.isDebugEnabled()) { - // Check existing state for logging purposes. - Stat stat = new Stat(); - byte[] oldData = null; - try { - oldData = readZNode(znode, stat); - } catch (IOException e) { - LOG.error("Error reading data for " + znode); - } - if(oldData == null) { - LOG.debug("While updating UNASSIGNED region " + regionName + " - node exists with no data" ); - } - else { - LOG.debug("While updating UNASSIGNED region " + regionName + " exists, state = " + (HBaseEventType.fromByte(oldData[0]))); - } + Stat stat = new Stat(); + byte[] oldData = null; + try { + oldData = readZNode(znode, stat); + } catch (IOException e) { + LOG.error("Error reading data for " + znode); } + // If there is no data in the ZNode, then update it + if(oldData == null) { + LOG.debug("While updating UNASSIGNED region " + regionName + " - node exists with no data" ); + } + // If there is data in the ZNode, do not update if it is already correct + else { + HBaseEventType curState = HBaseEventType.fromByte(oldData[0]); + HBaseEventType newState = HBaseEventType.fromByte(data[0]); + // If the znode has the right state already, do not update it. Updating + // the znode again and again will bump up the zk version. This may cause + // the region server to fail. The RS expects that the znode is never + // updated by anyone else while it is opening/closing a region. + if(curState == newState) { + LOG.debug("No need to update UNASSIGNED region " + regionName + + " as it already exists in state = " + curState); + return; + } + + // If the ZNode is in another state, then update it + LOG.debug("UNASSIGNED region " + regionName + " is currently in state = " + + curState + ", updating it to " + newState); + } + // Update the ZNode synchronized(unassignedZNodesWatched) { unassignedZNodesWatched.add(znode); try {