From 4742456403e6ee99071c634600fdd957ae39cd76 Mon Sep 17 00:00:00 2001 From: nkeywal Date: Fri, 7 Feb 2014 20:27:42 +0000 Subject: [PATCH] HBASE-10472 Manage the interruption in ZKUtil#getData git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1565787 13f79535-47bb-0310-9956-ffa450edef68 --- .../hbase/client/ZooKeeperRegistry.java | 3 ++ .../replication/ReplicationPeersZKImpl.java | 9 ++++ .../replication/ReplicationQueuesZKImpl.java | 10 +++++ .../hbase/zookeeper/MasterAddressTracker.java | 8 +++- .../hbase/zookeeper/MetaRegionTracker.java | 3 ++ .../hadoop/hbase/zookeeper/ZKClusterId.java | 8 +++- .../hbase/zookeeper/ZKLeaderManager.java | 4 ++ .../hadoop/hbase/zookeeper/ZKTable.java | 22 ++++++---- .../hbase/zookeeper/ZKTableReadOnly.java | 12 +++--- .../apache/hadoop/hbase/zookeeper/ZKUtil.java | 42 +++++++++++++------ .../hbase/master/AssignmentManager.java | 7 +++- .../apache/hadoop/hbase/master/HMaster.java | 23 ++++++---- .../hadoop/hbase/master/MasterFileSystem.java | 2 +- .../hadoop/hbase/master/SplitLogManager.java | 25 ++++++++--- .../hbase/master/balancer/BalancerChore.java | 4 +- .../master/balancer/ClusterStatusChore.java | 12 +++++- .../procedure/ZKProcedureCoordinatorRpcs.java | 4 ++ .../procedure/ZKProcedureMemberRpcs.java | 7 ++++ .../hbase/regionserver/HRegionServer.java | 7 +++- .../apache/hadoop/hbase/util/HBaseFsck.java | 3 ++ .../hadoop/hbase/util/ZKDataMigrator.java | 10 +++-- .../hbase/zookeeper/RegionServerTracker.java | 3 ++ .../lock/ZKInterProcessLockBase.java | 4 ++ .../hbase/master/TestRegionPlacement.java | 2 +- .../TestSplitTransactionOnCluster.java | 3 +- .../hadoop/hbase/zookeeper/TestZKTable.java | 2 +- 26 files changed, 185 insertions(+), 54 deletions(-) diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ZooKeeperRegistry.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ZooKeeperRegistry.java index 3d765c6278d..fa1a8d8cfdf 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ZooKeeperRegistry.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ZooKeeperRegistry.java @@ -18,6 +18,7 @@ package org.apache.hadoop.hbase.client; import java.io.IOException; +import java.io.InterruptedIOException; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -105,6 +106,8 @@ class ZooKeeperRegistry implements Registry { return ZKTableReadOnly.isDisabledTable(zkw, tableName); } catch (KeeperException e) { throw new IOException("Enable/Disable failed", e); + } catch (InterruptedException e) { + throw new InterruptedIOException(); } finally { zkw.close(); } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationPeersZKImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationPeersZKImpl.java index 51f52393d65..a6d8112b281 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationPeersZKImpl.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationPeersZKImpl.java @@ -159,6 +159,8 @@ public class ReplicationPeersZKImpl extends ReplicationStateZKBase implements Re throw new ReplicationException(e); } catch (DeserializationException e) { throw new ReplicationException(e); + } catch (InterruptedException e) { + throw new ReplicationException(e); } } @@ -212,6 +214,8 @@ public class ReplicationPeersZKImpl extends ReplicationStateZKBase implements Re } } catch (KeeperException e) { this.abortable.abort("Cannot get the list of peers ", e); + } catch (InterruptedException e) { + this.abortable.abort("Cannot get the list of peers ", e); } return peers; } @@ -268,6 +272,11 @@ public class ReplicationPeersZKImpl extends ReplicationStateZKBase implements Re } catch (KeeperException e) { throw new ReplicationException("Error getting configuration for peer with id=" + peerId, e); + } catch (InterruptedException e) { + LOG.warn("Could not get configuration for peer because the thread " + + "was interrupted. peerId=" + peerId); + Thread.currentThread().interrupt(); + return null; } if (data == null) { LOG.error("Could not get configuration for peer because it doesn't exist. peerId=" + peerId); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueuesZKImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueuesZKImpl.java index 9563744b01d..f5f437e32c3 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueuesZKImpl.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueuesZKImpl.java @@ -141,6 +141,9 @@ public class ReplicationQueuesZKImpl extends ReplicationStateZKBase implements R } catch (KeeperException e) { throw new ReplicationException("Internal Error: could not get position in log for queueId=" + queueId + ", filename=" + filename, e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return 0; } try { return ZKUtil.parseHLogPositionFrom(bytes); @@ -338,6 +341,10 @@ public class ReplicationQueuesZKImpl extends ReplicationStateZKBase implements R // Multi call failed; it looks like some other regionserver took away the logs. LOG.warn("Got exception in copyQueuesFromRSUsingMulti: ", e); queues.clear(); + } catch (InterruptedException e) { + LOG.warn("Got exception in copyQueuesFromRSUsingMulti: ", e); + queues.clear(); + Thread.currentThread().interrupt(); } return queues; } @@ -403,6 +410,9 @@ public class ReplicationQueuesZKImpl extends ReplicationStateZKBase implements R } } catch (KeeperException e) { this.abortable.abort("Copy queues from rs", e); + } catch (InterruptedException e) { + LOG.warn(e); + Thread.currentThread().interrupt(); } return queues; } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MasterAddressTracker.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MasterAddressTracker.java index 20409448af7..a177f5050af 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MasterAddressTracker.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MasterAddressTracker.java @@ -29,6 +29,7 @@ import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.data.Stat; import java.io.IOException; +import java.io.InterruptedIOException; /** * Manages the location of the current active Master for the RegionServer. @@ -103,7 +104,12 @@ public class MasterAddressTracker extends ZooKeeperNodeTracker { */ public static ServerName getMasterAddress(final ZooKeeperWatcher zkw) throws KeeperException, IOException { - byte [] data = ZKUtil.getData(zkw, zkw.getMasterAddressZNode()); + byte [] data; + try { + data = ZKUtil.getData(zkw, zkw.getMasterAddressZNode()); + } catch (InterruptedException e) { + throw new InterruptedIOException(); + } if (data == null){ throw new IOException("Can't get master address from ZooKeeper; znode data == null"); } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaRegionTracker.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaRegionTracker.java index b38eecc42ae..a3a7d0285e9 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaRegionTracker.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaRegionTracker.java @@ -81,6 +81,9 @@ public class MetaRegionTracker extends ZooKeeperNodeTracker { return ServerName.parseFrom(ZKUtil.getData(zkw, zkw.metaServerZNode)); } catch (DeserializationException e) { throw ZKUtil.convert(e); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return null; } } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKClusterId.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKClusterId.java index 96dc20a4c1f..e0fb7cd23f4 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKClusterId.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKClusterId.java @@ -63,7 +63,13 @@ public class ZKClusterId { public static String readClusterIdZNode(ZooKeeperWatcher watcher) throws KeeperException { if (ZKUtil.checkExists(watcher, watcher.clusterIdZNode) != -1) { - byte [] data = ZKUtil.getData(watcher, watcher.clusterIdZNode); + byte [] data; + try { + data = ZKUtil.getData(watcher, watcher.clusterIdZNode); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + return null; + } if (data != null) { try { return ClusterId.parseFrom(data).toString(); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKLeaderManager.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKLeaderManager.java index 3fd133bbc97..e1eefb12df8 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKLeaderManager.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKLeaderManager.java @@ -170,6 +170,10 @@ public class ZKLeaderManager extends ZooKeeperListener { watcher.abort("Unhandled zookeeper exception removing leader node", ke); candidate.stop("Unhandled zookeeper exception removing leader node: " + ke.getMessage()); + } catch (InterruptedException e) { + watcher.abort("Unhandled zookeeper exception removing leader node", e); + candidate.stop("Unhandled zookeeper exception removing leader node: " + + e.getMessage()); } } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTable.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTable.java index 5761a7d2b70..36e6b897c7b 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTable.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTable.java @@ -26,6 +26,7 @@ import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; import org.apache.zookeeper.KeeperException; +import java.io.InterruptedIOException; import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -63,7 +64,7 @@ public class ZKTable { // TODO: Make it so always a table znode. Put table schema here as well as table state. // Have watcher on table znode so all are notified of state or schema change. - public ZKTable(final ZooKeeperWatcher zkw) throws KeeperException { + public ZKTable(final ZooKeeperWatcher zkw) throws KeeperException, InterruptedException { super(); this.watcher = zkw; populateTableStates(); @@ -74,7 +75,7 @@ public class ZKTable { * @throws KeeperException */ private void populateTableStates() - throws KeeperException { + throws KeeperException, InterruptedException { synchronized (this.cache) { List children = ZKUtil.listChildrenNoWatch(this.watcher, this.watcher.tableZNode); if (children == null) return; @@ -316,7 +317,7 @@ public class ZKTable { * @throws KeeperException */ public static Set getDisabledTables(ZooKeeperWatcher zkw) - throws KeeperException { + throws KeeperException, InterruptedIOException { return getAllTables(zkw, ZooKeeperProtos.Table.State.DISABLED); } @@ -326,7 +327,7 @@ public class ZKTable { * @throws KeeperException */ public static Set getDisablingTables(ZooKeeperWatcher zkw) - throws KeeperException { + throws KeeperException, InterruptedIOException { return getAllTables(zkw, ZooKeeperProtos.Table.State.DISABLING); } @@ -336,7 +337,7 @@ public class ZKTable { * @throws KeeperException */ public static Set getEnablingTables(ZooKeeperWatcher zkw) - throws KeeperException { + throws KeeperException, InterruptedIOException { return getAllTables(zkw, ZooKeeperProtos.Table.State.ENABLING); } @@ -346,7 +347,7 @@ public class ZKTable { * @throws KeeperException */ public static Set getDisabledOrDisablingTables(ZooKeeperWatcher zkw) - throws KeeperException { + throws KeeperException, InterruptedIOException { return getAllTables(zkw, ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING); } @@ -380,14 +381,19 @@ public class ZKTable { * @throws KeeperException */ static Set getAllTables(final ZooKeeperWatcher zkw, - final ZooKeeperProtos.Table.State... states) throws KeeperException { + final ZooKeeperProtos.Table.State... states) throws KeeperException, InterruptedIOException { Set allTables = new HashSet(); List children = ZKUtil.listChildrenNoWatch(zkw, zkw.tableZNode); if(children == null) return allTables; for (String child: children) { TableName tableName = TableName.valueOf(child); - ZooKeeperProtos.Table.State state = ZKTableReadOnly.getTableState(zkw, tableName); + ZooKeeperProtos.Table.State state = null; + try { + state = ZKTableReadOnly.getTableState(zkw, tableName); + } catch (InterruptedException e) { + throw new InterruptedIOException(); + } for (ZooKeeperProtos.Table.State expectedState: states) { if (state == expectedState) { allTables.add(tableName); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTableReadOnly.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTableReadOnly.java index 25372e0c0be..236aa7f389d 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTableReadOnly.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTableReadOnly.java @@ -55,7 +55,7 @@ public class ZKTableReadOnly { */ public static boolean isDisabledTable(final ZooKeeperWatcher zkw, final TableName tableName) - throws KeeperException { + throws KeeperException, InterruptedException { ZooKeeperProtos.Table.State state = getTableState(zkw, tableName); return isTableState(ZooKeeperProtos.Table.State.DISABLED, state); } @@ -71,7 +71,7 @@ public class ZKTableReadOnly { */ public static boolean isEnabledTable(final ZooKeeperWatcher zkw, final TableName tableName) - throws KeeperException { + throws KeeperException, InterruptedException { return getTableState(zkw, tableName) == ZooKeeperProtos.Table.State.ENABLED; } @@ -87,7 +87,7 @@ public class ZKTableReadOnly { */ public static boolean isDisablingOrDisabledTable(final ZooKeeperWatcher zkw, final TableName tableName) - throws KeeperException { + throws KeeperException, InterruptedException { ZooKeeperProtos.Table.State state = getTableState(zkw, tableName); return isTableState(ZooKeeperProtos.Table.State.DISABLING, state) || isTableState(ZooKeeperProtos.Table.State.DISABLED, state); @@ -99,7 +99,7 @@ public class ZKTableReadOnly { * @throws KeeperException */ public static Set getDisabledTables(ZooKeeperWatcher zkw) - throws KeeperException { + throws KeeperException, InterruptedException { Set disabledTables = new HashSet(); List children = ZKUtil.listChildrenNoWatch(zkw, zkw.tableZNode); @@ -118,7 +118,7 @@ public class ZKTableReadOnly { * @throws KeeperException */ public static Set getDisabledOrDisablingTables(ZooKeeperWatcher zkw) - throws KeeperException { + throws KeeperException, InterruptedException { Set disabledTables = new HashSet(); List children = ZKUtil.listChildrenNoWatch(zkw, zkw.tableZNode); @@ -146,7 +146,7 @@ public class ZKTableReadOnly { */ static ZooKeeperProtos.Table.State getTableState(final ZooKeeperWatcher zkw, final TableName tableName) - throws KeeperException { + throws KeeperException, InterruptedException { String znode = ZKUtil.joinZNode(zkw.tableZNode, tableName.getNameAsString()); byte [] data = ZKUtil.getData(zkw, znode); if (data == null || data.length <= 0) return null; diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java index bd4a6b2aab8..78cf048cae3 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKUtil.java @@ -48,6 +48,7 @@ import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.RegionStoreSequenceIds; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.ExceptionUtil; import org.apache.hadoop.hbase.util.Threads; import org.apache.hadoop.hbase.zookeeper.ZKUtil.ZKUtilOp.CreateAndFailSilent; import org.apache.hadoop.hbase.zookeeper.ZKUtil.ZKUtilOp.DeleteNodeFailSilent; @@ -678,23 +679,19 @@ public class ZKUtil { * error. */ public static byte [] getData(ZooKeeperWatcher zkw, String znode) - throws KeeperException { + throws KeeperException, InterruptedException { try { byte [] data = zkw.getRecoverableZooKeeper().getData(znode, null, null); logRetrievedMsg(zkw, znode, data, false); return data; } catch (KeeperException.NoNodeException e) { LOG.debug(zkw.prefix("Unable to get data of znode " + znode + " " + - "because node does not exist (not an error)")); + "because node does not exist (not an error)")); return null; } catch (KeeperException e) { LOG.warn(zkw.prefix("Unable to get data of znode " + znode), e); zkw.keeperException(e); return null; - } catch (InterruptedException e) { - LOG.warn(zkw.prefix("Unable to get data of znode " + znode), e); - zkw.interruptedException(e); - return null; } } @@ -1661,13 +1658,22 @@ public class ZKUtil { do { String znodeToProcess = stack.remove(stack.size() - 1); sb.append("\n").append(znodeToProcess).append(": "); - byte[] data = ZKUtil.getData(zkw, znodeToProcess); + byte[] data; + try { + data = ZKUtil.getData(zkw, znodeToProcess); + } catch (InterruptedException e) { + zkw.interruptedException(e); + return; + } if (data != null && data.length > 0) { // log position long position = 0; try { position = ZKUtil.parseHLogPositionFrom(ZKUtil.getData(zkw, znodeToProcess)); sb.append(position); - } catch (Exception e) { + } catch (DeserializationException ignored) { + } catch (InterruptedException e) { + zkw.interruptedException(e); + return; } } for (String zNodeChild : ZKUtil.listChildrenNoWatch(zkw, znodeToProcess)) { @@ -1682,7 +1688,13 @@ public class ZKUtil { sb.append("\n").append(peersZnode).append(": "); for (String peerIdZnode : ZKUtil.listChildrenNoWatch(zkw, peersZnode)) { String znodeToProcess = ZKUtil.joinZNode(peersZnode, peerIdZnode); - byte[] data = ZKUtil.getData(zkw, znodeToProcess); + byte[] data; + try { + data = ZKUtil.getData(zkw, znodeToProcess); + } catch (InterruptedException e) { + zkw.interruptedException(e); + return; + } // parse the data of the above peer znode. try { String clusterKey = ZooKeeperProtos.ReplicationPeer.newBuilder(). @@ -1705,9 +1717,15 @@ public class ZKUtil { if (!child.equals(peerState)) continue; String peerStateZnode = ZKUtil.joinZNode(znodeToProcess, child); sb.append("\n").append(peerStateZnode).append(": "); - byte[] peerStateData = ZKUtil.getData(zkw, peerStateZnode); - sb.append(ZooKeeperProtos.ReplicationState.newBuilder() - .mergeFrom(peerStateData, pblen, peerStateData.length - pblen).getState().name()); + byte[] peerStateData; + try { + peerStateData = ZKUtil.getData(zkw, peerStateZnode); + sb.append(ZooKeeperProtos.ReplicationState.newBuilder() + .mergeFrom(peerStateData, pblen, peerStateData.length - pblen).getState().name()); + } catch (InterruptedException e) { + zkw.interruptedException(e); + return; + } } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index dc2f93a1fd3..a576942b77f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.master; import java.io.IOException; +import java.io.InterruptedIOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -282,7 +283,11 @@ public class AssignmentManager extends ZooKeeperListener { this.timeoutMonitor = null; this.timerUpdater = null; } - this.zkTable = new ZKTable(this.watcher); + try { + this.zkTable = new ZKTable(this.watcher); + } catch (InterruptedException e) { + throw new InterruptedIOException(); + } // This is the max attempts, not retries, so it should be at least 1. this.maximumAttempts = Math.max(1, this.server.getConfiguration().getInt("hbase.assignment.maximum.attempts", 10)); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 9a01587126a..2754e7b868a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.master; import java.io.IOException; +import java.io.InterruptedIOException; import java.lang.reflect.Constructor; import java.lang.reflect.InvocationTargetException; import java.net.InetAddress; @@ -1426,7 +1427,7 @@ MasterServices, Server { return balancerCutoffTime; } - public boolean balance() throws HBaseIOException { + public boolean balance() throws IOException { // if master not initialized, don't run balancer. if (!this.initialized) { LOG.debug("Master has not been initialized, don't run balancer."); @@ -1513,7 +1514,7 @@ MasterServices, Server { public BalanceResponse balance(RpcController c, BalanceRequest request) throws ServiceException { try { return BalanceResponse.newBuilder().setBalancerRan(balance()).build(); - } catch (HBaseIOException ex) { + } catch (IOException ex) { throw new ServiceException(ex); } } @@ -2078,14 +2079,18 @@ MasterServices, Server { GetClusterStatusRequest req) throws ServiceException { GetClusterStatusResponse.Builder response = GetClusterStatusResponse.newBuilder(); - response.setClusterStatus(getClusterStatus().convert()); + try { + response.setClusterStatus(getClusterStatus().convert()); + } catch (InterruptedIOException e) { + throw new ServiceException(e); + } return response.build(); } /** * @return cluster status */ - public ClusterStatus getClusterStatus() { + public ClusterStatus getClusterStatus() throws InterruptedIOException { // Build Set of backup masters from ZK nodes List backupMasterStrings; try { @@ -2099,9 +2104,13 @@ MasterServices, Server { backupMasterStrings.size()); for (String s: backupMasterStrings) { try { - byte [] bytes = - ZKUtil.getData(this.zooKeeper, ZKUtil.joinZNode( - this.zooKeeper.backupMasterAddressesZNode, s)); + byte [] bytes; + try { + bytes = ZKUtil.getData(this.zooKeeper, ZKUtil.joinZNode( + this.zooKeeper.backupMasterAddressesZNode, s)); + } catch (InterruptedException e) { + throw new InterruptedIOException(); + } if (bytes != null) { ServerName sn; try { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java index 687fc7561d7..26da01e1cb2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java @@ -390,7 +390,7 @@ public class MasterFileSystem { * @throws KeeperException */ void removeStaleRecoveringRegionsFromZK(final Set failedServers) - throws KeeperException { + throws KeeperException, InterruptedIOException { this.splitLogManager.removeStaleRecoveringRegionsFromZK(failedServers); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java index a78374814a5..0fa366ebdd6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java @@ -25,6 +25,7 @@ import static org.apache.hadoop.hbase.master.SplitLogManager.TerminationStatus.I import static org.apache.hadoop.hbase.master.SplitLogManager.TerminationStatus.SUCCESS; import java.io.IOException; +import java.io.InterruptedIOException; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; @@ -569,7 +570,7 @@ public class SplitLogManager extends ZooKeeperListener { * @throws KeeperException */ void removeStaleRecoveringRegionsFromZK(final Set failedServers) - throws KeeperException { + throws KeeperException, InterruptedIOException { if (!this.distributedLogReplay) { // remove any regions in recovery from ZK which could happen when we turn the feature on @@ -591,7 +592,12 @@ public class SplitLogManager extends ZooKeeperListener { List tasks = ZKUtil.listChildrenNoWatch(watcher, watcher.splitLogZNode); if (tasks != null) { for (String t : tasks) { - byte[] data = ZKUtil.getData(this.watcher, ZKUtil.joinZNode(watcher.splitLogZNode, t)); + byte[] data; + try { + data = ZKUtil.getData(this.watcher, ZKUtil.joinZNode(watcher.splitLogZNode, t)); + } catch (InterruptedException e) { + throw new InterruptedIOException(); + } if (data != null) { SplitLogTask slt = null; try { @@ -1115,7 +1121,7 @@ public class SplitLogManager extends ZooKeeperListener { * @param userRegions user regiones assigned on the region server */ void markRegionsRecoveringInZK(final ServerName serverName, Set userRegions) - throws KeeperException { + throws KeeperException, InterruptedIOException { if (userRegions == null || !this.distributedLogReplay) { return; } @@ -1172,9 +1178,11 @@ public class SplitLogManager extends ZooKeeperListener { // wait a little bit for retry try { Thread.sleep(20); - } catch (Exception ignoreE) { - // ignore + } catch (InterruptedException e1) { + throw new InterruptedIOException(); } + } catch (InterruptedException e) { + throw new InterruptedIOException(); } } while ((--retries) > 0 && (!this.stopper.isStopped())); } @@ -1240,7 +1248,12 @@ public class SplitLogManager extends ZooKeeperListener { String nodePath = ZKUtil.joinZNode(zkw.recoveringRegionsZNode, encodedRegionName); nodePath = ZKUtil.joinZNode(nodePath, serverName); try { - byte[] data = ZKUtil.getData(zkw, nodePath); + byte[] data; + try { + data = ZKUtil.getData(zkw, nodePath); + } catch (InterruptedException e) { + throw new InterruptedIOException(); + } if (data != null) { result = ZKUtil.parseRegionStoreSequenceIds(data); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerChore.java index 41064bd2c7f..2b02c40c488 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerChore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/BalancerChore.java @@ -25,6 +25,8 @@ import org.apache.hadoop.hbase.Chore; import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.master.HMaster; +import java.io.IOException; + /** * Chore that will call HMaster.balance{@link org.apache.hadoop.hbase.master.HMaster#balance()} when * needed. @@ -46,7 +48,7 @@ public class BalancerChore extends Chore { protected void chore() { try { master.balance(); - } catch (HBaseIOException e) { + } catch (IOException e) { LOG.error("Failed to balance.", e); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterStatusChore.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterStatusChore.java index 23f7843a270..9bc7cc0cb46 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterStatusChore.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/balancer/ClusterStatusChore.java @@ -18,17 +18,21 @@ package org.apache.hadoop.hbase.master.balancer; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.Chore; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.LoadBalancer; +import java.io.InterruptedIOException; + /** * Chore that will feed the balancer the cluster status. */ @InterfaceAudience.Private public class ClusterStatusChore extends Chore { - + private static final Log LOG = LogFactory.getLog(ClusterStatusChore.class); private final HMaster master; private final LoadBalancer balancer; @@ -42,6 +46,10 @@ public class ClusterStatusChore extends Chore { @Override protected void chore() { - balancer.setClusterStatus(master.getClusterStatus()); + try { + balancer.setClusterStatus(master.getClusterStatus()); + } catch (InterruptedIOException e) { + LOG.warn("Ignoring interruption", e); + } } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java index 8a0cff099fd..6fb9496e3b6 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureCoordinatorRpcs.java @@ -250,6 +250,10 @@ public class ZKProcedureCoordinatorRpcs implements ProcedureCoordinatorRpcs { } catch (KeeperException e) { coordinator.rpcConnectionFailure("Failed to get data for abort node:" + abortNode + zkProc.getAbortZnode(), new IOException(e)); + } catch (InterruptedException e) { + coordinator.rpcConnectionFailure("Failed to get data for abort node:" + abortNode + + zkProc.getAbortZnode(), new IOException(e)); + Thread.currentThread().interrupt(); } coordinator.abortProcedure(procName, ee); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureMemberRpcs.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureMemberRpcs.java index 937baaca4eb..4c1623c28fb 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureMemberRpcs.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/procedure/ZKProcedureMemberRpcs.java @@ -221,6 +221,10 @@ public class ZKProcedureMemberRpcs implements ProcedureMemberRpcs { } catch (KeeperException e) { member.controllerConnectionFailure("Failed to get data for new procedure:" + opName, new IOException(e)); + } catch (InterruptedException e) { + member.controllerConnectionFailure("Failed to get data for new procedure:" + opName, + new IOException(e)); + Thread.currentThread().interrupt(); } } @@ -330,6 +334,9 @@ public class ZKProcedureMemberRpcs implements ProcedureMemberRpcs { } catch (KeeperException e) { member.controllerConnectionFailure("Failed to get data for abort znode:" + abortZNode + zkController.getAbortZnode(), new IOException(e)); + } catch (InterruptedException e) { + LOG.warn("abort already in progress", e); + Thread.currentThread().interrupt(); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 039ced75460..06c33b5413e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -4513,7 +4513,12 @@ public class HRegionServer implements ClientProtos.ClientService.BlockingInterfa String nodePath = ZKUtil.joinZNode(this.zooKeeper.recoveringRegionsZNode, region.getEncodedName()); // recovering-region level - byte[] data = ZKUtil.getData(zkw, nodePath); + byte[] data; + try { + data = ZKUtil.getData(zkw, nodePath); + } catch (InterruptedException e) { + throw new InterruptedIOException(); + } if (data != null) { lastRecordedFlushedSequenceId = SplitLogManager.parseLastFlushedSequenceIdFrom(data); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java index 8ccd2d5dd68..ba2bf4e7e05 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/HBaseFsck.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.util; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.InterruptedIOException; import java.io.PrintWriter; import java.io.StringWriter; import java.net.URI; @@ -1384,6 +1385,8 @@ public class HBaseFsck extends Configured { } } catch (KeeperException ke) { throw new IOException(ke); + } catch (InterruptedException e) { + throw new InterruptedIOException(); } finally { zkw.close(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ZKDataMigrator.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ZKDataMigrator.java index 25ad1be3cd0..8b813a7d2bd 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ZKDataMigrator.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/ZKDataMigrator.java @@ -131,7 +131,8 @@ public class ZKDataMigrator extends Configured implements Tool { return 0; } - private void checkAndMigrateTableStatesToPB(ZooKeeperWatcher zkw) throws KeeperException { + private void checkAndMigrateTableStatesToPB(ZooKeeperWatcher zkw) throws KeeperException, + InterruptedException { List tables = ZKUtil.listChildrenNoWatch(zkw, zkw.tableZNode); if (tables == null) { LOG.info("No table present to migrate table state to PB. returning.."); @@ -154,7 +155,8 @@ public class ZKDataMigrator extends Configured implements Tool { } } - private void checkAndMigrateReplicationNodesToPB(ZooKeeperWatcher zkw) throws KeeperException { + private void checkAndMigrateReplicationNodesToPB(ZooKeeperWatcher zkw) throws KeeperException, + InterruptedException { String replicationZnodeName = getConf().get("zookeeper.znode.replication", "replication"); String replicationPath = ZKUtil.joinZNode(zkw.baseZNode, replicationZnodeName); List replicationZnodes = ZKUtil.listChildrenNoWatch(zkw, replicationPath); @@ -185,7 +187,7 @@ public class ZKDataMigrator extends Configured implements Tool { } private void checkAndMigrateQueuesToPB(ZooKeeperWatcher zkw, String znode, String rs) - throws KeeperException, NoNodeException { + throws KeeperException, NoNodeException, InterruptedException { String rsPath = ZKUtil.joinZNode(znode, rs); List peers = ZKUtil.listChildrenNoWatch(zkw, rsPath); if (peers == null || peers.isEmpty()) return; @@ -207,7 +209,7 @@ public class ZKDataMigrator extends Configured implements Tool { } private void checkAndMigratePeerZnodesToPB(ZooKeeperWatcher zkw, String znode, - List peers) throws KeeperException, NoNodeException { + List peers) throws KeeperException, NoNodeException, InterruptedException { for (String peer : peers) { String peerZnode = ZKUtil.joinZNode(znode, peer); byte[] data = ZKUtil.getData(zkw, peerZnode); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java index 5d61ecb8607..a83297fc44c 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/RegionServerTracker.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.zookeeper; import java.io.IOException; +import java.io.InterruptedIOException; import java.util.ArrayList; import java.util.List; import java.util.NavigableMap; @@ -98,6 +99,8 @@ public class RegionServerTracker extends ZooKeeperListener { LOG.warn("Get Rs info port from ephemeral node", e); } catch (IOException e) { LOG.warn("Illegal data from ephemeral node", e); + } catch (InterruptedException e) { + throw new InterruptedIOException(); } this.regionServers.put(sn, rsInfoBuilder.build()); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/lock/ZKInterProcessLockBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/lock/ZKInterProcessLockBase.java index d691bd645bd..4e6d4b1e2f3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/lock/ZKInterProcessLockBase.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/zookeeper/lock/ZKInterProcessLockBase.java @@ -362,6 +362,10 @@ public abstract class ZKInterProcessLockBase implements InterProcessLock { } catch (KeeperException ex) { LOG.warn("Error processing lock metadata in " + lockZNode); return false; + } catch (InterruptedException e) { + LOG.warn("InterruptedException processing lock metadata in " + lockZNode); + Thread.currentThread().interrupt(); + return false; } return true; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java index 9d93bcb0739..dbc85ff175b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRegionPlacement.java @@ -476,7 +476,7 @@ public class TestRegionPlacement { * Verify the number of region movement is expected */ private void verifyRegionMovementNum(int expected) - throws InterruptedException, HBaseIOException { + throws InterruptedException, IOException { MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); HMaster m = cluster.getMaster(); int lastRegionOpenedCount = m.assignmentManager.getNumRegionsOpened(); diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java index e013f49cb7f..d7717a555e3 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestSplitTransactionOnCluster.java @@ -27,6 +27,7 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.IOException; +import java.io.InterruptedIOException; import java.util.List; import java.util.Map; import java.util.concurrent.CountDownLatch; @@ -1255,7 +1256,7 @@ public class TestSplitTransactionOnCluster { } } - private void waitUntilRegionServerDead() throws InterruptedException { + private void waitUntilRegionServerDead() throws InterruptedException, InterruptedIOException { // Wait until the master processes the RS shutdown for (int i=0; cluster.getMaster().getClusterStatus(). getServers().size() == NB_SERVERS && i<100; i++) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKTable.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKTable.java index a65e03e92dc..a51a8d05c42 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKTable.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/zookeeper/TestZKTable.java @@ -50,7 +50,7 @@ public class TestZKTable { @Test public void testTableStates() - throws ZooKeeperConnectionException, IOException, KeeperException { + throws ZooKeeperConnectionException, IOException, KeeperException, InterruptedException { final TableName name = TableName.valueOf("testDisabled"); Abortable abortable = new Abortable() {