HBASE-8666: META region isn't fully recovered during master initialization when META region recovery had chained failures

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1489606 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
jeffreyz 2013-06-04 21:01:05 +00:00
parent aefb339ce4
commit ccb9fd364d
3 changed files with 42 additions and 5 deletions

View File

@@ -795,6 +795,7 @@ MasterServices, Server {
// Note: we can't remove oldMetaServerLocation from previousFailedServers list because it
// may also host user regions
}
+ Set<ServerName> previouslyFailedMetaRSs = getPreviouselyFailedMetaServersFromZK();
this.initializationBeforeMetaAssignment = true;
// Make sure meta assigned before proceeding.
@@ -804,11 +805,19 @@ MasterServices, Server {
// assigned when master is shutting down
if(this.stopped) return;
- if (this.distributedLogReplay && oldMetaServerLocation != null
-     && previouslyFailedServers.contains(oldMetaServerLocation)) {
+ if (this.distributedLogReplay && (!previouslyFailedMetaRSs.isEmpty())) {
// In log replay mode, the new .META. RS needs to be assigned first
status.setStatus("replaying log for Meta Region");
- this.fileSystemManager.splitMetaLog(oldMetaServerLocation);
+ // Use the union of previouslyFailedMetaRSs recorded in ZK and previouslyFailedServers,
+ // rather than just oldMetaServerLocation, to cover the following two situations:
+ // 1) chained failures (META recovery failed multiple times in a row);
+ // 2) the master was killed right before it could delete the recovering META znode from ZK,
+ //    while the same server still had non-meta WALs to replay, so
+ //    removeStaleRecoveringRegionsFromZK could not remove the stale META entry.
+ // Passing extra servers into splitMetaLog is fine; if a server has no .META. WAL,
+ // it is a no-op for that server.
+ previouslyFailedMetaRSs.addAll(previouslyFailedServers);
+ this.fileSystemManager.splitMetaLog(previouslyFailedMetaRSs);
}
enableServerShutdownHandler();
@@ -992,6 +1001,25 @@ MasterServices, Server {
return true;
}
+ /**
+  * This function returns a set of region server names under the .META. recovering region ZK node
+  * @return Set of meta server names which were recorded in ZK
+  * @throws KeeperException
+  */
+ private Set<ServerName> getPreviouselyFailedMetaServersFromZK() throws KeeperException {
+ Set<ServerName> result = new HashSet<ServerName>();
+ String metaRecoveringZNode = ZKUtil.joinZNode(zooKeeper.recoveringRegionsZNode,
+ HRegionInfo.FIRST_META_REGIONINFO.getEncodedName());
+ List<String> regionFailedServers = ZKUtil.listChildrenNoWatch(zooKeeper, metaRecoveringZNode);
+ if (regionFailedServers == null) return result;
+ for (String failedServer : regionFailedServers) {
+ ServerName server = ServerName.parseServerName(failedServer);
+ result.add(server);
+ }
+ return result;
+ }
@Override
public TableDescriptors getTableDescriptors() {
return this.tableDescriptors;

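For orientation, a rough sketch of the ZK layout that the new getPreviouselyFailedMetaServersFromZK() helper reads. The paths assume default znode names and the server names are made up; only the shape matters here:

/hbase/recovering-regions (zooKeeper.recoveringRegionsZNode)
/hbase/recovering-regions/&lt;encoded name of .META.,,1&gt; (HRegionInfo.FIRST_META_REGIONINFO.getEncodedName())
/hbase/recovering-regions/&lt;encoded name of .META.,,1&gt;/rs1.example.com,60020,1370300000000
/hbase/recovering-regions/&lt;encoded name of .META.,,1&gt;/rs2.example.com,60020,1370300001000

Each child znode is named after a region server that hosted .META. and then failed, so the loop above can turn the child name back into a ServerName via ServerName.parseServerName().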
View File

@@ -294,9 +294,18 @@ public class MasterFileSystem {
* @throws IOException
*/
public void splitMetaLog(final ServerName serverName) throws IOException {
- long splitTime = 0, splitLogSize = 0;
Set<ServerName> serverNames = new HashSet<ServerName>();
serverNames.add(serverName);
+ splitMetaLog(serverNames);
+ }
+ /**
+  * Specialized method to handle the splitting for meta HLog
+  * @param serverNames
+  * @throws IOException
+  */
+ public void splitMetaLog(final Set<ServerName> serverNames) throws IOException {
+ long splitTime = 0, splitLogSize = 0;
List<Path> logDirs = getLogDirs(serverNames);
splitLogManager.handleDeadWorkers(serverNames);
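A minimal caller-side sketch of the new Set-based overload. The server names below are hypothetical; in HMaster the set that is actually passed in is the union of previouslyFailedMetaRSs and previouslyFailedServers built earlier:

Set<ServerName> failedMetaRSs = new HashSet<ServerName>();
// ServerName strings are host,port,startcode; these values are made up for illustration.
failedMetaRSs.add(ServerName.parseServerName("rs1.example.com,60020,1370300000000"));
failedMetaRSs.add(ServerName.parseServerName("rs2.example.com,60020,1370300001000"));
// Extra servers are harmless: a server with no .META. WAL simply produces no split work.
this.fileSystemManager.splitMetaLog(failedMetaRSs); // fileSystemManager is the master's MasterFileSystem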

View File

@@ -124,7 +124,7 @@ public class SplitLogManager extends ZooKeeperListener {
private long unassignedTimeout;
private long lastNodeCreateTime = Long.MAX_VALUE;
public boolean ignoreZKDeleteForTesting = false;
- private volatile long lastRecoveringNodeCreationTime = Long.MAX_VALUE;
+ private volatile long lastRecoveringNodeCreationTime = 0;
// When lastRecoveringNodeCreationTime is older than the following threshold, we'll check
// whether to GC stale recovering znodes
private long checkRecoveringTimeThreshold = 15000; // 15 seconds
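The switch from Long.MAX_VALUE to 0 matters because of how the threshold above is presumably consulted. Initialized to Long.MAX_VALUE, now - lastRecoveringNodeCreationTime stays hugely negative until a recovering znode is created in this master's lifetime, so recovering znodes left behind by a previous run never become eligible for the stale check; starting at 0 makes them eligible right after startup. A hedged sketch of that check; the surrounding chore and the cleanup helper name are assumptions, not part of this diff:

// Inside SplitLogManager's periodic timeout handling (sketch only):
long now = EnvironmentEdgeManager.currentTimeMillis();
if (now - lastRecoveringNodeCreationTime > checkRecoveringTimeThreshold) {
  // Quiet period: no recovering znode has been created recently (or ever, now that the
  // field starts at 0), so it is safe to look for stale recovering-region znodes whose
  // WAL replay has already finished and remove them.
  removeStaleRecoveringRegions(); // hypothetical helper standing in for the actual cleanup
}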