From 269cc35ad6f3a5320b562f0f1a21c27cab590e70 Mon Sep 17 00:00:00 2001 From: Jean-Daniel Cryans Date: Fri, 10 Jul 2009 14:58:26 +0000 Subject: [PATCH] HBASE-698 HLog recovery is not performed after master failure git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@792964 13f79535-47bb-0310-9956-ffa450edef68 --- CHANGES.txt | 1 + .../hadoop/hbase/master/BaseScanner.java | 28 -------------- .../apache/hadoop/hbase/master/HMaster.java | 38 +++++++++++++++++++ 3 files changed, 39 insertions(+), 28 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index df311526af9..afea9b71c3a 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -458,6 +458,7 @@ Release 0.20.0 - Unreleased HBASE-1637 Delete client class methods should return itself like Put, Get, Scan (Jon Gray via Nitay) HBASE-1640 Allow passing arguments to jruby script run when run by bin/hbase shell + HBASE-698 HLog recovery is not performed after master failure OPTIMIZATIONS HBASE-1412 Change values for delete column and column family in KeyValue diff --git a/src/java/org/apache/hadoop/hbase/master/BaseScanner.java b/src/java/org/apache/hadoop/hbase/master/BaseScanner.java index 849683004f2..c11f701bf26 100644 --- a/src/java/org/apache/hadoop/hbase/master/BaseScanner.java +++ b/src/java/org/apache/hadoop/hbase/master/BaseScanner.java @@ -42,7 +42,6 @@ import org.apache.hadoop.hbase.client.Delete; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.ipc.HRegionInterface; -import org.apache.hadoop.hbase.regionserver.HLog; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.Store; import org.apache.hadoop.hbase.regionserver.StoreFile; @@ -372,33 +371,6 @@ abstract class BaseScanner extends Chore implements HConstants { " is not valid; " + " Server '" + serverAddress + "' startCode: " + startCode + " unknown."); } - - // Recover the region server's log if there is one. 
- // This is only done from here if we are restarting and there is stale - // data in the meta region. Once we are on-line, dead server log - // recovery is handled by lease expiration and ProcessServerShutdown - if (!this.master.regionManager.isInitialMetaScanComplete() && - serverName != null) { - Path logDir = - new Path(this.master.rootdir, HLog.getHLogDirectoryName(serverName)); - try { - if (master.fs.exists(logDir)) { - this.master.regionManager.splitLogLock.lock(); - try { - HLog.splitLog(master.rootdir, logDir, master.fs, - master.getConfiguration()); - } finally { - this.master.regionManager.splitLogLock.unlock(); - } - } - if (LOG.isDebugEnabled()) { - LOG.debug("Split " + logDir.toString()); - } - } catch (IOException e) { - LOG.warn("unable to split region server log because: ", e); - throw e; - } - } // Now get the region assigned this.master.regionManager.setUnassigned(info, true); } diff --git a/src/java/org/apache/hadoop/hbase/master/HMaster.java b/src/java/org/apache/hadoop/hbase/master/HMaster.java index 5b39b4ac53c..884002cee2e 100644 --- a/src/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/src/java/org/apache/hadoop/hbase/master/HMaster.java @@ -38,6 +38,7 @@ import java.util.concurrent.atomic.AtomicBoolean; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.ClusterStatus; @@ -71,6 +72,7 @@ import org.apache.hadoop.hbase.ipc.HMasterInterface; import org.apache.hadoop.hbase.ipc.HMasterRegionInterface; import org.apache.hadoop.hbase.ipc.HRegionInterface; import org.apache.hadoop.hbase.master.metrics.MasterMetrics; +import org.apache.hadoop.hbase.regionserver.HLog; import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSUtils; @@ -543,6 +545,7 @@ public class HMaster extends Thread 
implements HConstants, HMasterInterface, // Check if this is a fresh start of the cluster if(addresses.size() == 0) { LOG.debug("This is a fresh start, proceeding with normal startup"); + splitLogAfterStartup(); return; } LOG.info("This is a failover, ZK inspection begins..."); @@ -579,10 +582,45 @@ public class HMaster extends Thread implements HConstants, HMasterInterface, } LOG.info("Inspection found " + assignedRegions.size() + " regions, " + (isRootRegionAssigned ? "with -ROOT-" : "but -ROOT- was MIA")); + splitLogAfterStartup(); } catch(IOException ex) { ex.printStackTrace(); } } + + /** + * Inspect the log directory to recover any log file without + * an active region server. + * @throws IOException + */ + private void splitLogAfterStartup() throws IOException { + Path logsDirPath = + new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME); + FileStatus [] logFolders = this.fs.listStatus(logsDirPath); + if (logFolders == null || logFolders.length == 0) { + LOG.debug("No log files to split, proceeding..."); + return; + } + for (FileStatus status : logFolders) { + String serverName = status.getPath().getName(); + LOG.info("Found log folder : " + serverName); + if(this.serverManager.getServerInfo(serverName) == null) { + LOG.info("Log folder doesn't belong " + + "to a known region server, splitting"); + this.regionManager.splitLogLock.lock(); + Path logDir = + new Path(this.rootdir, HLog.getHLogDirectoryName(serverName)); + try { + HLog.splitLog(this.rootdir, logDir, this.fs, + getConfiguration()); + } finally { + this.regionManager.splitLogLock.unlock(); + } + } else { + LOG.info("Log folder belongs to an existing region server"); + } + } + } /* * Start up all services. If any of these threads gets an unhandled exception