HBASE-698 HLog recovery is not performed after master failure

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@792964 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jean-Daniel Cryans 2009-07-10 14:58:26 +00:00
parent e2f5f48014
commit 269cc35ad6
3 changed files with 39 additions and 28 deletions

View File

@ -458,6 +458,7 @@ Release 0.20.0 - Unreleased
HBASE-1637 Delete client class methods should return itself like Put, Get, HBASE-1637 Delete client class methods should return itself like Put, Get,
Scan (Jon Gray via Nitay) Scan (Jon Gray via Nitay)
HBASE-1640 Allow passing arguments to jruby script run when run by bin/hbase shell HBASE-1640 Allow passing arguments to jruby script run when run by bin/hbase shell
HBASE-698 HLog recovery is not performed after master failure
OPTIMIZATIONS OPTIMIZATIONS
HBASE-1412 Change values for delete column and column family in KeyValue HBASE-1412 Change values for delete column and column family in KeyValue

View File

@ -42,7 +42,6 @@ import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan; import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.ipc.HRegionInterface; import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.regionserver.HLog;
import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.regionserver.Store; import org.apache.hadoop.hbase.regionserver.Store;
import org.apache.hadoop.hbase.regionserver.StoreFile; import org.apache.hadoop.hbase.regionserver.StoreFile;
@ -372,33 +371,6 @@ abstract class BaseScanner extends Chore implements HConstants {
" is not valid; " + " Server '" + serverAddress + "' startCode: " + " is not valid; " + " Server '" + serverAddress + "' startCode: " +
startCode + " unknown."); startCode + " unknown.");
} }
// Recover the region server's log if there is one.
// This is only done from here if we are restarting and there is stale
// data in the meta region. Once we are on-line, dead server log
// recovery is handled by lease expiration and ProcessServerShutdown
if (!this.master.regionManager.isInitialMetaScanComplete() &&
serverName != null) {
Path logDir =
new Path(this.master.rootdir, HLog.getHLogDirectoryName(serverName));
try {
if (master.fs.exists(logDir)) {
this.master.regionManager.splitLogLock.lock();
try {
HLog.splitLog(master.rootdir, logDir, master.fs,
master.getConfiguration());
} finally {
this.master.regionManager.splitLogLock.unlock();
}
}
if (LOG.isDebugEnabled()) {
LOG.debug("Split " + logDir.toString());
}
} catch (IOException e) {
LOG.warn("unable to split region server log because: ", e);
throw e;
}
}
// Now get the region assigned // Now get the region assigned
this.master.regionManager.setUnassigned(info, true); this.master.regionManager.setUnassigned(info, true);
} }

View File

@ -38,6 +38,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ClusterStatus; import org.apache.hadoop.hbase.ClusterStatus;
@ -71,6 +72,7 @@ import org.apache.hadoop.hbase.ipc.HMasterInterface;
import org.apache.hadoop.hbase.ipc.HMasterRegionInterface; import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
import org.apache.hadoop.hbase.ipc.HRegionInterface; import org.apache.hadoop.hbase.ipc.HRegionInterface;
import org.apache.hadoop.hbase.master.metrics.MasterMetrics; import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
import org.apache.hadoop.hbase.regionserver.HLog;
import org.apache.hadoop.hbase.regionserver.HRegion; import org.apache.hadoop.hbase.regionserver.HRegion;
import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.FSUtils;
@ -543,6 +545,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
// Check if this is a fresh start of the cluster // Check if this is a fresh start of the cluster
if(addresses.size() == 0) { if(addresses.size() == 0) {
LOG.debug("This is a fresh start, proceeding with normal startup"); LOG.debug("This is a fresh start, proceeding with normal startup");
splitLogAfterStartup();
return; return;
} }
LOG.info("This is a failover, ZK inspection begins..."); LOG.info("This is a failover, ZK inspection begins...");
@ -579,11 +582,46 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
} }
LOG.info("Inspection found " + assignedRegions.size() + " regions, " + LOG.info("Inspection found " + assignedRegions.size() + " regions, " +
(isRootRegionAssigned ? "with -ROOT-" : "but -ROOT- was MIA")); (isRootRegionAssigned ? "with -ROOT-" : "but -ROOT- was MIA"));
splitLogAfterStartup();
} catch(IOException ex) { } catch(IOException ex) {
ex.printStackTrace(); ex.printStackTrace();
} }
} }
/**
 * Inspect the log directory to recover any log file without
 * an active region server.
 *
 * Lists the entries under the HREGION_LOGDIR_NAME directory of the root
 * dir; the name of each entry is used as a server name. When the server
 * manager has no info for that name, the folder is split through
 * HLog.splitLog while holding the region manager's splitLogLock.
 * Folders whose server is known are left alone.
 *
 * @throws IOException if listing the log directory or splitting one of
 * the log folders fails; the remaining folders are not processed
 */
private void splitLogAfterStartup() throws IOException {
  Path logsDirPath =
    new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME);
  FileStatus [] logFolders = this.fs.listStatus(logsDirPath);
  if (logFolders == null || logFolders.length == 0) {
    LOG.debug("No log files to split, proceeding...");
    return;
  }
  for (FileStatus status : logFolders) {
    String serverName = status.getPath().getName();
    LOG.info("Found log folder : " + serverName);
    if(this.serverManager.getServerInfo(serverName) == null) {
      LOG.info("Log folder doesn't belong " +
        "to a known region server, splitting");
      // Build the path before taking the lock so that nothing which can
      // throw sits between lock() and the try/finally that releases it.
      Path logDir =
        new Path(this.rootdir, HLog.getHLogDirectoryName(serverName));
      this.regionManager.splitLogLock.lock();
      try {
        HLog.splitLog(this.rootdir, logDir, this.fs,
          getConfiguration());
      } finally {
        this.regionManager.splitLogLock.unlock();
      }
    } else {
      LOG.info("Log folder belongs to an existing region server");
    }
  }
}
/* /*
* Start up all services. If any of these threads gets an unhandled exception * Start up all services. If any of these threads gets an unhandled exception
* then they just die with a logged message. This should be fine because * then they just die with a logged message. This should be fine because