HBASE-698 HLog recovery is not performed after master failure
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@792964 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
e2f5f48014
commit
269cc35ad6
|
@ -458,6 +458,7 @@ Release 0.20.0 - Unreleased
|
|||
HBASE-1637 Delete client class methods should return itself like Put, Get,
|
||||
Scan (Jon Gray via Nitay)
|
||||
HBASE-1640 Allow passing arguments to jruby script run when run by bin/hbase shell
|
||||
HBASE-698 HLog recovery is not performed after master failure
|
||||
|
||||
OPTIMIZATIONS
|
||||
HBASE-1412 Change values for delete column and column family in KeyValue
|
||||
|
|
|
@ -42,7 +42,6 @@ import org.apache.hadoop.hbase.client.Delete;
|
|||
import org.apache.hadoop.hbase.client.Result;
|
||||
import org.apache.hadoop.hbase.client.Scan;
|
||||
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
||||
import org.apache.hadoop.hbase.regionserver.HLog;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||
import org.apache.hadoop.hbase.regionserver.Store;
|
||||
import org.apache.hadoop.hbase.regionserver.StoreFile;
|
||||
|
@ -372,33 +371,6 @@ abstract class BaseScanner extends Chore implements HConstants {
|
|||
" is not valid; " + " Server '" + serverAddress + "' startCode: " +
|
||||
startCode + " unknown.");
|
||||
}
|
||||
|
||||
// Recover the region server's log if there is one.
|
||||
// This is only done from here if we are restarting and there is stale
|
||||
// data in the meta region. Once we are on-line, dead server log
|
||||
// recovery is handled by lease expiration and ProcessServerShutdown
|
||||
if (!this.master.regionManager.isInitialMetaScanComplete() &&
|
||||
serverName != null) {
|
||||
Path logDir =
|
||||
new Path(this.master.rootdir, HLog.getHLogDirectoryName(serverName));
|
||||
try {
|
||||
if (master.fs.exists(logDir)) {
|
||||
this.master.regionManager.splitLogLock.lock();
|
||||
try {
|
||||
HLog.splitLog(master.rootdir, logDir, master.fs,
|
||||
master.getConfiguration());
|
||||
} finally {
|
||||
this.master.regionManager.splitLogLock.unlock();
|
||||
}
|
||||
}
|
||||
if (LOG.isDebugEnabled()) {
|
||||
LOG.debug("Split " + logDir.toString());
|
||||
}
|
||||
} catch (IOException e) {
|
||||
LOG.warn("unable to split region server log because: ", e);
|
||||
throw e;
|
||||
}
|
||||
}
|
||||
// Now get the region assigned
|
||||
this.master.regionManager.setUnassigned(info, true);
|
||||
}
|
||||
|
|
|
@ -38,6 +38,7 @@ import java.util.concurrent.atomic.AtomicBoolean;
|
|||
|
||||
import org.apache.commons.logging.Log;
|
||||
import org.apache.commons.logging.LogFactory;
|
||||
import org.apache.hadoop.fs.FileStatus;
|
||||
import org.apache.hadoop.fs.FileSystem;
|
||||
import org.apache.hadoop.fs.Path;
|
||||
import org.apache.hadoop.hbase.ClusterStatus;
|
||||
|
@ -71,6 +72,7 @@ import org.apache.hadoop.hbase.ipc.HMasterInterface;
|
|||
import org.apache.hadoop.hbase.ipc.HMasterRegionInterface;
|
||||
import org.apache.hadoop.hbase.ipc.HRegionInterface;
|
||||
import org.apache.hadoop.hbase.master.metrics.MasterMetrics;
|
||||
import org.apache.hadoop.hbase.regionserver.HLog;
|
||||
import org.apache.hadoop.hbase.regionserver.HRegion;
|
||||
import org.apache.hadoop.hbase.util.Bytes;
|
||||
import org.apache.hadoop.hbase.util.FSUtils;
|
||||
|
@ -543,6 +545,7 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
|
|||
// Check if this is a fresh start of the cluster
|
||||
if(addresses.size() == 0) {
|
||||
LOG.debug("This is a fresh start, proceeding with normal startup");
|
||||
splitLogAfterStartup();
|
||||
return;
|
||||
}
|
||||
LOG.info("This is a failover, ZK inspection begins...");
|
||||
|
@ -579,11 +582,46 @@ public class HMaster extends Thread implements HConstants, HMasterInterface,
|
|||
}
|
||||
LOG.info("Inspection found " + assignedRegions.size() + " regions, " +
|
||||
(isRootRegionAssigned ? "with -ROOT-" : "but -ROOT- was MIA"));
|
||||
splitLogAfterStartup();
|
||||
} catch(IOException ex) {
|
||||
ex.printStackTrace();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Inspect the log directory to recover any log file without
|
||||
* and active region server.
|
||||
* @throws IOException
|
||||
*/
|
||||
private void splitLogAfterStartup() throws IOException {
|
||||
Path logsDirPath =
|
||||
new Path(this.rootdir, HConstants.HREGION_LOGDIR_NAME);
|
||||
FileStatus [] logFolders = this.fs.listStatus(logsDirPath);
|
||||
if (logFolders == null || logFolders.length == 0) {
|
||||
LOG.debug("No log files to split, proceeding...");
|
||||
return;
|
||||
}
|
||||
for (FileStatus status : logFolders) {
|
||||
String serverName = status.getPath().getName();
|
||||
LOG.info("Found log folder : " + serverName);
|
||||
if(this.serverManager.getServerInfo(serverName) == null) {
|
||||
LOG.info("Log folder doesn't belong " +
|
||||
"to a known region server, splitting");
|
||||
this.regionManager.splitLogLock.lock();
|
||||
Path logDir =
|
||||
new Path(this.rootdir, HLog.getHLogDirectoryName(serverName));
|
||||
try {
|
||||
HLog.splitLog(this.rootdir, logDir, this.fs,
|
||||
getConfiguration());
|
||||
} finally {
|
||||
this.regionManager.splitLogLock.unlock();
|
||||
}
|
||||
} else {
|
||||
LOG.info("Log folder belongs to an existing region server");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Start up all services. If any of these threads gets an unhandled exception
|
||||
* then they just die with a logged message. This should be fine because
|
||||
|
|
Loading…
Reference in New Issue