HADOOP-1527 Region server won't start because logdir exists
git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@568700 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
ec2d29c902
commit
ccd9248e63
|
@ -10,6 +10,7 @@ Trunk (unreleased changes)
|
||||||
OPTIMIZATIONS
|
OPTIMIZATIONS
|
||||||
|
|
||||||
BUG FIXES
|
BUG FIXES
|
||||||
|
HADOOP-1527 Region server won't start because logdir exists
|
||||||
HADOOP-1723 If master asks region server to shut down, by-pass return of
|
HADOOP-1723 If master asks region server to shut down, by-pass return of
|
||||||
shutdown message
|
shutdown message
|
||||||
HADOOP-1729 Recent renaming of META tables breaks hbase shell
|
HADOOP-1729 Recent renaming of META tables breaks hbase shell
|
||||||
|
|
|
@ -83,7 +83,7 @@ public class HLog implements HConstants {
|
||||||
long filenum = 0;
|
long filenum = 0;
|
||||||
AtomicInteger numEntries = new AtomicInteger(0);
|
AtomicInteger numEntries = new AtomicInteger(0);
|
||||||
|
|
||||||
Integer rollLock = new Integer(0);
|
Integer rollLock = Integer.valueOf(0);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Split up a bunch of log files, that are no longer being written to,
|
* Split up a bunch of log files, that are no longer being written to,
|
||||||
|
@ -439,35 +439,61 @@ public class HLog implements HConstants {
|
||||||
notifyAll();
|
notifyAll();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void usage() {
|
||||||
|
System.err.println("Usage: java org.apache.hbase.HLog" +
|
||||||
|
" {--dump <logfile>... | --split <logdir>...}");
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Pass a log file and it will dump out a text version on
|
* Pass one or more log file names and it will either dump out a text version
|
||||||
* <code>stdout</code>.
|
* on <code>stdout</code> or split the specified log files.
|
||||||
* @param args
|
* @param args
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
*/
|
*/
|
||||||
public static void main(String[] args) throws IOException {
|
public static void main(String[] args) throws IOException {
|
||||||
if (args.length < 1) {
|
if (args.length < 2) {
|
||||||
System.err.println("Usage: java org.apache.hbase.HLog <logfile>");
|
usage();
|
||||||
System.exit(-1);
|
System.exit(-1);
|
||||||
}
|
}
|
||||||
|
boolean dump = true;
|
||||||
|
if (args[0].compareTo("--dump") != 0) {
|
||||||
|
if (args[0].compareTo("--split") == 0) {
|
||||||
|
dump = false;
|
||||||
|
|
||||||
|
} else {
|
||||||
|
usage();
|
||||||
|
System.exit(-1);
|
||||||
|
}
|
||||||
|
}
|
||||||
Configuration conf = new HBaseConfiguration();
|
Configuration conf = new HBaseConfiguration();
|
||||||
FileSystem fs = FileSystem.get(conf);
|
FileSystem fs = FileSystem.get(conf);
|
||||||
Path logfile = new Path(args[0]);
|
Path baseDir = new Path(conf.get(HBASE_DIR, DEFAULT_HBASE_DIR));
|
||||||
if (!fs.exists(logfile)) {
|
|
||||||
throw new FileNotFoundException(args[0] + " does not exist");
|
for (int i = 1; i < args.length; i++) {
|
||||||
}
|
Path logPath = new Path(args[i]);
|
||||||
if (!fs.isFile(logfile)) {
|
if (!fs.exists(logPath)) {
|
||||||
throw new IOException(args[0] + " is not a file");
|
throw new FileNotFoundException(args[i] + " does not exist");
|
||||||
}
|
}
|
||||||
Reader log = new SequenceFile.Reader(fs, logfile, conf);
|
if (dump) {
|
||||||
try {
|
if (!fs.isFile(logPath)) {
|
||||||
HLogKey key = new HLogKey();
|
throw new IOException(args[i] + " is not a file");
|
||||||
HLogEdit val = new HLogEdit();
|
}
|
||||||
while(log.next(key, val)) {
|
Reader log = new SequenceFile.Reader(fs, logPath, conf);
|
||||||
System.out.println(key.toString() + " " + val.toString());
|
try {
|
||||||
|
HLogKey key = new HLogKey();
|
||||||
|
HLogEdit val = new HLogEdit();
|
||||||
|
while(log.next(key, val)) {
|
||||||
|
System.out.println(key.toString() + " " + val.toString());
|
||||||
|
}
|
||||||
|
} finally {
|
||||||
|
log.close();
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if (!fs.getFileStatus(logPath).isDir()) {
|
||||||
|
throw new IOException(args[i] + " is not a directory");
|
||||||
|
}
|
||||||
|
splitLog(baseDir, logPath, fs, conf);
|
||||||
}
|
}
|
||||||
} finally {
|
|
||||||
log.close();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -38,6 +38,8 @@ import java.util.concurrent.LinkedBlockingQueue;
|
||||||
import java.util.concurrent.TimeUnit;
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.concurrent.atomic.AtomicInteger;
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
import java.util.concurrent.atomic.AtomicReference;
|
import java.util.concurrent.atomic.AtomicReference;
|
||||||
|
import java.util.concurrent.locks.Lock;
|
||||||
|
import java.util.concurrent.locks.ReentrantLock;
|
||||||
|
|
||||||
import org.apache.commons.logging.Log;
|
import org.apache.commons.logging.Log;
|
||||||
import org.apache.commons.logging.LogFactory;
|
import org.apache.commons.logging.LogFactory;
|
||||||
|
@ -101,6 +103,8 @@ HMasterRegionInterface, Runnable {
|
||||||
long metaRescanInterval;
|
long metaRescanInterval;
|
||||||
|
|
||||||
final AtomicReference<HServerAddress> rootRegionLocation;
|
final AtomicReference<HServerAddress> rootRegionLocation;
|
||||||
|
|
||||||
|
Lock splitLogLock = new ReentrantLock();
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Base HRegion scanner class. Holds utility common to <code>ROOT</code> and
|
* Base HRegion scanner class. Holds utility common to <code>ROOT</code> and
|
||||||
|
@ -424,7 +428,32 @@ HMasterRegionInterface, Runnable {
|
||||||
pendingRegions.contains(info.regionName))
|
pendingRegions.contains(info.regionName))
|
||||||
&& (storedInfo == null || storedInfo.getStartCode() != startCode)) {
|
&& (storedInfo == null || storedInfo.getStartCode() != startCode)) {
|
||||||
|
|
||||||
// The current assignment is no good; load the region.
|
// The current assignment is no good
|
||||||
|
|
||||||
|
// Recover the region server's log if there is one.
|
||||||
|
|
||||||
|
if (serverName.length() != 0) {
|
||||||
|
StringBuilder dirName = new StringBuilder("log_");
|
||||||
|
dirName.append(serverName.replace(":", "_"));
|
||||||
|
Path logDir = new Path(dir, dirName.toString());
|
||||||
|
try {
|
||||||
|
if (fs.exists(logDir)) {
|
||||||
|
splitLogLock.lock();
|
||||||
|
try {
|
||||||
|
HLog.splitLog(dir, logDir, fs, conf);
|
||||||
|
|
||||||
|
} finally {
|
||||||
|
splitLogLock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.warn("unable to split region server log because: ", e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now get the region assigned
|
||||||
|
|
||||||
unassignedRegions.put(info.regionName, info);
|
unassignedRegions.put(info.regionName, info);
|
||||||
assignAttempts.put(info.regionName, Long.valueOf(0L));
|
assignAttempts.put(info.regionName, Long.valueOf(0L));
|
||||||
}
|
}
|
||||||
|
@ -513,7 +542,7 @@ HMasterRegionInterface, Runnable {
|
||||||
|
|
||||||
private RootScanner rootScanner;
|
private RootScanner rootScanner;
|
||||||
private Thread rootScannerThread;
|
private Thread rootScannerThread;
|
||||||
Integer rootScannerLock = new Integer(0);
|
Integer rootScannerLock = Integer.valueOf(0);
|
||||||
|
|
||||||
@SuppressWarnings("unchecked")
|
@SuppressWarnings("unchecked")
|
||||||
static class MetaRegion implements Comparable {
|
static class MetaRegion implements Comparable {
|
||||||
|
@ -702,7 +731,7 @@ HMasterRegionInterface, Runnable {
|
||||||
|
|
||||||
MetaScanner metaScanner;
|
MetaScanner metaScanner;
|
||||||
private Thread metaScannerThread;
|
private Thread metaScannerThread;
|
||||||
Integer metaScannerLock = new Integer(0);
|
Integer metaScannerLock = Integer.valueOf(0);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* The 'unassignedRegions' table maps from a region name to a HRegionInfo
|
* The 'unassignedRegions' table maps from a region name to a HRegionInfo
|
||||||
|
@ -1832,9 +1861,23 @@ HMasterRegionInterface, Runnable {
|
||||||
if (!logSplit) {
|
if (!logSplit) {
|
||||||
// Process the old log file
|
// Process the old log file
|
||||||
|
|
||||||
HLog.splitLog(dir, new Path(dir, "log" + "_" +
|
StringBuilder dirName = new StringBuilder("log_");
|
||||||
deadServer.getBindAddress() + "_" + deadServer.getPort()), fs, conf);
|
dirName.append(deadServer.getBindAddress());
|
||||||
|
dirName.append("_");
|
||||||
|
dirName.append(deadServer.getPort());
|
||||||
|
Path logdir = new Path(dir, dirName.toString());
|
||||||
|
|
||||||
|
if (fs.exists(logdir)) {
|
||||||
|
if (!splitLogLock.tryLock()) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
HLog.splitLog(dir, logdir, fs, conf);
|
||||||
|
|
||||||
|
} finally {
|
||||||
|
splitLogLock.unlock();
|
||||||
|
}
|
||||||
|
}
|
||||||
logSplit = true;
|
logSplit = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2154,8 +2197,8 @@ HMasterRegionInterface, Runnable {
|
||||||
// We can't proceed until the root region is online and has been scanned
|
// We can't proceed until the root region is online and has been scanned
|
||||||
if (LOG.isDebugEnabled()) {
|
if (LOG.isDebugEnabled()) {
|
||||||
LOG.debug("root region: " +
|
LOG.debug("root region: " +
|
||||||
((rootRegionLocation != null)?
|
((rootRegionLocation.get() != null)?
|
||||||
rootRegionLocation.toString(): "null") +
|
rootRegionLocation.get().toString(): "null") +
|
||||||
", rootScanned: " + rootScanned);
|
", rootScanned: " + rootScanned);
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -2946,7 +2989,9 @@ HMasterRegionInterface, Runnable {
|
||||||
// the PendingServerShutdown operation has a chance to split the log file.
|
// the PendingServerShutdown operation has a chance to split the log file.
|
||||||
|
|
||||||
try {
|
try {
|
||||||
msgQueue.put(new PendingServerShutdown(info));
|
if (info != null) {
|
||||||
|
msgQueue.put(new PendingServerShutdown(info));
|
||||||
|
}
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
throw new RuntimeException("Putting into msgQueue was interrupted.", e);
|
throw new RuntimeException("Putting into msgQueue was interrupted.", e);
|
||||||
}
|
}
|
||||||
|
|
|
@ -37,7 +37,7 @@ public class TestRegionServerAbort extends HBaseClusterTestCase {
|
||||||
super(2);
|
super(2);
|
||||||
conf.setInt("ipc.client.timeout", 5000); // reduce client timeout
|
conf.setInt("ipc.client.timeout", 5000); // reduce client timeout
|
||||||
conf.setInt("ipc.client.connect.max.retries", 5); // and number of retries
|
conf.setInt("ipc.client.connect.max.retries", 5); // and number of retries
|
||||||
conf.setInt("hbase.client.retries.number", 3); // reduce HBase retries
|
conf.setInt("hbase.client.retries.number", 5); // reduce HBase retries
|
||||||
Logger.getRootLogger().setLevel(Level.WARN);
|
Logger.getRootLogger().setLevel(Level.WARN);
|
||||||
Logger.getLogger(this.getClass().getPackage().getName()).setLevel(Level.DEBUG);
|
Logger.getLogger(this.getClass().getPackage().getName()).setLevel(Level.DEBUG);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue