HADOOP-1527 Region server won't start because logdir exists

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@568700 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jim Kellerman 2007-08-22 16:59:43 +00:00
parent ec2d29c902
commit ccd9248e63
4 changed files with 101 additions and 29 deletions

View File

@ -10,6 +10,7 @@ Trunk (unreleased changes)
OPTIMIZATIONS OPTIMIZATIONS
BUG FIXES BUG FIXES
HADOOP-1527 Region server won't start because logdir exists
HADOOP-1723 If master asks region server to shut down, by-pass return of HADOOP-1723 If master asks region server to shut down, by-pass return of
shutdown message shutdown message
HADOOP-1729 Recent renaming of META tables breaks hbase shell HADOOP-1729 Recent renaming of META tables breaks hbase shell

View File

@ -83,7 +83,7 @@ public class HLog implements HConstants {
long filenum = 0; long filenum = 0;
AtomicInteger numEntries = new AtomicInteger(0); AtomicInteger numEntries = new AtomicInteger(0);
Integer rollLock = new Integer(0); Integer rollLock = Integer.valueOf(0);
/** /**
* Split up a bunch of log files, that are no longer being written to, * Split up a bunch of log files, that are no longer being written to,
@ -439,27 +439,46 @@ public class HLog implements HConstants {
notifyAll(); notifyAll();
} }
private static void usage() {
System.err.println("Usage: java org.apache.hbase.HLog" +
" {--dump <logfile>... | --split <logdir>...}");
}
/** /**
* Pass a log file and it will dump out a text version on * Pass one or more log file names and it will either dump out a text version
* <code>stdout</code>. * on <code>stdout</code> or split the specified log files.
* @param args * @param args
* @throws IOException * @throws IOException
*/ */
public static void main(String[] args) throws IOException { public static void main(String[] args) throws IOException {
if (args.length < 1) { if (args.length < 2) {
System.err.println("Usage: java org.apache.hbase.HLog <logfile>"); usage();
System.exit(-1); System.exit(-1);
} }
boolean dump = true;
if (args[0].compareTo("--dump") != 0) {
if (args[0].compareTo("--split") == 0) {
dump = false;
} else {
usage();
System.exit(-1);
}
}
Configuration conf = new HBaseConfiguration(); Configuration conf = new HBaseConfiguration();
FileSystem fs = FileSystem.get(conf); FileSystem fs = FileSystem.get(conf);
Path logfile = new Path(args[0]); Path baseDir = new Path(conf.get(HBASE_DIR, DEFAULT_HBASE_DIR));
if (!fs.exists(logfile)) {
throw new FileNotFoundException(args[0] + " does not exist"); for (int i = 1; i < args.length; i++) {
Path logPath = new Path(args[i]);
if (!fs.exists(logPath)) {
throw new FileNotFoundException(args[i] + " does not exist");
} }
if (!fs.isFile(logfile)) { if (dump) {
throw new IOException(args[0] + " is not a file"); if (!fs.isFile(logPath)) {
throw new IOException(args[i] + " is not a file");
} }
Reader log = new SequenceFile.Reader(fs, logfile, conf); Reader log = new SequenceFile.Reader(fs, logPath, conf);
try { try {
HLogKey key = new HLogKey(); HLogKey key = new HLogKey();
HLogEdit val = new HLogEdit(); HLogEdit val = new HLogEdit();
@ -469,5 +488,12 @@ public class HLog implements HConstants {
} finally { } finally {
log.close(); log.close();
} }
} else {
if (!fs.getFileStatus(logPath).isDir()) {
throw new IOException(args[i] + " is not a directory");
}
splitLog(baseDir, logPath, fs, conf);
}
}
} }
} }

View File

@ -38,6 +38,8 @@ import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference; import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.commons.logging.Log; import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
@ -102,6 +104,8 @@ HMasterRegionInterface, Runnable {
final AtomicReference<HServerAddress> rootRegionLocation; final AtomicReference<HServerAddress> rootRegionLocation;
Lock splitLogLock = new ReentrantLock();
/** /**
* Base HRegion scanner class. Holds utility common to <code>ROOT</code> and * Base HRegion scanner class. Holds utility common to <code>ROOT</code> and
* <code>META</code> HRegion scanners. * <code>META</code> HRegion scanners.
@ -424,7 +428,32 @@ HMasterRegionInterface, Runnable {
pendingRegions.contains(info.regionName)) pendingRegions.contains(info.regionName))
&& (storedInfo == null || storedInfo.getStartCode() != startCode)) { && (storedInfo == null || storedInfo.getStartCode() != startCode)) {
// The current assignment is no good; load the region. // The current assignment is no good
// Recover the region server's log if there is one.
if (serverName.length() != 0) {
StringBuilder dirName = new StringBuilder("log_");
dirName.append(serverName.replace(":", "_"));
Path logDir = new Path(dir, dirName.toString());
try {
if (fs.exists(logDir)) {
splitLogLock.lock();
try {
HLog.splitLog(dir, logDir, fs, conf);
} finally {
splitLogLock.unlock();
}
}
} catch (IOException e) {
LOG.warn("unable to split region server log because: ", e);
}
}
// Now get the region assigned
unassignedRegions.put(info.regionName, info); unassignedRegions.put(info.regionName, info);
assignAttempts.put(info.regionName, Long.valueOf(0L)); assignAttempts.put(info.regionName, Long.valueOf(0L));
} }
@ -513,7 +542,7 @@ HMasterRegionInterface, Runnable {
private RootScanner rootScanner; private RootScanner rootScanner;
private Thread rootScannerThread; private Thread rootScannerThread;
Integer rootScannerLock = new Integer(0); Integer rootScannerLock = Integer.valueOf(0);
@SuppressWarnings("unchecked") @SuppressWarnings("unchecked")
static class MetaRegion implements Comparable { static class MetaRegion implements Comparable {
@ -702,7 +731,7 @@ HMasterRegionInterface, Runnable {
MetaScanner metaScanner; MetaScanner metaScanner;
private Thread metaScannerThread; private Thread metaScannerThread;
Integer metaScannerLock = new Integer(0); Integer metaScannerLock = Integer.valueOf(0);
/** /**
* The 'unassignedRegions' table maps from a region name to a HRegionInfo * The 'unassignedRegions' table maps from a region name to a HRegionInfo
@ -1832,9 +1861,23 @@ HMasterRegionInterface, Runnable {
if (!logSplit) { if (!logSplit) {
// Process the old log file // Process the old log file
HLog.splitLog(dir, new Path(dir, "log" + "_" + StringBuilder dirName = new StringBuilder("log_");
deadServer.getBindAddress() + "_" + deadServer.getPort()), fs, conf); dirName.append(deadServer.getBindAddress());
dirName.append("_");
dirName.append(deadServer.getPort());
Path logdir = new Path(dir, dirName.toString());
if (fs.exists(logdir)) {
if (!splitLogLock.tryLock()) {
return false;
}
try {
HLog.splitLog(dir, logdir, fs, conf);
} finally {
splitLogLock.unlock();
}
}
logSplit = true; logSplit = true;
} }
@ -2154,8 +2197,8 @@ HMasterRegionInterface, Runnable {
// We can't proceed until the root region is online and has been scanned // We can't proceed until the root region is online and has been scanned
if (LOG.isDebugEnabled()) { if (LOG.isDebugEnabled()) {
LOG.debug("root region: " + LOG.debug("root region: " +
((rootRegionLocation != null)? ((rootRegionLocation.get() != null)?
rootRegionLocation.toString(): "null") + rootRegionLocation.get().toString(): "null") +
", rootScanned: " + rootScanned); ", rootScanned: " + rootScanned);
} }
return false; return false;
@ -2946,7 +2989,9 @@ HMasterRegionInterface, Runnable {
// the PendingServerShutdown operation has a chance to split the log file. // the PendingServerShutdown operation has a chance to split the log file.
try { try {
if (info != null) {
msgQueue.put(new PendingServerShutdown(info)); msgQueue.put(new PendingServerShutdown(info));
}
} catch (InterruptedException e) { } catch (InterruptedException e) {
throw new RuntimeException("Putting into msgQueue was interrupted.", e); throw new RuntimeException("Putting into msgQueue was interrupted.", e);
} }

View File

@ -37,7 +37,7 @@ public class TestRegionServerAbort extends HBaseClusterTestCase {
super(2); super(2);
conf.setInt("ipc.client.timeout", 5000); // reduce client timeout conf.setInt("ipc.client.timeout", 5000); // reduce client timeout
conf.setInt("ipc.client.connect.max.retries", 5); // and number of retries conf.setInt("ipc.client.connect.max.retries", 5); // and number of retries
conf.setInt("hbase.client.retries.number", 3); // reduce HBase retries conf.setInt("hbase.client.retries.number", 5); // reduce HBase retries
Logger.getRootLogger().setLevel(Level.WARN); Logger.getRootLogger().setLevel(Level.WARN);
Logger.getLogger(this.getClass().getPackage().getName()).setLevel(Level.DEBUG); Logger.getLogger(this.getClass().getPackage().getName()).setLevel(Level.DEBUG);
} }