HBASE-2726. Region Server should never abort without an informative log message

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@954705 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Todd Lipcon 2010-06-15 05:04:42 +00:00
parent 18a53dfd6b
commit 43b0753558
6 changed files with 33 additions and 22 deletions

View File

@ -694,6 +694,8 @@ Release 0.21.0 - Unreleased
HBASE-2468 Improvements to prewarm META cache on clients
(Mingjie Lai via Stack)
HBASE-2353 Batch puts should sync HLog as few times as possible
HBASE-2726 Region Server should never abort without an informative log
message
NEW FEATURES
HBASE-1961 HBase EC2 scripts

View File

@ -377,7 +377,7 @@ public class HRegionServer implements HRegionInterface,
if (restart) {
restart();
} else {
abort();
abort("ZooKeeper session expired");
}
} else if (type == EventType.NodeDeleted) {
watchMasterAddress();
@ -397,8 +397,7 @@ public class HRegionServer implements HRegionInterface,
}
private void restart() {
LOG.info("Restarting Region Server");
abort();
abort("Restarting region server");
Threads.shutdown(regionServerThread);
boolean done = false;
while (!done) {
@ -568,8 +567,7 @@ public class HRegionServer implements HRegionInterface,
} // for
} catch (Throwable t) {
if (!checkOOME(t)) {
LOG.fatal("Unhandled exception. Aborting...", t);
abort();
abort("Unhandled exception", t);
}
}
this.leases.closeAfterLeasesExpire();
@ -836,8 +834,7 @@ public class HRegionServer implements HRegionInterface,
(e.getCause() != null && e.getCause() instanceof OutOfMemoryError) ||
(e.getMessage() != null &&
e.getMessage().contains("java.lang.OutOfMemoryError"))) {
LOG.fatal("OutOfMemoryError, aborting.", e);
abort();
abort("OutOfMemoryError, aborting", e);
stop = true;
}
return stop;
@ -855,8 +852,7 @@ public class HRegionServer implements HRegionInterface,
try {
FSUtils.checkFileSystemAvailable(this.fs);
} catch (IOException e) {
LOG.fatal("Shutting down HRegionServer: file system not available", e);
abort();
abort("File System not available", e);
this.fsOk = false;
}
}
@ -1008,8 +1004,7 @@ public class HRegionServer implements HRegionInterface,
String n = Thread.currentThread().getName();
UncaughtExceptionHandler handler = new UncaughtExceptionHandler() {
public void uncaughtException(Thread t, Throwable e) {
abort();
LOG.fatal("Set stop flag in " + t.getName(), e);
abort("Uncaught exception in service thread " + t.getName(), e);
}
};
Threads.setDaemonThreadRunning(this.hlogRoller, n + ".logRoller",
@ -1132,8 +1127,15 @@ public class HRegionServer implements HRegionInterface,
* log it is using and without notifying the master.
* Used in unit testing and on catastrophic events, such as HDFS being yanked
* out from under hbase or an OOME.
* @param reason the reason we are aborting
* @param cause the exception that caused the abort, or null
*/
public void abort() {
public void abort(String reason, Throwable cause) {
if (cause != null) {
LOG.fatal("Aborting region server " + this + ": " + reason, cause);
} else {
LOG.fatal("Aborting region server " + this + ": " + reason);
}
this.abortRequested = true;
this.reservedSpace.clear();
if (this.metrics != null) {
@ -1141,6 +1143,13 @@ public class HRegionServer implements HRegionInterface,
}
stop();
}
/**
* Aborts with only a textual reason, for callers that have no exception to
* report. Delegates to {@link HRegionServer#abort(String, Throwable)} with a
* null cause.
* @param reason the reason we are aborting
* @see HRegionServer#abort(String, Throwable)
*/
public void abort(String reason) {
abort(reason, null);
}
/*
* Simulate a kill -9 of this server.
@ -1149,7 +1158,7 @@ public class HRegionServer implements HRegionInterface,
*/
protected void kill() {
this.killed = true;
abort();
abort("Simulated kill");
}
/**

View File

@ -86,20 +86,21 @@ class LogRoller extends Thread implements LogRollListener {
} catch (FailedLogCloseException e) {
LOG.fatal("Forcing server shutdown", e);
server.checkFileSystem();
server.abort();
server.abort("Failed log close in log roller", e);
} catch (java.net.ConnectException e) {
LOG.fatal("Forcing server shutdown", e);
server.checkFileSystem();
server.abort();
server.abort("Failed connect in log roller", e);
} catch (IOException ex) {
LOG.fatal("Log rolling failed with ioe: ",
RemoteExceptionHandler.checkIOException(ex));
server.checkFileSystem();
// Abort if we get here. We probably won't recover an IOE. HBASE-1132
server.abort();
server.abort("IOE in log roller", ex);
} catch (Exception ex) {
LOG.error("Log rolling failed", ex);
server.checkFileSystem();
server.abort("Log rolling failed", ex);
} finally {
rollLog.set(false);
rollLock.unlock();

View File

@ -261,8 +261,7 @@ class MemStoreFlusher extends Thread implements FlushRequester {
// is required. Currently the only way to do this is a restart of
// the server. Abort because hdfs is probably bad (HBASE-644 is a case
// where hdfs was bad but passed the hdfs check).
LOG.fatal("Replay of hlog required. Forcing server shutdown", ex);
server.abort();
server.abort("Replay of HLog required. Forcing server shutdown", ex);
return false;
} catch (IOException ex) {
LOG.error("Cache flush failed"

View File

@ -274,7 +274,7 @@ public class MiniHBaseCluster {
public String abortRegionServer(int serverNumber) {
HRegionServer server = getRegionServer(serverNumber);
LOG.info("Aborting " + server.toString());
server.abort();
server.abort("Aborting for tests", new Exception("Trace info"));
return server.toString();
}

View File

@ -337,10 +337,10 @@ public class TestMasterTransitions {
if (!incomingMsg.isType(HMsg.Type.MSG_REPORT_PROCESS_OPEN)) return true;
// Save the region that is in transition so can test later it came back.
this.regionToFind = incomingMsg.getRegionInfo();
LOG.info("ABORTING " + this.victim + " because got a " +
String msg = "ABORTING " + this.victim + " because got a " +
HMsg.Type.MSG_REPORT_PROCESS_OPEN + " on this server for " +
incomingMsg.getRegionInfo().getRegionNameAsString());
this.victim.abort();
incomingMsg.getRegionInfo().getRegionNameAsString();
this.victim.abort(msg);
this.abortSent = true;
return true;
}