HBASE-2726. Region Server should never abort without an informative log message
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@954705 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
18a53dfd6b
commit
43b0753558
|
@ -694,6 +694,8 @@ Release 0.21.0 - Unreleased
|
|||
HBASE-2468 Improvements to prewarm META cache on clients
|
||||
(Mingjie Lai via Stack)
|
||||
HBASE-2353 Batch puts should sync HLog as few times as possible
|
||||
HBASE-2726 Region Server should never abort without an informative log
|
||||
message
|
||||
|
||||
NEW FEATURES
|
||||
HBASE-1961 HBase EC2 scripts
|
||||
|
|
|
@ -377,7 +377,7 @@ public class HRegionServer implements HRegionInterface,
|
|||
if (restart) {
|
||||
restart();
|
||||
} else {
|
||||
abort();
|
||||
abort("ZooKeeper session expired");
|
||||
}
|
||||
} else if (type == EventType.NodeDeleted) {
|
||||
watchMasterAddress();
|
||||
|
@ -397,8 +397,7 @@ public class HRegionServer implements HRegionInterface,
|
|||
}
|
||||
|
||||
private void restart() {
|
||||
LOG.info("Restarting Region Server");
|
||||
abort();
|
||||
abort("Restarting region server");
|
||||
Threads.shutdown(regionServerThread);
|
||||
boolean done = false;
|
||||
while (!done) {
|
||||
|
@ -568,8 +567,7 @@ public class HRegionServer implements HRegionInterface,
|
|||
} // for
|
||||
} catch (Throwable t) {
|
||||
if (!checkOOME(t)) {
|
||||
LOG.fatal("Unhandled exception. Aborting...", t);
|
||||
abort();
|
||||
abort("Unhandled exception", t);
|
||||
}
|
||||
}
|
||||
this.leases.closeAfterLeasesExpire();
|
||||
|
@ -836,8 +834,7 @@ public class HRegionServer implements HRegionInterface,
|
|||
(e.getCause() != null && e.getCause() instanceof OutOfMemoryError) ||
|
||||
(e.getMessage() != null &&
|
||||
e.getMessage().contains("java.lang.OutOfMemoryError"))) {
|
||||
LOG.fatal("OutOfMemoryError, aborting.", e);
|
||||
abort();
|
||||
abort("OutOfMemoryError, aborting", e);
|
||||
stop = true;
|
||||
}
|
||||
return stop;
|
||||
|
@ -855,8 +852,7 @@ public class HRegionServer implements HRegionInterface,
|
|||
try {
|
||||
FSUtils.checkFileSystemAvailable(this.fs);
|
||||
} catch (IOException e) {
|
||||
LOG.fatal("Shutting down HRegionServer: file system not available", e);
|
||||
abort();
|
||||
abort("File System not available", e);
|
||||
this.fsOk = false;
|
||||
}
|
||||
}
|
||||
|
@ -1008,8 +1004,7 @@ public class HRegionServer implements HRegionInterface,
|
|||
String n = Thread.currentThread().getName();
|
||||
UncaughtExceptionHandler handler = new UncaughtExceptionHandler() {
|
||||
public void uncaughtException(Thread t, Throwable e) {
|
||||
abort();
|
||||
LOG.fatal("Set stop flag in " + t.getName(), e);
|
||||
abort("Uncaught exception in service thread " + t.getName(), e);
|
||||
}
|
||||
};
|
||||
Threads.setDaemonThreadRunning(this.hlogRoller, n + ".logRoller",
|
||||
|
@ -1132,8 +1127,15 @@ public class HRegionServer implements HRegionInterface,
|
|||
* log it is using and without notifying the master.
|
||||
* Used in unit testing and on catastrophic events such as HDFS is yanked out
|
||||
* from under hbase or we OOME.
|
||||
* @param reason the reason we are aborting
|
||||
* @param cause the exception that caused the abort, or null
|
||||
*/
|
||||
public void abort() {
|
||||
public void abort(String reason, Throwable cause) {
|
||||
if (cause != null) {
|
||||
LOG.fatal("Aborting region server " + this + ": " + reason, cause);
|
||||
} else {
|
||||
LOG.fatal("Aborting region server " + this + ": " + reason);
|
||||
}
|
||||
this.abortRequested = true;
|
||||
this.reservedSpace.clear();
|
||||
if (this.metrics != null) {
|
||||
|
@ -1142,6 +1144,13 @@ public class HRegionServer implements HRegionInterface,
|
|||
stop();
|
||||
}
|
||||
|
||||
/**
|
||||
* @see HRegionServer#abort(String, Throwable)
|
||||
*/
|
||||
public void abort(String reason) {
|
||||
abort(reason, null);
|
||||
}
|
||||
|
||||
/*
|
||||
* Simulate a kill -9 of this server.
|
||||
* Exits w/o closing regions or cleaning up logs but it does close socket in
|
||||
|
@ -1149,7 +1158,7 @@ public class HRegionServer implements HRegionInterface,
|
|||
*/
|
||||
protected void kill() {
|
||||
this.killed = true;
|
||||
abort();
|
||||
abort("Simulated kill");
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -86,20 +86,21 @@ class LogRoller extends Thread implements LogRollListener {
|
|||
} catch (FailedLogCloseException e) {
|
||||
LOG.fatal("Forcing server shutdown", e);
|
||||
server.checkFileSystem();
|
||||
server.abort();
|
||||
server.abort("Failed log close in log roller", e);
|
||||
} catch (java.net.ConnectException e) {
|
||||
LOG.fatal("Forcing server shutdown", e);
|
||||
server.checkFileSystem();
|
||||
server.abort();
|
||||
server.abort("Failed connect in log roller", e);
|
||||
} catch (IOException ex) {
|
||||
LOG.fatal("Log rolling failed with ioe: ",
|
||||
RemoteExceptionHandler.checkIOException(ex));
|
||||
server.checkFileSystem();
|
||||
// Abort if we get here. We probably won't recover an IOE. HBASE-1132
|
||||
server.abort();
|
||||
server.abort("IOE in log roller", ex);
|
||||
} catch (Exception ex) {
|
||||
LOG.error("Log rolling failed", ex);
|
||||
server.checkFileSystem();
|
||||
server.abort("Log rolling failed", ex);
|
||||
} finally {
|
||||
rollLog.set(false);
|
||||
rollLock.unlock();
|
||||
|
|
|
@ -261,8 +261,7 @@ class MemStoreFlusher extends Thread implements FlushRequester {
|
|||
// is required. Currently the only way to do this is a restart of
|
||||
// the server. Abort because hdfs is probably bad (HBASE-644 is a case
|
||||
// where hdfs was bad but passed the hdfs check).
|
||||
LOG.fatal("Replay of hlog required. Forcing server shutdown", ex);
|
||||
server.abort();
|
||||
server.abort("Replay of HLog required. Forcing server shutdown", ex);
|
||||
return false;
|
||||
} catch (IOException ex) {
|
||||
LOG.error("Cache flush failed"
|
||||
|
|
|
@ -274,7 +274,7 @@ public class MiniHBaseCluster {
|
|||
public String abortRegionServer(int serverNumber) {
|
||||
HRegionServer server = getRegionServer(serverNumber);
|
||||
LOG.info("Aborting " + server.toString());
|
||||
server.abort();
|
||||
server.abort("Aborting for tests", new Exception("Trace info"));
|
||||
return server.toString();
|
||||
}
|
||||
|
||||
|
|
|
@ -337,10 +337,10 @@ public class TestMasterTransitions {
|
|||
if (!incomingMsg.isType(HMsg.Type.MSG_REPORT_PROCESS_OPEN)) return true;
|
||||
// Save the region that is in transition so can test later it came back.
|
||||
this.regionToFind = incomingMsg.getRegionInfo();
|
||||
LOG.info("ABORTING " + this.victim + " because got a " +
|
||||
String msg = "ABORTING " + this.victim + " because got a " +
|
||||
HMsg.Type.MSG_REPORT_PROCESS_OPEN + " on this server for " +
|
||||
incomingMsg.getRegionInfo().getRegionNameAsString());
|
||||
this.victim.abort();
|
||||
incomingMsg.getRegionInfo().getRegionNameAsString();
|
||||
this.victim.abort(msg);
|
||||
this.abortSent = true;
|
||||
return true;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue