HBASE-1020 Regionserver OOME handler should dump vital stats

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@720617 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2008-11-25 21:50:19 +00:00
parent 2c56399b01
commit baa1af5683
2 changed files with 40 additions and 17 deletions

View File

@ -140,6 +140,7 @@ Release 0.19.0 - Unreleased
HBASE-972 Update hbase trunk to use released hadoop 0.19.0 HBASE-972 Update hbase trunk to use released hadoop 0.19.0
HBASE-1022 Add storefile index size to hbase metrics HBASE-1022 Add storefile index size to hbase metrics
HBASE-1026 Tests in mapred are failing HBASE-1026 Tests in mapred are failing
HBASE-1020 Regionserver OOME handler should dump vital stats
NEW FEATURES NEW FEATURES
HBASE-875 Use MurmurHash instead of JenkinsHash [in bloomfilters] HBASE-875 Use MurmurHash instead of JenkinsHash [in bloomfilters]

View File

@ -434,10 +434,8 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
housekeeping(); housekeeping();
sleeper.sleep(lastMsg); sleeper.sleep(lastMsg);
} // for } // for
} catch (OutOfMemoryError error) {
abort();
LOG.fatal("Ran out of memory", error);
} catch (Throwable t) { } catch (Throwable t) {
checkOOME(t);
LOG.fatal("Unhandled exception. Aborting...", t); LOG.fatal("Unhandled exception. Aborting...", t);
abort(); abort();
} }
@ -550,6 +548,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
isOnline = true; isOnline = true;
} catch (IOException e) { } catch (IOException e) {
this.stopRequested.set(true); this.stopRequested.set(true);
checkOOME(e);
isOnline = false; isOnline = false;
e = RemoteExceptionHandler.checkIOException(e); e = RemoteExceptionHandler.checkIOException(e);
LOG.fatal("Failed init", e); LOG.fatal("Failed init", e);
@ -559,6 +558,22 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
} }
} }
/*
 * Check whether the passed throwable is an OutOfMemoryError, or has one
 * anywhere in its chain of causes, and if so log it and call abort.
 * The whole cause chain is walked (not just the immediate cause) because an
 * OOME may be wrapped more than once before it reaches a catch block, e.g.
 * IOException -> RuntimeException -> OutOfMemoryError.
 * The walk allocates nothing, which matters when the heap is exhausted.
 * @param e Throwable to inspect; null is tolerated and treated as "not OOME".
 * @return True if we OOME'd and are aborting.
 */
private boolean checkOOME(final Throwable e) {
  for (Throwable t = e; t != null; t = t.getCause()) {
    if (t instanceof OutOfMemoryError) {
      // Log the outermost throwable so the full wrapping context is recorded.
      LOG.fatal("OOME, aborting.", e);
      abort();
      return true;
    }
  }
  return false;
}
/* /*
* Thread to shutdown the region server in an orderly manner. This thread * Thread to shutdown the region server in an orderly manner. This thread
* is registered as a shutdown hook in the HRegionServer constructor and is * is registered as a shutdown hook in the HRegionServer constructor and is
@ -800,8 +815,9 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
* from under hbase or we OOME. * from under hbase or we OOME.
*/ */
public void abort() { public void abort() {
reservedSpace.clear();
this.abortRequested = true; this.abortRequested = true;
this.reservedSpace.clear();
LOG.info("Dump of metrics: " + this.metrics.toString());
stop(); stop();
} }
@ -892,7 +908,6 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
*/ */
void reportSplit(HRegionInfo oldRegion, HRegionInfo newRegionA, void reportSplit(HRegionInfo oldRegion, HRegionInfo newRegionA,
HRegionInfo newRegionB) { HRegionInfo newRegionB) {
outboundMsgs.add(new HMsg(HMsg.Type.MSG_REPORT_SPLIT, oldRegion, outboundMsgs.add(new HMsg(HMsg.Type.MSG_REPORT_SPLIT, oldRegion,
(oldRegion.getRegionNameAsString() + " split; daughters: " + (oldRegion.getRegionNameAsString() + " split; daughters: " +
newRegionA.getRegionNameAsString() + ", " + newRegionA.getRegionNameAsString() + ", " +
@ -1017,6 +1032,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
} }
} }
} catch(Throwable t) { } catch(Throwable t) {
checkOOME(t);
LOG.fatal("Unhandled exception", t); LOG.fatal("Unhandled exception", t);
} finally { } finally {
LOG.info("worker thread exiting"); LOG.info("worker thread exiting");
@ -1039,8 +1055,9 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
this.compactSplitThread. this.compactSplitThread.
compactionRequested(region, "Region open check"); compactionRequested(region, "Region open check");
} catch (IOException e) { } catch (IOException e) {
LOG.error("error opening region " + regionInfo.getRegionNameAsString(), e); checkOOME(e);
LOG.error("error opening region " + regionInfo.getRegionNameAsString(),
e);
// TODO: add an extra field in HRegionInfo to indicate that there is // TODO: add an extra field in HRegionInfo to indicate that there is
// an error. We can't do that now because that would be an incompatible // an error. We can't do that now because that would be an incompatible
// change that would require a migration // change that would require a migration
@ -1113,6 +1130,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
LOG.error("error closing region " + LOG.error("error closing region " +
Bytes.toString(region.getRegionName()), Bytes.toString(region.getRegionName()),
RemoteExceptionHandler.checkIOException(e)); RemoteExceptionHandler.checkIOException(e));
checkOOME(e);
} }
} }
return regionsToClose; return regionsToClose;
@ -1233,6 +1251,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
result.putAll(map); result.putAll(map);
return new RowResult(row, result); return new RowResult(row, result);
} catch (IOException e) { } catch (IOException e) {
checkOOME(e);
checkFileSystem(); checkFileSystem();
throw e; throw e;
} }
@ -1250,6 +1269,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
RowResult rr = region.getClosestRowBefore(row, columnFamily); RowResult rr = region.getClosestRowBefore(row, columnFamily);
return rr; return rr;
} catch (IOException e) { } catch (IOException e) {
checkOOME(e);
checkFileSystem(); checkFileSystem();
throw e; throw e;
} }
@ -1286,6 +1306,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
} }
return resultSets.toArray(new RowResult[resultSets.size()]); return resultSets.toArray(new RowResult[resultSets.size()]);
} catch (IOException e) { } catch (IOException e) {
checkOOME(e);
checkFileSystem(); checkFileSystem();
throw e; throw e;
} }
@ -1304,10 +1325,8 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
try { try {
cacheFlusher.reclaimMemcacheMemory(); cacheFlusher.reclaimMemcacheMemory();
region.batchUpdate(b, getLockFromId(b.getRowLock())); region.batchUpdate(b, getLockFromId(b.getRowLock()));
} catch (OutOfMemoryError error) {
abort();
LOG.fatal("Ran out of memory", error);
} catch (IOException e) { } catch (IOException e) {
checkOOME(e);
checkFileSystem(); checkFileSystem();
throw e; throw e;
} }
@ -1327,14 +1346,12 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
locks[i] = getLockFromId(b[i].getRowLock()); locks[i] = getLockFromId(b[i].getRowLock());
region.batchUpdate(b[i], locks[i]); region.batchUpdate(b[i], locks[i]);
} }
} catch (OutOfMemoryError error) {
abort();
LOG.fatal("Ran out of memory", error);
} catch(WrongRegionException ex) { } catch(WrongRegionException ex) {
return i; return i;
} catch (NotServingRegionException ex) { } catch (NotServingRegionException ex) {
return i; return i;
} catch (IOException e) { } catch (IOException e) {
checkOOME(e);
checkFileSystem(); checkFileSystem();
throw e; throw e;
} }
@ -1397,7 +1414,8 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
return scannerId; return scannerId;
} catch (IOException e) { } catch (IOException e) {
LOG.error("Error opening scanner (fsOk: " + this.fsOk + ")", LOG.error("Error opening scanner (fsOk: " + this.fsOk + ")",
RemoteExceptionHandler.checkIOException(e)); RemoteExceptionHandler.checkIOException(e));
checkOOME(e);
checkFileSystem(); checkFileSystem();
throw e; throw e;
} }
@ -1430,6 +1448,9 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
s.close(); s.close();
this.leases.cancelLease(scannerName); this.leases.cancelLease(scannerName);
} catch (IOException e) { } catch (IOException e) {
// TODO: Should we even be returning an exception out of a close?
// What can the client do with an exception in close?
checkOOME(e);
checkFileSystem(); checkFileSystem();
throw e; throw e;
} }
@ -1527,7 +1548,8 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
return lockId; return lockId;
} catch (IOException e) { } catch (IOException e) {
LOG.error("Error obtaining row lock (fsOk: " + this.fsOk + ")", LOG.error("Error obtaining row lock (fsOk: " + this.fsOk + ")",
RemoteExceptionHandler.checkIOException(e)); RemoteExceptionHandler.checkIOException(e));
checkOOME(e);
checkFileSystem(); checkFileSystem();
throw e; throw e;
} }
@ -1842,7 +1864,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
} }
public long getProtocolVersion(final String protocol, public long getProtocolVersion(final String protocol,
@SuppressWarnings("unused") final long clientVersion) final long clientVersion)
throws IOException { throws IOException {
if (protocol.equals(HRegionInterface.class.getName())) { if (protocol.equals(HRegionInterface.class.getName())) {
return HBaseRPCProtocolVersion.versionID; return HBaseRPCProtocolVersion.versionID;