HBASE-1020 Regionserver OOME handler should dump vital stats
git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@720617 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
2c56399b01
commit
baa1af5683
|
@ -140,6 +140,7 @@ Release 0.19.0 - Unreleased
|
||||||
HBASE-972 Update hbase trunk to use released hadoop 0.19.0
|
HBASE-972 Update hbase trunk to use released hadoop 0.19.0
|
||||||
HBASE-1022 Add storefile index size to hbase metrics
|
HBASE-1022 Add storefile index size to hbase metrics
|
||||||
HBASE-1026 Tests in mapred are failing
|
HBASE-1026 Tests in mapred are failing
|
||||||
|
HBASE-1020 Regionserver OOME handler should dump vital stats
|
||||||
|
|
||||||
NEW FEATURES
|
NEW FEATURES
|
||||||
HBASE-875 Use MurmurHash instead of JenkinsHash [in bloomfilters]
|
HBASE-875 Use MurmurHash instead of JenkinsHash [in bloomfilters]
|
||||||
|
|
|
@ -434,10 +434,8 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
housekeeping();
|
housekeeping();
|
||||||
sleeper.sleep(lastMsg);
|
sleeper.sleep(lastMsg);
|
||||||
} // for
|
} // for
|
||||||
} catch (OutOfMemoryError error) {
|
|
||||||
abort();
|
|
||||||
LOG.fatal("Ran out of memory", error);
|
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
|
checkOOME(t);
|
||||||
LOG.fatal("Unhandled exception. Aborting...", t);
|
LOG.fatal("Unhandled exception. Aborting...", t);
|
||||||
abort();
|
abort();
|
||||||
}
|
}
|
||||||
|
@ -550,6 +548,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
isOnline = true;
|
isOnline = true;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
this.stopRequested.set(true);
|
this.stopRequested.set(true);
|
||||||
|
checkOOME(e);
|
||||||
isOnline = false;
|
isOnline = false;
|
||||||
e = RemoteExceptionHandler.checkIOException(e);
|
e = RemoteExceptionHandler.checkIOException(e);
|
||||||
LOG.fatal("Failed init", e);
|
LOG.fatal("Failed init", e);
|
||||||
|
@ -559,6 +558,22 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Check if an OOME and if so, call abort.
|
||||||
|
* @param e
|
||||||
|
* @return True if we OOME'd and are aborting.
|
||||||
|
*/
|
||||||
|
private boolean checkOOME(final Throwable e) {
|
||||||
|
boolean aborting = false;
|
||||||
|
if (e instanceof OutOfMemoryError ||
|
||||||
|
(e.getCause()!= null && e.getCause() instanceof OutOfMemoryError)) {
|
||||||
|
LOG.fatal("OOME, aborting.", e);
|
||||||
|
abort();
|
||||||
|
aborting = true;
|
||||||
|
}
|
||||||
|
return aborting;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Thread to shutdown the region server in an orderly manner. This thread
|
* Thread to shutdown the region server in an orderly manner. This thread
|
||||||
* is registered as a shutdown hook in the HRegionServer constructor and is
|
* is registered as a shutdown hook in the HRegionServer constructor and is
|
||||||
|
@ -800,8 +815,9 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
* from under hbase or we OOME.
|
* from under hbase or we OOME.
|
||||||
*/
|
*/
|
||||||
public void abort() {
|
public void abort() {
|
||||||
reservedSpace.clear();
|
|
||||||
this.abortRequested = true;
|
this.abortRequested = true;
|
||||||
|
this.reservedSpace.clear();
|
||||||
|
LOG.info("Dump of metrics: " + this.metrics.toString());
|
||||||
stop();
|
stop();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -892,7 +908,6 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
*/
|
*/
|
||||||
void reportSplit(HRegionInfo oldRegion, HRegionInfo newRegionA,
|
void reportSplit(HRegionInfo oldRegion, HRegionInfo newRegionA,
|
||||||
HRegionInfo newRegionB) {
|
HRegionInfo newRegionB) {
|
||||||
|
|
||||||
outboundMsgs.add(new HMsg(HMsg.Type.MSG_REPORT_SPLIT, oldRegion,
|
outboundMsgs.add(new HMsg(HMsg.Type.MSG_REPORT_SPLIT, oldRegion,
|
||||||
(oldRegion.getRegionNameAsString() + " split; daughters: " +
|
(oldRegion.getRegionNameAsString() + " split; daughters: " +
|
||||||
newRegionA.getRegionNameAsString() + ", " +
|
newRegionA.getRegionNameAsString() + ", " +
|
||||||
|
@ -1017,6 +1032,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch(Throwable t) {
|
} catch(Throwable t) {
|
||||||
|
checkOOME(t);
|
||||||
LOG.fatal("Unhandled exception", t);
|
LOG.fatal("Unhandled exception", t);
|
||||||
} finally {
|
} finally {
|
||||||
LOG.info("worker thread exiting");
|
LOG.info("worker thread exiting");
|
||||||
|
@ -1039,8 +1055,9 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
this.compactSplitThread.
|
this.compactSplitThread.
|
||||||
compactionRequested(region, "Region open check");
|
compactionRequested(region, "Region open check");
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.error("error opening region " + regionInfo.getRegionNameAsString(), e);
|
checkOOME(e);
|
||||||
|
LOG.error("error opening region " + regionInfo.getRegionNameAsString(),
|
||||||
|
e);
|
||||||
// TODO: add an extra field in HRegionInfo to indicate that there is
|
// TODO: add an extra field in HRegionInfo to indicate that there is
|
||||||
// an error. We can't do that now because that would be an incompatible
|
// an error. We can't do that now because that would be an incompatible
|
||||||
// change that would require a migration
|
// change that would require a migration
|
||||||
|
@ -1113,6 +1130,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
LOG.error("error closing region " +
|
LOG.error("error closing region " +
|
||||||
Bytes.toString(region.getRegionName()),
|
Bytes.toString(region.getRegionName()),
|
||||||
RemoteExceptionHandler.checkIOException(e));
|
RemoteExceptionHandler.checkIOException(e));
|
||||||
|
checkOOME(e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return regionsToClose;
|
return regionsToClose;
|
||||||
|
@ -1233,6 +1251,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
result.putAll(map);
|
result.putAll(map);
|
||||||
return new RowResult(row, result);
|
return new RowResult(row, result);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
checkOOME(e);
|
||||||
checkFileSystem();
|
checkFileSystem();
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
|
@ -1250,6 +1269,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
RowResult rr = region.getClosestRowBefore(row, columnFamily);
|
RowResult rr = region.getClosestRowBefore(row, columnFamily);
|
||||||
return rr;
|
return rr;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
checkOOME(e);
|
||||||
checkFileSystem();
|
checkFileSystem();
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
|
@ -1286,6 +1306,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
}
|
}
|
||||||
return resultSets.toArray(new RowResult[resultSets.size()]);
|
return resultSets.toArray(new RowResult[resultSets.size()]);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
checkOOME(e);
|
||||||
checkFileSystem();
|
checkFileSystem();
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
|
@ -1304,10 +1325,8 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
try {
|
try {
|
||||||
cacheFlusher.reclaimMemcacheMemory();
|
cacheFlusher.reclaimMemcacheMemory();
|
||||||
region.batchUpdate(b, getLockFromId(b.getRowLock()));
|
region.batchUpdate(b, getLockFromId(b.getRowLock()));
|
||||||
} catch (OutOfMemoryError error) {
|
|
||||||
abort();
|
|
||||||
LOG.fatal("Ran out of memory", error);
|
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
checkOOME(e);
|
||||||
checkFileSystem();
|
checkFileSystem();
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
|
@ -1327,14 +1346,12 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
locks[i] = getLockFromId(b[i].getRowLock());
|
locks[i] = getLockFromId(b[i].getRowLock());
|
||||||
region.batchUpdate(b[i], locks[i]);
|
region.batchUpdate(b[i], locks[i]);
|
||||||
}
|
}
|
||||||
} catch (OutOfMemoryError error) {
|
|
||||||
abort();
|
|
||||||
LOG.fatal("Ran out of memory", error);
|
|
||||||
} catch(WrongRegionException ex) {
|
} catch(WrongRegionException ex) {
|
||||||
return i;
|
return i;
|
||||||
} catch (NotServingRegionException ex) {
|
} catch (NotServingRegionException ex) {
|
||||||
return i;
|
return i;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
checkOOME(e);
|
||||||
checkFileSystem();
|
checkFileSystem();
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
|
@ -1397,7 +1414,8 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
return scannerId;
|
return scannerId;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.error("Error opening scanner (fsOk: " + this.fsOk + ")",
|
LOG.error("Error opening scanner (fsOk: " + this.fsOk + ")",
|
||||||
RemoteExceptionHandler.checkIOException(e));
|
RemoteExceptionHandler.checkIOException(e));
|
||||||
|
checkOOME(e);
|
||||||
checkFileSystem();
|
checkFileSystem();
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
|
@ -1430,6 +1448,9 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
s.close();
|
s.close();
|
||||||
this.leases.cancelLease(scannerName);
|
this.leases.cancelLease(scannerName);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
// TODO: Should we even be returning an exception out of a close?
|
||||||
|
// What can the client do with an exception in close?
|
||||||
|
checkOOME(e);
|
||||||
checkFileSystem();
|
checkFileSystem();
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
|
@ -1527,7 +1548,8 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
return lockId;
|
return lockId;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.error("Error obtaining row lock (fsOk: " + this.fsOk + ")",
|
LOG.error("Error obtaining row lock (fsOk: " + this.fsOk + ")",
|
||||||
RemoteExceptionHandler.checkIOException(e));
|
RemoteExceptionHandler.checkIOException(e));
|
||||||
|
checkOOME(e);
|
||||||
checkFileSystem();
|
checkFileSystem();
|
||||||
throw e;
|
throw e;
|
||||||
}
|
}
|
||||||
|
@ -1842,7 +1864,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
}
|
}
|
||||||
|
|
||||||
public long getProtocolVersion(final String protocol,
|
public long getProtocolVersion(final String protocol,
|
||||||
@SuppressWarnings("unused") final long clientVersion)
|
final long clientVersion)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
if (protocol.equals(HRegionInterface.class.getName())) {
|
if (protocol.equals(HRegionInterface.class.getName())) {
|
||||||
return HBaseRPCProtocolVersion.versionID;
|
return HBaseRPCProtocolVersion.versionID;
|
||||||
|
|
Loading…
Reference in New Issue