HBASE-706 On OOME, regionserver sticks around and doesn't go down with cluster

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@674958 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2008-07-08 21:10:33 +00:00
parent 9d87dd9088
commit 6fe7bfb481
4 changed files with 34 additions and 3 deletions

View File

@ -265,6 +265,8 @@ Trunk (unreleased changes)
(Jean-Daniel Cryans via Stack)
HBASE-731 Add a meta refresh tag to the Web ui for master and region server
(Jean-Daniel Cryans via Stack)
HBASE-706 On OOME, regionserver sticks around and doesn't go down with cluster
(Jean-Daniel Cryans via Stack)
NEW FEATURES
HBASE-47 Option to set TTL for columns in hbase

View File

@ -250,6 +250,13 @@
<description>How often a region server runs the split/compaction check.
</description>
</property>
<property>
<name>hbase.regionserver.nbreservationblocks</name>
<value>4</value>
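<description>The number of reservation blocks (5MB each) a region server
allocates at startup and releases when it hits an OOME, so that it has
enough free memory to abort instead of lingering in an unstable state.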
</description>
</property>
<property>
<name>hbase.io.index.interval</name>
<value>32</value>

View File

@ -104,7 +104,10 @@ public interface HConstants {
/** Default maximum file size */
static final long DEFAULT_MAX_FILE_SIZE = 256 * 1024 * 1024;
/** Default size, in bytes, of a single reservation block (5MB) */
static final int DEFAULT_SIZE_RESERVATION_BLOCK = 1024 * 1024 * 5;
// Always store the location of the root table's HRegion.
// This HRegion is never split.

View File

@ -31,6 +31,7 @@ import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Random;
@ -156,6 +157,12 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
/** region server process name */
public static final String REGIONSERVER = "regionserver";
/**
* Memory set aside so the server can survive an OOME long enough to abort.
* The HRS constructor fills this list with
* <code>hbase.regionserver.nbreservationblocks</code> byte arrays, each
* DEFAULT_SIZE_RESERVATION_BLOCK bytes long; abort() clears the list so the
* arrays become eligible for garbage collection. See HBASE-706.
*/
private final LinkedList<byte[]> reservedSpace = new LinkedList<byte []>();
/**
* Thread to shutdown the region server in an orderly manner. This thread
* is registered as a shutdown hook in the HRegionServer constructor and is
@ -257,7 +264,12 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
this.leases = new Leases(
conf.getInt("hbase.regionserver.lease.period", 3 * 60 * 1000),
this.threadWakeFrequency);
int nbBlocks = conf.getInt("hbase.regionserver.nbreservationblocks", 4);
for(int i = 0; i < nbBlocks; i++) {
reservedSpace.add(new byte[DEFAULT_SIZE_RESERVATION_BLOCK]);
}
// Register shutdown hook for HRegionServer, runs an orderly shutdown
// when a kill signal is received
Runtime.getRuntime().addShutdownHook(new ShutdownThread(this));
@ -403,6 +415,9 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
housekeeping();
sleeper.sleep(lastMsg);
} // for
} catch (OutOfMemoryError error) {
abort();
LOG.fatal("Ran out of memory", error);
} catch (Throwable t) {
LOG.fatal("Unhandled exception. Aborting...", t);
abort();
@ -649,6 +664,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
* from under hbase or we OOME.
*/
public void abort() {
// Drop the references to the reserved byte[] blocks before doing anything
// else, so the garbage collector can reclaim them and the steps below have
// memory to work with after an OOME (see HBASE-706).
reservedSpace.clear();
// Record that this shutdown is an abort; the code that reads this flag is
// outside this view.
this.abortRequested = true;
// NOTE(review): stop() is not visible here; presumably it triggers the
// actual shutdown of the server — confirm.
stop();
}
@ -1133,6 +1149,9 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
try {
cacheFlusher.reclaimMemcacheMemory();
region.batchUpdate(b);
} catch (OutOfMemoryError error) {
abort();
LOG.fatal("Ran out of memory", error);
} catch (IOException e) {
checkFileSystem();
throw e;
@ -1608,4 +1627,4 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
.getClass(HConstants.REGION_SERVER_IMPL, HRegionServer.class);
doMain(args, regionServerClass);
}
}
}