HBASE-1062 Compactions at (re)start on a large table can overwhelm DFS

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@729560 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Andrew Kyle Purtell 2008-12-26 23:23:04 +00:00
parent 39b36d225c
commit c4aa758efe
5 changed files with 128 additions and 7 deletions

View File

@ -190,6 +190,7 @@ Release 0.19.0 - Unreleased
HBASE-1069 Show whether HRegion major compacts or not in INFO level
HBASE-1066 Master should support close/open/reassignment/enable/disable
operations on individual regions
HBASE-1062 Compactions at (re)start on a large table can overwhelm DFS
NEW FEATURES
HBASE-875 Use MurmurHash instead of JenkinsHash [in bloomfilters]

View File

@ -224,6 +224,12 @@
Used as sleep interval by service threads such as META scanner and log roller.
</description>
</property>
<property>
<name>hbase.regionserver.safemode.period</name>
<value>120000</value>
<description>Time, in milliseconds, to wait after regionserver startup before
beginning compactions and memcache flushes. A value less than 1 disables
safe mode.</description>
</property>
<property>
<name>hbase.hregion.memcache.flush.size</name>
<value>67108864</value>

View File

@ -60,7 +60,9 @@ class CompactSplitThread extends Thread implements HConstants {
new LinkedBlockingQueue<HRegion>();
private final HashSet<HRegion> regionsInQueue = new HashSet<HRegion>();
private volatile int limit = 1;
/** @param server */
public CompactSplitThread(HRegionServer server) {
super();
@ -73,9 +75,25 @@ class CompactSplitThread extends Thread implements HConstants {
@Override
public void run() {
while (!this.server.isStopRequested() && this.server.isInSafeMode()) {
try {
Thread.sleep(this.frequency);
} catch (InterruptedException ex) {
continue;
}
}
int count = 0;
while (!this.server.isStopRequested()) {
HRegion r = null;
try {
if ((limit > 0) && (++count > limit)) {
try {
Thread.sleep(this.frequency);
} catch (InterruptedException ex) {
continue;
}
count = 0;
}
r = compactionQueue.poll(this.frequency, TimeUnit.MILLISECONDS);
if (r != null && !this.server.isStopRequested()) {
synchronized (regionsInQueue) {
@ -195,7 +213,15 @@ class CompactSplitThread extends Thread implements HConstants {
// Do not serve the new regions. Let the Master assign them.
}
/**
 * Adjusts how many compactions the work loop may run before it pauses for
 * one check interval.
 * @param limit maximum compactions per cycle; the work loop only throttles
 * when this is greater than zero, so -1 (or any non-positive value) removes
 * the limit
 */
void setLimit(int limit) {
  // The field is volatile, so the compaction thread sees the new value on
  // its next pass through the loop.
  this.limit = limit;
}
/**
* Only interrupt once it's done with a run through the work loop.
*/

View File

@ -117,7 +117,9 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
protected final AtomicBoolean stopRequested = new AtomicBoolean(false);
protected final AtomicBoolean quiesced = new AtomicBoolean(false);
protected final AtomicBoolean safeMode = new AtomicBoolean(true);
// Go down hard. Used if file system becomes unavailable and also in
// debugging and unit tests.
protected volatile boolean abortRequested;
@ -197,6 +199,9 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
final LogRoller logRoller;
final LogFlusher logFlusher;
// safemode processing
SafeModeThread safeModeThread;
// flag set after we're done setting up server threads (used for testing)
protected volatile boolean isOnline;
@ -433,8 +438,8 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
checkFileSystem();
}
if (this.stopRequested.get()) {
LOG.info("Stop was requested, clearing the toDo " +
"despite of the exception");
LOG.info("Stop was requested, clearing the toDo " +
"despite of the exception");
toDo.clear();
continue;
}
@ -718,6 +723,63 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
return this.fsOk;
}
/**
 * Thread for toggling safemode after some configurable interval.
 * <p>
 * The thread first sleeps for <code>hbase.regionserver.safemode.period</code>
 * milliseconds, then clears the safe mode flag. After that it raises the
 * per-cycle compaction limit step by step, pausing
 * <code>hbase.regionserver.thread.splitcompactcheckfrequency</code>
 * milliseconds between steps, and finally removes the limit (-1).
 * <p>
 * If interrupted at any point, it removes the compaction limit (and, if still
 * in safe mode, leaves safe mode) before exiting, so that an abnormal
 * condition never leaves flushes or compactions blocked.
 */
private class SafeModeThread extends Thread {
  @Override
  public void run() {
    // first, wait the required interval before turning off safemode
    int safemodeInterval =
      conf.getInt("hbase.regionserver.safemode.period", 120 * 1000);
    try {
      Thread.sleep(safemodeInterval);
    } catch (InterruptedException ex) {
      // turn off safemode and limits on the way out due to some kind of
      // abnormal condition so we do not prevent such things as memcache
      // flushes and worsen the situation
      safeMode.set(false);
      unlimitAndLogInterrupt();
      return;
    }
    LOG.info("leaving safe mode");
    safeMode.set(false);

    // now that safemode is off, slowly increase the per-cycle compaction
    // limit, finally setting it to unlimited (-1)
    int compactionCheckInterval =
      conf.getInt("hbase.regionserver.thread.splitcompactcheckfrequency",
        20 * 1000);
    final int[] limitSteps = {
      1, 1, 1, 1,
      2, 2, 2, 2, 2, 2,
      3, 3, 3, 3, 3, 3, 3, 3,
      4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
      -1
    };
    for (int step : limitSteps) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("setting compaction limit to " + step);
      }
      compactSplitThread.setLimit(step);
      if (step < 0) {
        // Compactions are now unlimited; there is nothing left to wait for,
        // so do not hold the thread for another interval.
        break;
      }
      try {
        Thread.sleep(compactionCheckInterval);
      } catch (InterruptedException ex) {
        // unlimit compactions before exiting
        unlimitAndLogInterrupt();
        return;
      }
    }
    LOG.info("compactions no longer limited");
  }

  /** Removes the compaction limit and notes the interrupt at debug level. */
  private void unlimitAndLogInterrupt() {
    compactSplitThread.setLimit(-1);
    if (LOG.isDebugEnabled()) {
      LOG.debug(this.getName() + " exiting on interrupt");
    }
  }
}
/*
* Thread to shutdown the region server in an orderly manner. This thread
* is registered as a shutdown hook in the HRegionServer constructor and is
@ -937,6 +999,18 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
this.infoServer.setAttribute("regionserver", this);
this.infoServer.start();
}
// Set up the safe mode handler if safe mode has been configured.
if (conf.getInt("hbase.regionserver.safemode.period", 0) < 1) {
safeMode.set(false);
compactSplitThread.setLimit(-1);
LOG.debug("skipping safe mode");
} else {
this.safeModeThread = new SafeModeThread();
Threads.setDaemonThreadRunning(this.safeModeThread, n + ".safeMode",
handler);
}
// Start Server. This service is like leases in that it internally runs
// a thread.
this.server.start();
@ -1304,7 +1378,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
void closeRegion(final HRegionInfo hri, final boolean reportWhenCompleted)
throws IOException {
HRegion region = this.removeFromOnlineRegions(hri);
HRegion region = this.removeFromOnlineRegions(hri);
if (region != null) {
region.close();
if(reportWhenCompleted) {
@ -1842,7 +1916,14 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
public boolean isStopRequested() {
  // Snapshot of the shared stop flag; service threads poll this to decide
  // when to leave their work loops.
  final boolean stopping = this.stopRequested.get();
  return stopping;
}
/**
 * Reports whether this region server is still in its startup safe mode.
 * The compaction and memcache flusher threads wait at the top of their
 * run loops while this returns true.
 * @return true if the region server is in safe mode
 */
public boolean isInSafeMode() {
  final boolean inSafeMode = this.safeMode.get();
  return inSafeMode;
}
/**
*
* @return the configuration

View File

@ -120,6 +120,13 @@ class MemcacheFlusher extends Thread implements FlushRequester {
@Override
public void run() {
while (!this.server.isStopRequested() && this.server.isInSafeMode()) {
try {
Thread.sleep(threadWakeFrequency);
} catch (InterruptedException ex) {
continue;
}
}
while (!server.isStopRequested()) {
HRegion r = null;
try {