HBASE-1052 Stopping a HRegionServer with unflushed cache causes data loss from org.apache.hadoop.hbase.DroppedSnapshotException

git-svn-id: https://svn.apache.org/repos/asf/hadoop/hbase/trunk@725828 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jim Kellerman 2008-12-11 21:53:35 +00:00
parent 7d7f84b693
commit 2ab4b14572
2 changed files with 68 additions and 7 deletions

View File

@ -104,6 +104,8 @@ Release 0.19.0 - Unreleased
HBASE-900 Regionserver memory leak causing OOME during relatively
modest bulk importing; part 1
HBASE-1054 Index NPE on scanning (Clint Morgan via Andrew Purtell)
HBASE-1052 Stopping a HRegionServer with unflushed cache causes data loss
from org.apache.hadoop.hbase.DroppedSnapshotException
IMPROVEMENTS
HBASE-901 Add a limit to key length, check key and value length on client side

View File

@ -24,6 +24,7 @@ import java.lang.Thread.UncaughtExceptionHandler;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryUsage;
import java.lang.reflect.Constructor;
import java.lang.reflect.Field;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.Arrays;
@ -282,10 +283,6 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
for(int i = 0; i < nbBlocks; i++) {
reservedSpace.add(new byte[DEFAULT_SIZE_RESERVATION_BLOCK]);
}
// Register shutdown hook for HRegionServer, runs an orderly shutdown
// when a kill signal is recieved
Runtime.getRuntime().addShutdownHook(new ShutdownThread(this));
}
/**
@ -522,6 +519,15 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
this.hbaseMaster = null;
}
join();
LOG.info("Running hdfs shutdown thread");
hdfsShutdownThread.start();
try {
hdfsShutdownThread.join();
LOG.info("Hdfs shutdown thread completed.");
} catch (InterruptedException e) {
LOG.warn("hdfsShutdownThread.join() was interrupted", e);
}
LOG.info(Thread.currentThread().getName() + " exiting");
}
@ -552,6 +558,13 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
// to defaults).
this.conf.set("fs.default.name", this.conf.get("hbase.rootdir"));
this.fs = FileSystem.get(this.conf);
// Register shutdown hook for HRegionServer, runs an orderly shutdown
// when a kill signal is recieved
Runtime.getRuntime().addShutdownHook(new ShutdownThread(this,
Thread.currentThread()));
this.hdfsShutdownThread = suppressHdfsShutdownHook();
this.rootDir = new Path(this.conf.get(HConstants.HBASE_DIR));
this.log = setupHLog();
this.logFlusher.setHLog(log);
@ -693,25 +706,34 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
*/
private static class ShutdownThread extends Thread {
private final HRegionServer instance;
private final Thread mainThread;
/**
* @param instance
* @param mainThread
*/
public ShutdownThread(HRegionServer instance) {
public ShutdownThread(HRegionServer instance, Thread mainThread) {
this.instance = instance;
this.mainThread = mainThread;
}
@Override
public void run() {
LOG.info("Starting shutdown thread.");
// tell the region server to stop and wait for it to complete
// tell the region server to stop
instance.stop();
instance.join();
// Wait for main thread to exit.
Threads.shutdown(mainThread);
LOG.info("Shutdown thread complete");
}
}
// We need to call HDFS shutdown when we are done shutting down
private Thread hdfsShutdownThread;
/*
* Inner class that runs on a long period checking if regions need major
* compaction.
@ -744,6 +766,43 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
}
}
/**
* So, HDFS caches FileSystems so when you call FileSystem.get it's fast. In
* order to make sure things are cleaned up, it also creates a shutdown hook
* so that all filesystems can be closed when the process is terminated. This
* conveniently runs concurrently with our own shutdown handler, and
* therefore causes all the filesystems to be closed before the server can do
* all its necessary cleanup.
*
* The crazy dirty reflection in this method sneaks into the FileSystem cache
* and grabs the shutdown hook, removes it from the list of active shutdown
* hooks, and hangs onto it until later. Then, after we're properly done with
* our graceful shutdown, we can execute the hdfs hook manually to make sure
* loose ends are tied up.
*
* This seems quite fragile and susceptible to breaking if Hadoop changes
* anything about the way this cleanup is managed. Keep an eye on things.
*/
private Thread suppressHdfsShutdownHook() {
try {
Field field = FileSystem.class.getDeclaredField ("clientFinalizer");
field.setAccessible(true);
Thread hdfsClientFinalizer = (Thread)field.get(null);
if (hdfsClientFinalizer == null) {
throw new RuntimeException("client finalizer is null, can't suppress!");
}
Runtime.getRuntime().removeShutdownHook(hdfsClientFinalizer);
return hdfsClientFinalizer;
} catch (NoSuchFieldException nsfe) {
LOG.fatal("Couldn't find field 'clientFinalizer' in FileSystem!", nsfe);
throw new RuntimeException("Failed to suppress HDFS shutdown hook");
} catch (IllegalAccessException iae) {
LOG.fatal("Couldn't access field 'clientFinalizer' in FileSystem!", iae);
throw new RuntimeException("Failed to suppress HDFS shutdown hook");
}
}
/**
* Report the status of the server. A server is online once all the startup
* is completed (setting up filesystem, starting service threads, etc.). This