diff --git a/CHANGES.txt b/CHANGES.txt index 3a453ff5b38..6cd663e6af3 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -104,6 +104,8 @@ Release 0.19.0 - Unreleased HBASE-900 Regionserver memory leak causing OOME during relatively modest bulk importing; part 1 HBASE-1054 Index NPE on scanning (Clint Morgan via Andrew Purtell) + HBASE-1052 Stopping a HRegionServer with unflushed cache causes data loss + from org.apache.hadoop.hbase.DroppedSnapshotException IMPROVEMENTS HBASE-901 Add a limit to key length, check key and value length on client side diff --git a/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java index 173fd291074..6f9d8937049 100644 --- a/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java +++ b/src/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java @@ -24,6 +24,7 @@ import java.lang.Thread.UncaughtExceptionHandler; import java.lang.management.ManagementFactory; import java.lang.management.MemoryUsage; import java.lang.reflect.Constructor; +import java.lang.reflect.Field; import java.net.InetSocketAddress; import java.util.ArrayList; import java.util.Arrays; @@ -282,10 +283,6 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable { for(int i = 0; i < nbBlocks; i++) { reservedSpace.add(new byte[DEFAULT_SIZE_RESERVATION_BLOCK]); } - - // Register shutdown hook for HRegionServer, runs an orderly shutdown - // when a kill signal is recieved - Runtime.getRuntime().addShutdownHook(new ShutdownThread(this)); } /** @@ -522,6 +519,15 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable { this.hbaseMaster = null; } join(); + + LOG.info("Running hdfs shutdown thread"); + hdfsShutdownThread.start(); + try { + hdfsShutdownThread.join(); + LOG.info("Hdfs shutdown thread completed."); + } catch (InterruptedException e) { + LOG.warn("hdfsShutdownThread.join() was interrupted", e); + } LOG.info(Thread.currentThread().getName() + " exiting"); } @@ -552,6 +558,13 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable { // to defaults). this.conf.set("fs.default.name", this.conf.get("hbase.rootdir")); this.fs = FileSystem.get(this.conf); + + // Register shutdown hook for HRegionServer, runs an orderly shutdown + // when a kill signal is recieved + Runtime.getRuntime().addShutdownHook(new ShutdownThread(this, + Thread.currentThread())); + this.hdfsShutdownThread = suppressHdfsShutdownHook(); + this.rootDir = new Path(this.conf.get(HConstants.HBASE_DIR)); this.log = setupHLog(); this.logFlusher.setHLog(log); @@ -693,25 +706,34 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable { */ private static class ShutdownThread extends Thread { private final HRegionServer instance; + private final Thread mainThread; /** * @param instance + * @param mainThread */ - public ShutdownThread(HRegionServer instance) { + public ShutdownThread(HRegionServer instance, Thread mainThread) { this.instance = instance; + this.mainThread = mainThread; } @Override public void run() { LOG.info("Starting shutdown thread."); - // tell the region server to stop and wait for it to complete + // tell the region server to stop instance.stop(); - instance.join(); + + // Wait for main thread to exit. + Threads.shutdown(mainThread); + LOG.info("Shutdown thread complete"); } } + // We need to call HDFS shutdown when we are done shutting down + private Thread hdfsShutdownThread; + /* * Inner class that runs on a long period checking if regions need major * compaction. @@ -744,6 +766,43 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable { } } + /** + * So, HDFS caches FileSystems so when you call FileSystem.get it's fast. In + * order to make sure things are cleaned up, it also creates a shutdown hook + * so that all filesystems can be closed when the process is terminated. This + * conveniently runs concurrently with our own shutdown handler, and + * therefore causes all the filesystems to be closed before the server can do + * all its necessary cleanup. + * + * The crazy dirty reflection in this method sneaks into the FileSystem cache + * and grabs the shutdown hook, removes it from the list of active shutdown + * hooks, and hangs onto it until later. Then, after we're properly done with + * our graceful shutdown, we can execute the hdfs hook manually to make sure + * loose ends are tied up. + * + * This seems quite fragile and susceptible to breaking if Hadoop changes + * anything about the way this cleanup is managed. Keep an eye on things. + */ + private Thread suppressHdfsShutdownHook() { + try { + Field field = FileSystem.class.getDeclaredField ("clientFinalizer"); + field.setAccessible(true); + Thread hdfsClientFinalizer = (Thread)field.get(null); + if (hdfsClientFinalizer == null) { + throw new RuntimeException("client finalizer is null, can't suppress!"); + } + Runtime.getRuntime().removeShutdownHook(hdfsClientFinalizer); + return hdfsClientFinalizer; + + } catch (NoSuchFieldException nsfe) { + LOG.fatal("Couldn't find field 'clientFinalizer' in FileSystem!", nsfe); + throw new RuntimeException("Failed to suppress HDFS shutdown hook"); + } catch (IllegalAccessException iae) { + LOG.fatal("Couldn't access field 'clientFinalizer' in FileSystem!", iae); + throw new RuntimeException("Failed to suppress HDFS shutdown hook"); + } + } + /** * Report the status of the server. A server is online once all the startup * is completed (setting up filesystem, starting service threads, etc.). This