diff --git a/CHANGES.txt b/CHANGES.txt index 64efb906319..849af93993b 100644 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -39,6 +39,8 @@ Trunk (unreleased changes) HADOOP-1834 Scanners ignore timestamp passed on creation HADOOP-1847 Many HBase tests do not fail well. HADOOP-1847 Many HBase tests do not fail well. (phase 2) + HADOOP-1870 Once file system failure has been detected, don't check it again + and get on with shutting down the hbase cluster. IMPROVEMENTS HADOOP-1737 Make HColumnDescriptor data publically members settable diff --git a/src/java/org/apache/hadoop/hbase/HMaster.java b/src/java/org/apache/hadoop/hbase/HMaster.java index dce76ac1762..ebf217a62fd 100644 --- a/src/java/org/apache/hadoop/hbase/HMaster.java +++ b/src/java/org/apache/hadoop/hbase/HMaster.java @@ -85,6 +85,7 @@ HMasterRegionInterface, Runnable { static final Log LOG = LogFactory.getLog(HMaster.class.getName()); volatile boolean closed; + volatile boolean fsOk; Path dir; Configuration conf; FileSystem fs; @@ -511,6 +512,12 @@ HMasterRegionInterface, Runnable { LOG.warn("Scan ROOT region", e); } else { LOG.error("Scan ROOT region", e); + + if (tries == numRetries - 1) { + // We ran out of tries. Make sure the file system is still available + + checkFileSystem(); + } } } catch (Exception e) { // If for some reason we get some other kind of exception, @@ -518,13 +525,6 @@ HMasterRegionInterface, Runnable { LOG.error("Unexpected exception", e); } - // We ran out of tries. Make sure the file system is still available - - if (!FSUtils.isFileSystemAvailable(fs)) { - LOG.fatal("Shutting down hbase cluster: file system not available"); - closed = true; - } - if (!closed) { // sleep before retry @@ -681,20 +681,18 @@ HMasterRegionInterface, Runnable { LOG.warn("Scan one META region", e); } else { LOG.error("Scan one META region", e); + + if (tries == numRetries - 1) { + // We ran out of tries. 
Make sure the file system is still available + + checkFileSystem(); + } } } catch (Exception e) { // If for some reason we get some other kind of exception, // at least log it rather than go out silently. LOG.error("Unexpected exception", e); } - - // We ran out of tries. Make sure the file system is still available - - if (!FSUtils.isFileSystemAvailable(fs)) { - LOG.fatal("Shutting down hbase cluster: file system not available"); - closed = true; - } - if (!closed) { // sleep before retry try { @@ -852,6 +850,7 @@ HMasterRegionInterface, Runnable { throws IOException { this.closed = true; + this.fsOk = true; this.dir = dir; this.conf = conf; this.fs = FileSystem.get(conf); @@ -979,6 +978,23 @@ HMasterRegionInterface, Runnable { LOG.info("HMaster initialized on " + this.address.toString()); } + /** + * Checks to see if the file system is still accessible. + * If not, sets closed and clears fsOk so the check is not repeated. + * + * @return false if file system is not available + */ + protected boolean checkFileSystem() { + if (fsOk) { + if (!FSUtils.isFileSystemAvailable(fs)) { + LOG.fatal("Shutting down HBase cluster: file system not available"); + closed = true; + fsOk = false; + } + } + return fsOk; + } + /** @return HServerAddress of the master server */ public HServerAddress getMasterAddress() { return address; @@ -1071,9 +1087,7 @@ HMasterRegionInterface, Runnable { LOG.warn("main processing loop: " + op.toString(), e); } } - if (!FSUtils.isFileSystemAvailable(fs)) { - LOG.fatal("Shutting down hbase cluster: file system not available"); - closed = true; + if (!checkFileSystem()) { break; } LOG.warn("Processing pending operations: " + op.toString(), ex); @@ -2664,10 +2678,7 @@ HMasterRegionInterface, Runnable { if (tries == numRetries - 1) { // No retries left - if (!FSUtils.isFileSystemAvailable(fs)) { - LOG.fatal("Shutting down hbase cluster: file system not available"); - closed = true; - } + checkFileSystem(); if (e instanceof RemoteException) { e = RemoteExceptionHandler.decodeRemoteException( diff 
--git a/src/java/org/apache/hadoop/hbase/HRegionServer.java b/src/java/org/apache/hadoop/hbase/HRegionServer.java index 71924c8ed4d..91f22aacde0 100644 --- a/src/java/org/apache/hadoop/hbase/HRegionServer.java +++ b/src/java/org/apache/hadoop/hbase/HRegionServer.java @@ -84,6 +84,9 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable { // debugging and unit tests. protected volatile boolean abortRequested; + // If false, the file system has become unavailable + protected volatile boolean fsOk; + final Path rootDir; protected final HServerInfo serverInfo; protected final Configuration conf; @@ -435,6 +438,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable { // Basic setup this.stopRequested = false; this.abortRequested = false; + this.fsOk = true; this.rootDir = rootDir; this.conf = conf; this.rand = new Random(); @@ -512,6 +516,11 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable { } } + /** @return the HLog */ + HLog getLog() { + return log; + } + /** * Sets a flag that will cause all the HRegionServer threads to shut down * in an orderly fashion. 
@@ -1101,6 +1110,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable { } } + /** {@inheritDoc} */ public void batchUpdate(Text regionName, long timestamp, BatchUpdate b) throws IOException { requestCount.incrementAndGet(); @@ -1259,6 +1269,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable { region.delete(lockid, column); } + /** {@inheritDoc} */ public void deleteAll(final Text regionName, final Text row, final Text column, final long timestamp) throws IOException { @@ -1326,12 +1337,13 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable { * @return false if file system is not available */ protected boolean checkFileSystem() { - boolean fsOk = true; - if (!FSUtils.isFileSystemAvailable(fs)) { - LOG.fatal("Shutting down HRegionServer: file system not available"); - abortRequested = true; - stopRequested = true; - fsOk = false; + if (fsOk) { + if (!FSUtils.isFileSystemAvailable(fs)) { + LOG.fatal("Shutting down HRegionServer: file system not available"); + abortRequested = true; + stopRequested = true; + fsOk = false; + } } return fsOk; } diff --git a/src/java/org/apache/hadoop/hbase/util/FSUtils.java b/src/java/org/apache/hadoop/hbase/util/FSUtils.java index 5a5278d9bb3..f470bc0b3ca 100644 --- a/src/java/org/apache/hadoop/hbase/util/FSUtils.java +++ b/src/java/org/apache/hadoop/hbase/util/FSUtils.java @@ -54,6 +54,15 @@ public class FSUtils { } catch (IOException e) { LOG.fatal("file system unavailable because: ", e); } + + try { + if (!available) { + fs.close(); + } + + } catch (IOException e) { + LOG.error("file system close", e); + } } else { available = true;