HADOOP-1870 Once file system failure has been detected, don't check it again and get on with shutting down the hbase cluster.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@574731 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jim Kellerman 2007-09-11 23:39:17 +00:00
parent d60908dcf9
commit c0c89222f1
4 changed files with 62 additions and 28 deletions

View File

@ -39,6 +39,8 @@ Trunk (unreleased changes)
HADOOP-1834 Scanners ignore timestamp passed on creation HADOOP-1834 Scanners ignore timestamp passed on creation
HADOOP-1847 Many HBase tests do not fail well. HADOOP-1847 Many HBase tests do not fail well.
HADOOP-1847 Many HBase tests do not fail well. (phase 2) HADOOP-1847 Many HBase tests do not fail well. (phase 2)
HADOOP-1870 Once file system failure has been detected, don't check it again
and get on with shutting down the hbase cluster.
IMPROVEMENTS IMPROVEMENTS
HADOOP-1737 Make HColumnDescriptor data members publicly settable HADOOP-1737 Make HColumnDescriptor data members publicly settable

View File

@ -85,6 +85,7 @@ HMasterRegionInterface, Runnable {
static final Log LOG = LogFactory.getLog(HMaster.class.getName()); static final Log LOG = LogFactory.getLog(HMaster.class.getName());
volatile boolean closed; volatile boolean closed;
volatile boolean fsOk;
Path dir; Path dir;
Configuration conf; Configuration conf;
FileSystem fs; FileSystem fs;
@ -511,6 +512,12 @@ HMasterRegionInterface, Runnable {
LOG.warn("Scan ROOT region", e); LOG.warn("Scan ROOT region", e);
} else { } else {
LOG.error("Scan ROOT region", e); LOG.error("Scan ROOT region", e);
if (tries == numRetries - 1) {
// We ran out of tries. Make sure the file system is still available
checkFileSystem();
}
} }
} catch (Exception e) { } catch (Exception e) {
// If for some reason we get some other kind of exception, // If for some reason we get some other kind of exception,
@ -518,13 +525,6 @@ HMasterRegionInterface, Runnable {
LOG.error("Unexpected exception", e); LOG.error("Unexpected exception", e);
} }
// We ran out of tries. Make sure the file system is still available
if (!FSUtils.isFileSystemAvailable(fs)) {
LOG.fatal("Shutting down hbase cluster: file system not available");
closed = true;
}
if (!closed) { if (!closed) {
// sleep before retry // sleep before retry
@ -681,20 +681,18 @@ HMasterRegionInterface, Runnable {
LOG.warn("Scan one META region", e); LOG.warn("Scan one META region", e);
} else { } else {
LOG.error("Scan one META region", e); LOG.error("Scan one META region", e);
if (tries == numRetries - 1) {
// We ran out of tries. Make sure the file system is still available
checkFileSystem();
}
} }
} catch (Exception e) { } catch (Exception e) {
// If for some reason we get some other kind of exception, // If for some reason we get some other kind of exception,
// at least log it rather than go out silently. // at least log it rather than go out silently.
LOG.error("Unexpected exception", e); LOG.error("Unexpected exception", e);
} }
// We ran out of tries. Make sure the file system is still available
if (!FSUtils.isFileSystemAvailable(fs)) {
LOG.fatal("Shutting down hbase cluster: file system not available");
closed = true;
}
if (!closed) { if (!closed) {
// sleep before retry // sleep before retry
try { try {
@ -852,6 +850,7 @@ HMasterRegionInterface, Runnable {
throws IOException { throws IOException {
this.closed = true; this.closed = true;
this.fsOk = true;
this.dir = dir; this.dir = dir;
this.conf = conf; this.conf = conf;
this.fs = FileSystem.get(conf); this.fs = FileSystem.get(conf);
@ -979,6 +978,23 @@ HMasterRegionInterface, Runnable {
LOG.info("HMaster initialized on " + this.address.toString()); LOG.info("HMaster initialized on " + this.address.toString());
} }
/**
 * Probes the file system, unless an earlier probe has already failed.
 *
 * While {@code fsOk} is still true, {@code FSUtils.isFileSystemAvailable}
 * is consulted for {@code fs}. On a negative answer a fatal message is
 * logged, {@code closed} is set to true and {@code fsOk} is cleared, so
 * subsequent calls skip the probe entirely.
 *
 * @return the current value of {@code fsOk}; false once the file system
 * has been found to be unavailable
 */
protected boolean checkFileSystem() {
  // Short-circuit: the availability probe only runs while fsOk is true.
  if (fsOk && !FSUtils.isFileSystemAvailable(fs)) {
    LOG.fatal("Shutting down HBase cluster: file system not available");
    closed = true;
    fsOk = false;
  }
  return fsOk;
}
/** @return HServerAddress of the master server */ /** @return HServerAddress of the master server */
public HServerAddress getMasterAddress() { public HServerAddress getMasterAddress() {
return address; return address;
@ -1071,9 +1087,7 @@ HMasterRegionInterface, Runnable {
LOG.warn("main processing loop: " + op.toString(), e); LOG.warn("main processing loop: " + op.toString(), e);
} }
} }
if (!FSUtils.isFileSystemAvailable(fs)) { if (!checkFileSystem()) {
LOG.fatal("Shutting down hbase cluster: file system not available");
closed = true;
break; break;
} }
LOG.warn("Processing pending operations: " + op.toString(), ex); LOG.warn("Processing pending operations: " + op.toString(), ex);
@ -2664,10 +2678,7 @@ HMasterRegionInterface, Runnable {
if (tries == numRetries - 1) { if (tries == numRetries - 1) {
// No retries left // No retries left
if (!FSUtils.isFileSystemAvailable(fs)) { checkFileSystem();
LOG.fatal("Shutting down hbase cluster: file system not available");
closed = true;
}
if (e instanceof RemoteException) { if (e instanceof RemoteException) {
e = RemoteExceptionHandler.decodeRemoteException( e = RemoteExceptionHandler.decodeRemoteException(

View File

@ -84,6 +84,9 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
// debugging and unit tests. // debugging and unit tests.
protected volatile boolean abortRequested; protected volatile boolean abortRequested;
// If false, the file system has become unavailable
protected volatile boolean fsOk;
final Path rootDir; final Path rootDir;
protected final HServerInfo serverInfo; protected final HServerInfo serverInfo;
protected final Configuration conf; protected final Configuration conf;
@ -435,6 +438,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
// Basic setup // Basic setup
this.stopRequested = false; this.stopRequested = false;
this.abortRequested = false; this.abortRequested = false;
this.fsOk = true;
this.rootDir = rootDir; this.rootDir = rootDir;
this.conf = conf; this.conf = conf;
this.rand = new Random(); this.rand = new Random();
@ -512,6 +516,11 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
} }
} }
/**
 * Accessor for the write-ahead log object held in the {@code log} field.
 *
 * @return the {@code HLog} currently referenced by {@code log}
 */
HLog getLog() {
  return this.log;
}
/** /**
* Sets a flag that will cause all the HRegionServer threads to shut down * Sets a flag that will cause all the HRegionServer threads to shut down
* in an orderly fashion. * in an orderly fashion.
@ -1101,6 +1110,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
} }
} }
/** {@inheritDoc} */
public void batchUpdate(Text regionName, long timestamp, BatchUpdate b) public void batchUpdate(Text regionName, long timestamp, BatchUpdate b)
throws IOException { throws IOException {
requestCount.incrementAndGet(); requestCount.incrementAndGet();
@ -1259,6 +1269,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
region.delete(lockid, column); region.delete(lockid, column);
} }
/** {@inheritDoc} */
public void deleteAll(final Text regionName, final Text row, public void deleteAll(final Text regionName, final Text row,
final Text column, final long timestamp) final Text column, final long timestamp)
throws IOException { throws IOException {
@ -1326,12 +1337,13 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
* @return false if file system is not available * @return false if file system is not available
*/ */
protected boolean checkFileSystem() { protected boolean checkFileSystem() {
boolean fsOk = true; if (fsOk) {
if (!FSUtils.isFileSystemAvailable(fs)) { if (!FSUtils.isFileSystemAvailable(fs)) {
LOG.fatal("Shutting down HRegionServer: file system not available"); LOG.fatal("Shutting down HRegionServer: file system not available");
abortRequested = true; abortRequested = true;
stopRequested = true; stopRequested = true;
fsOk = false; fsOk = false;
}
} }
return fsOk; return fsOk;
} }

View File

@ -54,6 +54,15 @@ public class FSUtils {
} catch (IOException e) { } catch (IOException e) {
LOG.fatal("file system unavailable because: ", e); LOG.fatal("file system unavailable because: ", e);
} }
try {
if (!available) {
fs.close();
}
} catch (IOException e) {
LOG.error("file system close", e);
}
} else { } else {
available = true; available = true;