HADOOP-1870 Once file system failure has been detected, don't check it again and get on with shutting down the hbase cluster.

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@574731 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jim Kellerman 2007-09-11 23:39:17 +00:00
parent d60908dcf9
commit c0c89222f1
4 changed files with 62 additions and 28 deletions

View File

@ -39,6 +39,8 @@ Trunk (unreleased changes)
HADOOP-1834 Scanners ignore timestamp passed on creation
HADOOP-1847 Many HBase tests do not fail well.
HADOOP-1847 Many HBase tests do not fail well. (phase 2)
HADOOP-1870 Once file system failure has been detected, don't check it again
and get on with shutting down the hbase cluster.
IMPROVEMENTS
HADOOP-1737 Make HColumnDescriptor data members publicly settable

View File

@ -85,6 +85,7 @@ HMasterRegionInterface, Runnable {
static final Log LOG = LogFactory.getLog(HMaster.class.getName());
volatile boolean closed;
volatile boolean fsOk;
Path dir;
Configuration conf;
FileSystem fs;
@ -511,6 +512,12 @@ HMasterRegionInterface, Runnable {
LOG.warn("Scan ROOT region", e);
} else {
LOG.error("Scan ROOT region", e);
if (tries == numRetries - 1) {
// We ran out of tries. Make sure the file system is still available
checkFileSystem();
}
}
} catch (Exception e) {
// If for some reason we get some other kind of exception,
@ -518,13 +525,6 @@ HMasterRegionInterface, Runnable {
LOG.error("Unexpected exception", e);
}
// We ran out of tries. Make sure the file system is still available
if (!FSUtils.isFileSystemAvailable(fs)) {
LOG.fatal("Shutting down hbase cluster: file system not available");
closed = true;
}
if (!closed) {
// sleep before retry
@ -681,20 +681,18 @@ HMasterRegionInterface, Runnable {
LOG.warn("Scan one META region", e);
} else {
LOG.error("Scan one META region", e);
if (tries == numRetries - 1) {
// We ran out of tries. Make sure the file system is still available
checkFileSystem();
}
}
} catch (Exception e) {
// If for some reason we get some other kind of exception,
// at least log it rather than go out silently.
LOG.error("Unexpected exception", e);
}
// We ran out of tries. Make sure the file system is still available
if (!FSUtils.isFileSystemAvailable(fs)) {
LOG.fatal("Shutting down hbase cluster: file system not available");
closed = true;
}
if (!closed) {
// sleep before retry
try {
@ -852,6 +850,7 @@ HMasterRegionInterface, Runnable {
throws IOException {
this.closed = true;
this.fsOk = true;
this.dir = dir;
this.conf = conf;
this.fs = FileSystem.get(conf);
@ -979,6 +978,23 @@ HMasterRegionInterface, Runnable {
LOG.info("HMaster initialized on " + this.address.toString());
}
/**
 * Probes the file system, unless an earlier probe has already failed.
 *
 * While {@code fsOk} is still true, asks {@code FSUtils} whether the file
 * system is available. On the first negative answer a fatal message is
 * logged, {@code closed} is set to true and {@code fsOk} is set to false.
 * Once {@code fsOk} is false the file system is not probed again.
 *
 * @return the current value of {@code fsOk}: false if the file system is
 * not available
 */
protected boolean checkFileSystem() {
  // Short-circuit: the availability probe only runs while fsOk is still true.
  if (fsOk && !FSUtils.isFileSystemAvailable(fs)) {
    LOG.fatal("Shutting down HBase cluster: file system not available");
    closed = true;
    fsOk = false;
  }
  return fsOk;
}
/** @return HServerAddress of the master server */
public HServerAddress getMasterAddress() {
return address;
@ -1071,9 +1087,7 @@ HMasterRegionInterface, Runnable {
LOG.warn("main processing loop: " + op.toString(), e);
}
}
if (!FSUtils.isFileSystemAvailable(fs)) {
LOG.fatal("Shutting down hbase cluster: file system not available");
closed = true;
if (!checkFileSystem()) {
break;
}
LOG.warn("Processing pending operations: " + op.toString(), ex);
@ -2664,10 +2678,7 @@ HMasterRegionInterface, Runnable {
if (tries == numRetries - 1) {
// No retries left
if (!FSUtils.isFileSystemAvailable(fs)) {
LOG.fatal("Shutting down hbase cluster: file system not available");
closed = true;
}
checkFileSystem();
if (e instanceof RemoteException) {
e = RemoteExceptionHandler.decodeRemoteException(

View File

@ -84,6 +84,9 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
// debugging and unit tests.
protected volatile boolean abortRequested;
// If false, the file system has become unavailable
protected volatile boolean fsOk;
final Path rootDir;
protected final HServerInfo serverInfo;
protected final Configuration conf;
@ -435,6 +438,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
// Basic setup
this.stopRequested = false;
this.abortRequested = false;
this.fsOk = true;
this.rootDir = rootDir;
this.conf = conf;
this.rand = new Random();
@ -512,6 +516,11 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
}
}
/**
 * Accessor for this server's log.
 *
 * @return the HLog held in the {@code log} field
 */
HLog getLog() {
  return this.log;
}
/**
* Sets a flag that will cause all the HRegionServer threads to shut down
* in an orderly fashion.
@ -1101,6 +1110,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
}
}
/** {@inheritDoc} */
public void batchUpdate(Text regionName, long timestamp, BatchUpdate b)
throws IOException {
requestCount.incrementAndGet();
@ -1259,6 +1269,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
region.delete(lockid, column);
}
/** {@inheritDoc} */
public void deleteAll(final Text regionName, final Text row,
final Text column, final long timestamp)
throws IOException {
@ -1326,12 +1337,13 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
* @return false if file system is not available
*/
protected boolean checkFileSystem() {
boolean fsOk = true;
if (!FSUtils.isFileSystemAvailable(fs)) {
LOG.fatal("Shutting down HRegionServer: file system not available");
abortRequested = true;
stopRequested = true;
fsOk = false;
if (fsOk) {
if (!FSUtils.isFileSystemAvailable(fs)) {
LOG.fatal("Shutting down HRegionServer: file system not available");
abortRequested = true;
stopRequested = true;
fsOk = false;
}
}
return fsOk;
}

View File

@ -54,6 +54,15 @@ public class FSUtils {
} catch (IOException e) {
LOG.fatal("file system unavailable because: ", e);
}
try {
if (!available) {
fs.close();
}
} catch (IOException e) {
LOG.error("file system close", e);
}
} else {
available = true;