HADOOP-1870 Once file system failure has been detected, don't check it again and get on with shutting down the hbase cluster.
git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@574731 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d60908dcf9
commit
c0c89222f1
|
@ -39,6 +39,8 @@ Trunk (unreleased changes)
|
|||
HADOOP-1834 Scanners ignore timestamp passed on creation
|
||||
HADOOP-1847 Many HBase tests do not fail well.
|
||||
HADOOP-1847 Many HBase tests do not fail well. (phase 2)
|
||||
HADOOP-1870 Once file system failure has been detected, don't check it again
|
||||
and get on with shutting down the hbase cluster.
|
||||
|
||||
IMPROVEMENTS
|
||||
HADOOP-1737 Make HColumnDescriptor data members publicly settable
|
||||
|
|
|
@ -85,6 +85,7 @@ HMasterRegionInterface, Runnable {
|
|||
static final Log LOG = LogFactory.getLog(HMaster.class.getName());
|
||||
|
||||
volatile boolean closed;
|
||||
volatile boolean fsOk;
|
||||
Path dir;
|
||||
Configuration conf;
|
||||
FileSystem fs;
|
||||
|
@ -511,6 +512,12 @@ HMasterRegionInterface, Runnable {
|
|||
LOG.warn("Scan ROOT region", e);
|
||||
} else {
|
||||
LOG.error("Scan ROOT region", e);
|
||||
|
||||
if (tries == numRetries - 1) {
|
||||
// We ran out of tries. Make sure the file system is still available
|
||||
|
||||
checkFileSystem();
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
// If for some reason we get some other kind of exception,
|
||||
|
@ -518,13 +525,6 @@ HMasterRegionInterface, Runnable {
|
|||
LOG.error("Unexpected exception", e);
|
||||
}
|
||||
|
||||
// We ran out of tries. Make sure the file system is still available
|
||||
|
||||
if (!FSUtils.isFileSystemAvailable(fs)) {
|
||||
LOG.fatal("Shutting down hbase cluster: file system not available");
|
||||
closed = true;
|
||||
}
|
||||
|
||||
if (!closed) {
|
||||
// sleep before retry
|
||||
|
||||
|
@ -681,20 +681,18 @@ HMasterRegionInterface, Runnable {
|
|||
LOG.warn("Scan one META region", e);
|
||||
} else {
|
||||
LOG.error("Scan one META region", e);
|
||||
|
||||
if (tries == numRetries - 1) {
|
||||
// We ran out of tries. Make sure the file system is still available
|
||||
|
||||
checkFileSystem();
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
// If for some reason we get some other kind of exception,
|
||||
// at least log it rather than go out silently.
|
||||
LOG.error("Unexpected exception", e);
|
||||
}
|
||||
|
||||
// We ran out of tries. Make sure the file system is still available
|
||||
|
||||
if (!FSUtils.isFileSystemAvailable(fs)) {
|
||||
LOG.fatal("Shutting down hbase cluster: file system not available");
|
||||
closed = true;
|
||||
}
|
||||
|
||||
if (!closed) {
|
||||
// sleep before retry
|
||||
try {
|
||||
|
@ -852,6 +850,7 @@ HMasterRegionInterface, Runnable {
|
|||
throws IOException {
|
||||
|
||||
this.closed = true;
|
||||
this.fsOk = true;
|
||||
this.dir = dir;
|
||||
this.conf = conf;
|
||||
this.fs = FileSystem.get(conf);
|
||||
|
@ -979,6 +978,23 @@ HMasterRegionInterface, Runnable {
|
|||
LOG.info("HMaster initialized on " + this.address.toString());
|
||||
}
|
||||
|
||||
/**
|
||||
* Checks to see if the file system is still accessible.
|
||||
* If not, sets closed to true and fsOk to false; once fsOk is false the file system is not checked again.
|
||||
*
|
||||
* @return false if file system is not available
|
||||
*/
|
||||
protected boolean checkFileSystem() {
|
||||
if (fsOk) {
|
||||
if (!FSUtils.isFileSystemAvailable(fs)) {
|
||||
LOG.fatal("Shutting down HBase cluster: file system not available");
|
||||
closed = true;
|
||||
fsOk = false;
|
||||
}
|
||||
}
|
||||
return fsOk;
|
||||
}
|
||||
|
||||
/** @return HServerAddress of the master server */
|
||||
public HServerAddress getMasterAddress() {
|
||||
return address;
|
||||
|
@ -1071,9 +1087,7 @@ HMasterRegionInterface, Runnable {
|
|||
LOG.warn("main processing loop: " + op.toString(), e);
|
||||
}
|
||||
}
|
||||
if (!FSUtils.isFileSystemAvailable(fs)) {
|
||||
LOG.fatal("Shutting down hbase cluster: file system not available");
|
||||
closed = true;
|
||||
if (!checkFileSystem()) {
|
||||
break;
|
||||
}
|
||||
LOG.warn("Processing pending operations: " + op.toString(), ex);
|
||||
|
@ -2664,10 +2678,7 @@ HMasterRegionInterface, Runnable {
|
|||
if (tries == numRetries - 1) {
|
||||
// No retries left
|
||||
|
||||
if (!FSUtils.isFileSystemAvailable(fs)) {
|
||||
LOG.fatal("Shutting down hbase cluster: file system not available");
|
||||
closed = true;
|
||||
}
|
||||
checkFileSystem();
|
||||
|
||||
if (e instanceof RemoteException) {
|
||||
e = RemoteExceptionHandler.decodeRemoteException(
|
||||
|
|
|
@ -84,6 +84,9 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
|||
// debugging and unit tests.
|
||||
protected volatile boolean abortRequested;
|
||||
|
||||
// If false, the file system has become unavailable
|
||||
protected volatile boolean fsOk;
|
||||
|
||||
final Path rootDir;
|
||||
protected final HServerInfo serverInfo;
|
||||
protected final Configuration conf;
|
||||
|
@ -435,6 +438,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
|||
// Basic setup
|
||||
this.stopRequested = false;
|
||||
this.abortRequested = false;
|
||||
this.fsOk = true;
|
||||
this.rootDir = rootDir;
|
||||
this.conf = conf;
|
||||
this.rand = new Random();
|
||||
|
@ -512,6 +516,11 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
|||
}
|
||||
}
|
||||
|
||||
/** @return the HLog */
|
||||
HLog getLog() {
|
||||
return log;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets a flag that will cause all the HRegionServer threads to shut down
|
||||
* in an orderly fashion.
|
||||
|
@ -1101,6 +1110,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
|||
}
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public void batchUpdate(Text regionName, long timestamp, BatchUpdate b)
|
||||
throws IOException {
|
||||
requestCount.incrementAndGet();
|
||||
|
@ -1259,6 +1269,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
|||
region.delete(lockid, column);
|
||||
}
|
||||
|
||||
/** {@inheritDoc} */
|
||||
public void deleteAll(final Text regionName, final Text row,
|
||||
final Text column, final long timestamp)
|
||||
throws IOException {
|
||||
|
@ -1326,12 +1337,13 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
|||
* @return false if file system is not available
|
||||
*/
|
||||
protected boolean checkFileSystem() {
|
||||
boolean fsOk = true;
|
||||
if (!FSUtils.isFileSystemAvailable(fs)) {
|
||||
LOG.fatal("Shutting down HRegionServer: file system not available");
|
||||
abortRequested = true;
|
||||
stopRequested = true;
|
||||
fsOk = false;
|
||||
if (fsOk) {
|
||||
if (!FSUtils.isFileSystemAvailable(fs)) {
|
||||
LOG.fatal("Shutting down HRegionServer: file system not available");
|
||||
abortRequested = true;
|
||||
stopRequested = true;
|
||||
fsOk = false;
|
||||
}
|
||||
}
|
||||
return fsOk;
|
||||
}
|
||||
|
|
|
@ -54,6 +54,15 @@ public class FSUtils {
|
|||
} catch (IOException e) {
|
||||
LOG.fatal("file system unavailable because: ", e);
|
||||
}
|
||||
|
||||
try {
|
||||
if (!available) {
|
||||
fs.close();
|
||||
}
|
||||
|
||||
} catch (IOException e) {
|
||||
LOG.error("file system close", e);
|
||||
}
|
||||
|
||||
} else {
|
||||
available = true;
|
||||
|
|
Loading…
Reference in New Issue