HADOOP-1870 Once file system failure has been detected, don't check it again and get on with shutting down the hbase cluster.
git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@574731 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
d60908dcf9
commit
c0c89222f1
|
@ -39,6 +39,8 @@ Trunk (unreleased changes)
|
||||||
HADOOP-1834 Scanners ignore timestamp passed on creation
|
HADOOP-1834 Scanners ignore timestamp passed on creation
|
||||||
HADOOP-1847 Many HBase tests do not fail well.
|
HADOOP-1847 Many HBase tests do not fail well.
|
||||||
HADOOP-1847 Many HBase tests do not fail well. (phase 2)
|
HADOOP-1847 Many HBase tests do not fail well. (phase 2)
|
||||||
|
HADOOP-1870 Once file system failure has been detected, don't check it again
|
||||||
|
and get on with shutting down the hbase cluster.
|
||||||
|
|
||||||
IMPROVEMENTS
|
IMPROVEMENTS
|
||||||
HADOOP-1737 Make HColumnDescriptor data publically members settable
|
HADOOP-1737 Make HColumnDescriptor data publically members settable
|
||||||
|
|
|
@ -85,6 +85,7 @@ HMasterRegionInterface, Runnable {
|
||||||
static final Log LOG = LogFactory.getLog(HMaster.class.getName());
|
static final Log LOG = LogFactory.getLog(HMaster.class.getName());
|
||||||
|
|
||||||
volatile boolean closed;
|
volatile boolean closed;
|
||||||
|
volatile boolean fsOk;
|
||||||
Path dir;
|
Path dir;
|
||||||
Configuration conf;
|
Configuration conf;
|
||||||
FileSystem fs;
|
FileSystem fs;
|
||||||
|
@ -511,6 +512,12 @@ HMasterRegionInterface, Runnable {
|
||||||
LOG.warn("Scan ROOT region", e);
|
LOG.warn("Scan ROOT region", e);
|
||||||
} else {
|
} else {
|
||||||
LOG.error("Scan ROOT region", e);
|
LOG.error("Scan ROOT region", e);
|
||||||
|
|
||||||
|
if (tries == numRetries - 1) {
|
||||||
|
// We ran out of tries. Make sure the file system is still available
|
||||||
|
|
||||||
|
checkFileSystem();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
// If for some reason we get some other kind of exception,
|
// If for some reason we get some other kind of exception,
|
||||||
|
@ -518,13 +525,6 @@ HMasterRegionInterface, Runnable {
|
||||||
LOG.error("Unexpected exception", e);
|
LOG.error("Unexpected exception", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
// We ran out of tries. Make sure the file system is still available
|
|
||||||
|
|
||||||
if (!FSUtils.isFileSystemAvailable(fs)) {
|
|
||||||
LOG.fatal("Shutting down hbase cluster: file system not available");
|
|
||||||
closed = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!closed) {
|
if (!closed) {
|
||||||
// sleep before retry
|
// sleep before retry
|
||||||
|
|
||||||
|
@ -681,20 +681,18 @@ HMasterRegionInterface, Runnable {
|
||||||
LOG.warn("Scan one META region", e);
|
LOG.warn("Scan one META region", e);
|
||||||
} else {
|
} else {
|
||||||
LOG.error("Scan one META region", e);
|
LOG.error("Scan one META region", e);
|
||||||
|
|
||||||
|
if (tries == numRetries - 1) {
|
||||||
|
// We ran out of tries. Make sure the file system is still available
|
||||||
|
|
||||||
|
checkFileSystem();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
// If for some reason we get some other kind of exception,
|
// If for some reason we get some other kind of exception,
|
||||||
// at least log it rather than go out silently.
|
// at least log it rather than go out silently.
|
||||||
LOG.error("Unexpected exception", e);
|
LOG.error("Unexpected exception", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
// We ran out of tries. Make sure the file system is still available
|
|
||||||
|
|
||||||
if (!FSUtils.isFileSystemAvailable(fs)) {
|
|
||||||
LOG.fatal("Shutting down hbase cluster: file system not available");
|
|
||||||
closed = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!closed) {
|
if (!closed) {
|
||||||
// sleep before retry
|
// sleep before retry
|
||||||
try {
|
try {
|
||||||
|
@ -852,6 +850,7 @@ HMasterRegionInterface, Runnable {
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
|
||||||
this.closed = true;
|
this.closed = true;
|
||||||
|
this.fsOk = true;
|
||||||
this.dir = dir;
|
this.dir = dir;
|
||||||
this.conf = conf;
|
this.conf = conf;
|
||||||
this.fs = FileSystem.get(conf);
|
this.fs = FileSystem.get(conf);
|
||||||
|
@ -979,6 +978,23 @@ HMasterRegionInterface, Runnable {
|
||||||
LOG.info("HMaster initialized on " + this.address.toString());
|
LOG.info("HMaster initialized on " + this.address.toString());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Checks to see if the file system is still accessible.
|
||||||
|
* If not, sets closed
|
||||||
|
*
|
||||||
|
* @return false if file system is not available
|
||||||
|
*/
|
||||||
|
protected boolean checkFileSystem() {
|
||||||
|
if (fsOk) {
|
||||||
|
if (!FSUtils.isFileSystemAvailable(fs)) {
|
||||||
|
LOG.fatal("Shutting down HBase cluster: file system not available");
|
||||||
|
closed = true;
|
||||||
|
fsOk = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return fsOk;
|
||||||
|
}
|
||||||
|
|
||||||
/** @return HServerAddress of the master server */
|
/** @return HServerAddress of the master server */
|
||||||
public HServerAddress getMasterAddress() {
|
public HServerAddress getMasterAddress() {
|
||||||
return address;
|
return address;
|
||||||
|
@ -1071,9 +1087,7 @@ HMasterRegionInterface, Runnable {
|
||||||
LOG.warn("main processing loop: " + op.toString(), e);
|
LOG.warn("main processing loop: " + op.toString(), e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!FSUtils.isFileSystemAvailable(fs)) {
|
if (!checkFileSystem()) {
|
||||||
LOG.fatal("Shutting down hbase cluster: file system not available");
|
|
||||||
closed = true;
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
LOG.warn("Processing pending operations: " + op.toString(), ex);
|
LOG.warn("Processing pending operations: " + op.toString(), ex);
|
||||||
|
@ -2664,10 +2678,7 @@ HMasterRegionInterface, Runnable {
|
||||||
if (tries == numRetries - 1) {
|
if (tries == numRetries - 1) {
|
||||||
// No retries left
|
// No retries left
|
||||||
|
|
||||||
if (!FSUtils.isFileSystemAvailable(fs)) {
|
checkFileSystem();
|
||||||
LOG.fatal("Shutting down hbase cluster: file system not available");
|
|
||||||
closed = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (e instanceof RemoteException) {
|
if (e instanceof RemoteException) {
|
||||||
e = RemoteExceptionHandler.decodeRemoteException(
|
e = RemoteExceptionHandler.decodeRemoteException(
|
||||||
|
|
|
@ -84,6 +84,9 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
// debugging and unit tests.
|
// debugging and unit tests.
|
||||||
protected volatile boolean abortRequested;
|
protected volatile boolean abortRequested;
|
||||||
|
|
||||||
|
// If false, the file system has become unavailable
|
||||||
|
protected volatile boolean fsOk;
|
||||||
|
|
||||||
final Path rootDir;
|
final Path rootDir;
|
||||||
protected final HServerInfo serverInfo;
|
protected final HServerInfo serverInfo;
|
||||||
protected final Configuration conf;
|
protected final Configuration conf;
|
||||||
|
@ -435,6 +438,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
// Basic setup
|
// Basic setup
|
||||||
this.stopRequested = false;
|
this.stopRequested = false;
|
||||||
this.abortRequested = false;
|
this.abortRequested = false;
|
||||||
|
this.fsOk = true;
|
||||||
this.rootDir = rootDir;
|
this.rootDir = rootDir;
|
||||||
this.conf = conf;
|
this.conf = conf;
|
||||||
this.rand = new Random();
|
this.rand = new Random();
|
||||||
|
@ -512,6 +516,11 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** @return the HLog */
|
||||||
|
HLog getLog() {
|
||||||
|
return log;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets a flag that will cause all the HRegionServer threads to shut down
|
* Sets a flag that will cause all the HRegionServer threads to shut down
|
||||||
* in an orderly fashion.
|
* in an orderly fashion.
|
||||||
|
@ -1101,6 +1110,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** {@inheritDoc} */
|
||||||
public void batchUpdate(Text regionName, long timestamp, BatchUpdate b)
|
public void batchUpdate(Text regionName, long timestamp, BatchUpdate b)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
requestCount.incrementAndGet();
|
requestCount.incrementAndGet();
|
||||||
|
@ -1259,6 +1269,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
region.delete(lockid, column);
|
region.delete(lockid, column);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** {@inheritDoc} */
|
||||||
public void deleteAll(final Text regionName, final Text row,
|
public void deleteAll(final Text regionName, final Text row,
|
||||||
final Text column, final long timestamp)
|
final Text column, final long timestamp)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
|
@ -1326,12 +1337,13 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
|
||||||
* @return false if file system is not available
|
* @return false if file system is not available
|
||||||
*/
|
*/
|
||||||
protected boolean checkFileSystem() {
|
protected boolean checkFileSystem() {
|
||||||
boolean fsOk = true;
|
if (fsOk) {
|
||||||
if (!FSUtils.isFileSystemAvailable(fs)) {
|
if (!FSUtils.isFileSystemAvailable(fs)) {
|
||||||
LOG.fatal("Shutting down HRegionServer: file system not available");
|
LOG.fatal("Shutting down HRegionServer: file system not available");
|
||||||
abortRequested = true;
|
abortRequested = true;
|
||||||
stopRequested = true;
|
stopRequested = true;
|
||||||
fsOk = false;
|
fsOk = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return fsOk;
|
return fsOk;
|
||||||
}
|
}
|
||||||
|
|
|
@ -54,6 +54,15 @@ public class FSUtils {
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
LOG.fatal("file system unavailable because: ", e);
|
LOG.fatal("file system unavailable because: ", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
if (!available) {
|
||||||
|
fs.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
} catch (IOException e) {
|
||||||
|
LOG.error("file system close", e);
|
||||||
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
available = true;
|
available = true;
|
||||||
|
|
Loading…
Reference in New Issue