HBASE-5003 If the master is started with a wrong root dir, it gets stuck and can't be killed

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1290882 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2012-02-18 21:32:42 +00:00
parent 588206f411
commit d69299dfdc
4 changed files with 40 additions and 12 deletions

View File

@ -183,7 +183,13 @@ public final class HConstants {
/** Default value for thread wake frequency */
public static final int DEFAULT_THREAD_WAKE_FREQUENCY = 10 * 1000;
/** Parameter name for how often we should try to write a version file, before failing */
public static final String VERSION_FILE_WRITE_ATTEMPTS = "hbase.server.versionfile.writeattempts";
/** Default value for how often we should try to write a version file, before failing */
public static final int DEFAULT_VERSION_FILE_WRITE_ATTEMPTS = 3;
/** Parameter name for how often a region should perform a major compaction */
public static final String MAJOR_COMPACTION_PERIOD = "hbase.hregion.majorcompaction";

View File

@ -335,14 +335,16 @@ public class MasterFileSystem {
// there is one datanode it will succeed. Permission problems should have
// already been caught by mkdirs above.
FSUtils.setVersion(fs, rd, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
10 * 1000));
10 * 1000), c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
} else {
if (!fs.isDirectory(rd)) {
throw new IllegalArgumentException(rd.toString() + " is not a directory");
}
// as above
FSUtils.checkVersion(fs, rd, true, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
10 * 1000));
10 * 1000), c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
}
} catch (IllegalArgumentException iae) {
LOG.fatal("Please fix invalid configuration for "

View File

@ -204,7 +204,8 @@ public abstract class FSUtils {
*/
public static void checkVersion(FileSystem fs, Path rootdir,
boolean message) throws IOException {
checkVersion(fs, rootdir, message, 0);
checkVersion(fs, rootdir, message, 0,
HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS);
}
/**
@ -213,19 +214,20 @@ public abstract class FSUtils {
* @param fs file system
* @param rootdir root directory of HBase installation
* @param message if true, issues a message on System.out
* @param wait wait interval for retry if > 0
* @param wait wait interval
* @param retries number of times to retry
*
* @throws IOException e
*/
public static void checkVersion(FileSystem fs, Path rootdir,
boolean message, int wait) throws IOException {
boolean message, int wait, int retries) throws IOException {
String version = getVersion(fs, rootdir);
if (version == null) {
if (!rootRegionExists(fs, rootdir)) {
// rootDir is empty (no version file and no root region)
// just create new version file (HBASE-1195)
FSUtils.setVersion(fs, rootdir, wait);
FSUtils.setVersion(fs, rootdir, wait, retries);
return;
}
} else if (version.compareTo(HConstants.FILE_SYSTEM_VERSION) == 0)
@ -252,7 +254,8 @@ public abstract class FSUtils {
*/
public static void setVersion(FileSystem fs, Path rootdir)
throws IOException {
setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, 0);
setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, 0,
HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS);
}
/**
@ -261,13 +264,15 @@ public abstract class FSUtils {
* @param fs filesystem object
* @param rootdir hbase root
* @param wait time to wait for retry
* @param retries number of times to retry before failing
* @throws IOException e
*/
public static void setVersion(FileSystem fs, Path rootdir, int wait)
public static void setVersion(FileSystem fs, Path rootdir, int wait, int retries)
throws IOException {
setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, wait);
setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, wait, retries);
}
/**
* Sets version of file system
*
@ -275,10 +280,11 @@ public abstract class FSUtils {
* @param rootdir hbase root directory
* @param version version to set
* @param wait time to wait for retry
* @param retries number of times to retry before throwing an IOException
* @throws IOException e
*/
public static void setVersion(FileSystem fs, Path rootdir, String version,
int wait) throws IOException {
int wait, int retries) throws IOException {
Path versionFile = new Path(rootdir, HConstants.VERSION_FILE_NAME);
while (true) {
try {
@ -289,15 +295,20 @@ public abstract class FSUtils {
s.close();
return;
} catch (IOException e) {
if (wait > 0) {
if (retries > 0) {
LOG.warn("Unable to create version file at " + rootdir.toString() +
", retrying: " + e.getMessage());
fs.delete(versionFile, false);
try {
Thread.sleep(wait);
if (wait > 0) {
Thread.sleep(wait);
}
} catch (InterruptedException ex) {
// ignore
}
retries--;
} else {
throw e;
}
}
}

View File

@ -332,6 +332,15 @@
Used as sleep interval by service threads such as log roller.
</description>
</property>
<property>
<name>hbase.server.versionfile.writeattempts</name>
<value>3</value>
<description>
How many times to retry attempting to write a version file
before just aborting. Each attempt is separated by
hbase.server.thread.wakefrequency milliseconds.
</description>
</property>
<property>
<name>hbase.hregion.memstore.flush.size</name>
<value>134217728</value>