HBASE-5003 If the master is started with a wrong root dir, it gets stuck and can't be killed

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1290882 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael Stack 2012-02-18 21:32:42 +00:00
parent 588206f411
commit d69299dfdc
4 changed files with 40 additions and 12 deletions

View File

@ -184,6 +184,12 @@ public final class HConstants {
/** Default value for thread wake frequency */ /** Default value for thread wake frequency */
public static final int DEFAULT_THREAD_WAKE_FREQUENCY = 10 * 1000; public static final int DEFAULT_THREAD_WAKE_FREQUENCY = 10 * 1000;
/** Parameter name for how often we should try to write a version file, before failing */
public static final String VERSION_FILE_WRITE_ATTEMPTS = "hbase.server.versionfile.writeattempts";
/** Default number of attempts to write a version file before failing */
public static final int DEFAULT_VERSION_FILE_WRITE_ATTEMPTS = 3;
/** Parameter name for how often a region should perform a major compaction */ /** Parameter name for how often a region should perform a major compaction */
public static final String MAJOR_COMPACTION_PERIOD = "hbase.hregion.majorcompaction"; public static final String MAJOR_COMPACTION_PERIOD = "hbase.hregion.majorcompaction";

View File

@ -335,14 +335,16 @@ public class MasterFileSystem {
// there is one datanode it will succeed. Permission problems should have // there is one datanode it will succeed. Permission problems should have
// already been caught by mkdirs above. // already been caught by mkdirs above.
FSUtils.setVersion(fs, rd, c.getInt(HConstants.THREAD_WAKE_FREQUENCY, FSUtils.setVersion(fs, rd, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
10 * 1000)); 10 * 1000), c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
} else { } else {
if (!fs.isDirectory(rd)) { if (!fs.isDirectory(rd)) {
throw new IllegalArgumentException(rd.toString() + " is not a directory"); throw new IllegalArgumentException(rd.toString() + " is not a directory");
} }
// as above // as above
FSUtils.checkVersion(fs, rd, true, c.getInt(HConstants.THREAD_WAKE_FREQUENCY, FSUtils.checkVersion(fs, rd, true, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
10 * 1000)); 10 * 1000), c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
} }
} catch (IllegalArgumentException iae) { } catch (IllegalArgumentException iae) {
LOG.fatal("Please fix invalid configuration for " LOG.fatal("Please fix invalid configuration for "

View File

@ -204,7 +204,8 @@ public abstract class FSUtils {
*/ */
public static void checkVersion(FileSystem fs, Path rootdir, public static void checkVersion(FileSystem fs, Path rootdir,
boolean message) throws IOException { boolean message) throws IOException {
checkVersion(fs, rootdir, message, 0); checkVersion(fs, rootdir, message, 0,
HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS);
} }
/** /**
@ -213,19 +214,20 @@ public abstract class FSUtils {
* @param fs file system * @param fs file system
* @param rootdir root directory of HBase installation * @param rootdir root directory of HBase installation
* @param message if true, issues a message on System.out * @param message if true, issues a message on System.out
* @param wait wait interval for retry if > 0 * @param wait wait interval
* @param retries number of times to retry
* *
* @throws IOException e * @throws IOException e
*/ */
public static void checkVersion(FileSystem fs, Path rootdir, public static void checkVersion(FileSystem fs, Path rootdir,
boolean message, int wait) throws IOException { boolean message, int wait, int retries) throws IOException {
String version = getVersion(fs, rootdir); String version = getVersion(fs, rootdir);
if (version == null) { if (version == null) {
if (!rootRegionExists(fs, rootdir)) { if (!rootRegionExists(fs, rootdir)) {
// rootDir is empty (no version file and no root region) // rootDir is empty (no version file and no root region)
// just create new version file (HBASE-1195) // just create new version file (HBASE-1195)
FSUtils.setVersion(fs, rootdir, wait); FSUtils.setVersion(fs, rootdir, wait, retries);
return; return;
} }
} else if (version.compareTo(HConstants.FILE_SYSTEM_VERSION) == 0) } else if (version.compareTo(HConstants.FILE_SYSTEM_VERSION) == 0)
@ -252,7 +254,8 @@ public abstract class FSUtils {
*/ */
public static void setVersion(FileSystem fs, Path rootdir) public static void setVersion(FileSystem fs, Path rootdir)
throws IOException { throws IOException {
setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, 0); setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, 0,
HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS);
} }
/** /**
@ -261,13 +264,15 @@ public abstract class FSUtils {
* @param fs filesystem object * @param fs filesystem object
* @param rootdir hbase root * @param rootdir hbase root
* @param wait time to wait for retry * @param wait time to wait for retry
* @param retries number of times to retry before failing
* @throws IOException e * @throws IOException e
*/ */
public static void setVersion(FileSystem fs, Path rootdir, int wait) public static void setVersion(FileSystem fs, Path rootdir, int wait, int retries)
throws IOException { throws IOException {
setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, wait); setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, wait, retries);
} }
/** /**
* Sets version of file system * Sets version of file system
* *
@ -275,10 +280,11 @@ public abstract class FSUtils {
* @param rootdir hbase root directory * @param rootdir hbase root directory
* @param version version to set * @param version version to set
* @param wait time to wait for retry * @param wait time to wait for retry
* @param retries number of times to retry before throwing an IOException
* @throws IOException e * @throws IOException e
*/ */
public static void setVersion(FileSystem fs, Path rootdir, String version, public static void setVersion(FileSystem fs, Path rootdir, String version,
int wait) throws IOException { int wait, int retries) throws IOException {
Path versionFile = new Path(rootdir, HConstants.VERSION_FILE_NAME); Path versionFile = new Path(rootdir, HConstants.VERSION_FILE_NAME);
while (true) { while (true) {
try { try {
@ -289,15 +295,20 @@ public abstract class FSUtils {
s.close(); s.close();
return; return;
} catch (IOException e) { } catch (IOException e) {
if (wait > 0) { if (retries > 0) {
LOG.warn("Unable to create version file at " + rootdir.toString() + LOG.warn("Unable to create version file at " + rootdir.toString() +
", retrying: " + e.getMessage()); ", retrying: " + e.getMessage());
fs.delete(versionFile, false); fs.delete(versionFile, false);
try { try {
if (wait > 0) {
Thread.sleep(wait); Thread.sleep(wait);
}
} catch (InterruptedException ex) { } catch (InterruptedException ex) {
// ignore // ignore
} }
retries--;
} else {
throw e;
} }
} }
} }

View File

@ -332,6 +332,15 @@
Used as sleep interval by service threads such as log roller. Used as sleep interval by service threads such as log roller.
</description> </description>
</property> </property>
<property>
<name>hbase.server.versionfile.writeattempts</name>
<value>3</value>
<description>
How many times to retry attempting to write a version file
before just aborting. Each attempt is separated by
hbase.server.thread.wakefrequency milliseconds.
</description>
</property>
<property> <property>
<name>hbase.hregion.memstore.flush.size</name> <name>hbase.hregion.memstore.flush.size</name>
<value>134217728</value> <value>134217728</value>