HBASE-5003 If the master is started with a wrong root dir, it gets stuck and can't be killed
git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1290882 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
588206f411
commit
d69299dfdc
|
@ -184,6 +184,12 @@ public final class HConstants {
|
||||||
/** Default value for thread wake frequency */
|
/** Default value for thread wake frequency */
|
||||||
public static final int DEFAULT_THREAD_WAKE_FREQUENCY = 10 * 1000;
|
public static final int DEFAULT_THREAD_WAKE_FREQUENCY = 10 * 1000;
|
||||||
|
|
||||||
|
/** Parameter name for how often we should try to write a version file, before failing */
|
||||||
|
public static final String VERSION_FILE_WRITE_ATTEMPTS = "hbase.server.versionfile.writeattempts";
|
||||||
|
|
||||||
|
/** Default value for how often we should try to write a version file, before failing */
|
||||||
|
public static final int DEFAULT_VERSION_FILE_WRITE_ATTEMPTS = 3;
|
||||||
|
|
||||||
/** Parameter name for how often a region should perform a major compaction */
|
/** Parameter name for how often a region should perform a major compaction */
|
||||||
public static final String MAJOR_COMPACTION_PERIOD = "hbase.hregion.majorcompaction";
|
public static final String MAJOR_COMPACTION_PERIOD = "hbase.hregion.majorcompaction";
|
||||||
|
|
||||||
|
|
|
@ -335,14 +335,16 @@ public class MasterFileSystem {
|
||||||
// there is one datanode it will succeed. Permission problems should have
|
// there is one datanode it will succeed. Permission problems should have
|
||||||
// already been caught by mkdirs above.
|
// already been caught by mkdirs above.
|
||||||
FSUtils.setVersion(fs, rd, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
|
FSUtils.setVersion(fs, rd, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
|
||||||
10 * 1000));
|
10 * 1000), c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
|
||||||
|
HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
|
||||||
} else {
|
} else {
|
||||||
if (!fs.isDirectory(rd)) {
|
if (!fs.isDirectory(rd)) {
|
||||||
throw new IllegalArgumentException(rd.toString() + " is not a directory");
|
throw new IllegalArgumentException(rd.toString() + " is not a directory");
|
||||||
}
|
}
|
||||||
// as above
|
// as above
|
||||||
FSUtils.checkVersion(fs, rd, true, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
|
FSUtils.checkVersion(fs, rd, true, c.getInt(HConstants.THREAD_WAKE_FREQUENCY,
|
||||||
10 * 1000));
|
10 * 1000), c.getInt(HConstants.VERSION_FILE_WRITE_ATTEMPTS,
|
||||||
|
HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS));
|
||||||
}
|
}
|
||||||
} catch (IllegalArgumentException iae) {
|
} catch (IllegalArgumentException iae) {
|
||||||
LOG.fatal("Please fix invalid configuration for "
|
LOG.fatal("Please fix invalid configuration for "
|
||||||
|
|
|
@ -204,7 +204,8 @@ public abstract class FSUtils {
|
||||||
*/
|
*/
|
||||||
public static void checkVersion(FileSystem fs, Path rootdir,
|
public static void checkVersion(FileSystem fs, Path rootdir,
|
||||||
boolean message) throws IOException {
|
boolean message) throws IOException {
|
||||||
checkVersion(fs, rootdir, message, 0);
|
checkVersion(fs, rootdir, message, 0,
|
||||||
|
HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -213,19 +214,20 @@ public abstract class FSUtils {
|
||||||
* @param fs file system
|
* @param fs file system
|
||||||
* @param rootdir root directory of HBase installation
|
* @param rootdir root directory of HBase installation
|
||||||
* @param message if true, issues a message on System.out
|
* @param message if true, issues a message on System.out
|
||||||
* @param wait wait interval for retry if > 0
|
* @param wait wait interval between retries, in milliseconds; no wait if not > 0
|
||||||
|
* @param retries number of times to retry
|
||||||
*
|
*
|
||||||
* @throws IOException e
|
* @throws IOException e
|
||||||
*/
|
*/
|
||||||
public static void checkVersion(FileSystem fs, Path rootdir,
|
public static void checkVersion(FileSystem fs, Path rootdir,
|
||||||
boolean message, int wait) throws IOException {
|
boolean message, int wait, int retries) throws IOException {
|
||||||
String version = getVersion(fs, rootdir);
|
String version = getVersion(fs, rootdir);
|
||||||
|
|
||||||
if (version == null) {
|
if (version == null) {
|
||||||
if (!rootRegionExists(fs, rootdir)) {
|
if (!rootRegionExists(fs, rootdir)) {
|
||||||
// rootDir is empty (no version file and no root region)
|
// rootDir is empty (no version file and no root region)
|
||||||
// just create new version file (HBASE-1195)
|
// just create new version file (HBASE-1195)
|
||||||
FSUtils.setVersion(fs, rootdir, wait);
|
FSUtils.setVersion(fs, rootdir, wait, retries);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
} else if (version.compareTo(HConstants.FILE_SYSTEM_VERSION) == 0)
|
} else if (version.compareTo(HConstants.FILE_SYSTEM_VERSION) == 0)
|
||||||
|
@ -252,7 +254,8 @@ public abstract class FSUtils {
|
||||||
*/
|
*/
|
||||||
public static void setVersion(FileSystem fs, Path rootdir)
|
public static void setVersion(FileSystem fs, Path rootdir)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, 0);
|
setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, 0,
|
||||||
|
HConstants.DEFAULT_VERSION_FILE_WRITE_ATTEMPTS);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -261,13 +264,15 @@ public abstract class FSUtils {
|
||||||
* @param fs filesystem object
|
* @param fs filesystem object
|
||||||
* @param rootdir hbase root
|
* @param rootdir hbase root
|
||||||
* @param wait time to wait for retry
|
* @param wait time to wait for retry
|
||||||
|
* @param retries number of times to retry before failing
|
||||||
* @throws IOException e
|
* @throws IOException e
|
||||||
*/
|
*/
|
||||||
public static void setVersion(FileSystem fs, Path rootdir, int wait)
|
public static void setVersion(FileSystem fs, Path rootdir, int wait, int retries)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, wait);
|
setVersion(fs, rootdir, HConstants.FILE_SYSTEM_VERSION, wait, retries);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Sets version of file system
|
* Sets version of file system
|
||||||
*
|
*
|
||||||
|
@ -275,10 +280,11 @@ public abstract class FSUtils {
|
||||||
* @param rootdir hbase root directory
|
* @param rootdir hbase root directory
|
||||||
* @param version version to set
|
* @param version version to set
|
||||||
* @param wait time to wait for retry
|
* @param wait time to wait for retry
|
||||||
|
* @param retries number of times to retry before throwing an IOException
|
||||||
* @throws IOException e
|
* @throws IOException e
|
||||||
*/
|
*/
|
||||||
public static void setVersion(FileSystem fs, Path rootdir, String version,
|
public static void setVersion(FileSystem fs, Path rootdir, String version,
|
||||||
int wait) throws IOException {
|
int wait, int retries) throws IOException {
|
||||||
Path versionFile = new Path(rootdir, HConstants.VERSION_FILE_NAME);
|
Path versionFile = new Path(rootdir, HConstants.VERSION_FILE_NAME);
|
||||||
while (true) {
|
while (true) {
|
||||||
try {
|
try {
|
||||||
|
@ -289,15 +295,20 @@ public abstract class FSUtils {
|
||||||
s.close();
|
s.close();
|
||||||
return;
|
return;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
if (wait > 0) {
|
if (retries > 0) {
|
||||||
LOG.warn("Unable to create version file at " + rootdir.toString() +
|
LOG.warn("Unable to create version file at " + rootdir.toString() +
|
||||||
", retrying: " + e.getMessage());
|
", retrying: " + e.getMessage());
|
||||||
fs.delete(versionFile, false);
|
fs.delete(versionFile, false);
|
||||||
try {
|
try {
|
||||||
|
if (wait > 0) {
|
||||||
Thread.sleep(wait);
|
Thread.sleep(wait);
|
||||||
|
}
|
||||||
} catch (InterruptedException ex) {
|
} catch (InterruptedException ex) {
|
||||||
// ignore
|
// ignore
|
||||||
}
|
}
|
||||||
|
retries--;
|
||||||
|
} else {
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -332,6 +332,15 @@
|
||||||
Used as sleep interval by service threads such as log roller.
|
Used as sleep interval by service threads such as log roller.
|
||||||
</description>
|
</description>
|
||||||
</property>
|
</property>
|
||||||
|
<property>
|
||||||
|
<name>hbase.server.versionfile.writeattempts</name>
|
||||||
|
<value>3</value>
|
||||||
|
<description>
|
||||||
|
How many times to retry attempting to write a version file
|
||||||
|
before just aborting. Each attempt is separated by
|
||||||
|
hbase.server.thread.wakefrequency milliseconds.
|
||||||
|
</description>
|
||||||
|
</property>
|
||||||
<property>
|
<property>
|
||||||
<name>hbase.hregion.memstore.flush.size</name>
|
<name>hbase.hregion.memstore.flush.size</name>
|
||||||
<value>134217728</value>
|
<value>134217728</value>
|
||||||
|
|
Loading…
Reference in New Issue