HBASE-4510 Check and workaround usage of internal HDFS APIs in HBase

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1186992 13f79535-47bb-0310-9956-ffa450edef68
Michael Stack 2011-10-20 19:03:56 +00:00
parent de797e9237
commit 94e4322937
3 changed files with 41 additions and 15 deletions

CHANGES.txt

@@ -381,6 +381,8 @@ Release 0.92.0 - Unreleased
    HBASE-4430  Disable TestSlabCache and TestSingleSizedCache temporarily to
                see if these are cause of build box failure though all tests
                pass (Li Pi)
+   HBASE-4510  Check and workaround usage of internal HDFS APIs in HBase
+               (Harsh)
 
   TESTS
    HBASE-4450  test for number of blocks read: to serve as baseline for expected

src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java

@@ -32,7 +32,6 @@ import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hbase.RemoteExceptionHandler;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.protocol.AlreadyBeingCreatedException;
-import org.apache.hadoop.hdfs.protocol.FSConstants;
 import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
@@ -42,6 +41,16 @@ import org.apache.hadoop.hdfs.server.namenode.LeaseExpiredException;
 public class FSHDFSUtils extends FSUtils{
   private static final Log LOG = LogFactory.getLog(FSHDFSUtils.class);
+  /**
+   * Lease timeout constant, sourced from HDFS upstream.
+   * The upstream constant is defined in a private interface, so we
+   * can't reuse it for compatibility reasons.
+   * NOTE: On versions earlier than Hadoop 0.23, the constant is in
+   * o.a.h.hdfs.protocol.FSConstants, while for 0.23 and above it is
+   * in o.a.h.hdfs.protocol.HdfsConstants because of HDFS-1620.
+   */
+  public static final long LEASE_SOFTLIMIT_PERIOD = 60 * 1000;
+
   public void recoverFileLease(final FileSystem fs, final Path p, Configuration conf)
   throws IOException{
     if (!isAppendSupported(conf)) {
@@ -86,7 +95,7 @@ public class FSHDFSUtils extends FSUtils{
           // that the RS is holding onto the file even though it lost its
           // znode. We could potentially abort after some time here.
           long waitedFor = System.currentTimeMillis() - startWaiting;
-          if (waitedFor > FSConstants.LEASE_SOFTLIMIT_PERIOD) {
+          if (waitedFor > LEASE_SOFTLIMIT_PERIOD) {
            LOG.warn("Waited " + waitedFor + "ms for lease recovery on " + p +
              ":" + e.getMessage());
          }
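
The hardcoded copy above is the workaround this issue settled on: it can never drift at runtime, but it also never picks up upstream changes. Purely as an illustration of the compatibility problem the comment describes (nothing below is part of this commit; LeaseLimits and resolveLeaseSoftLimit are made-up names), a reflective lookup could read the constant from whichever HDFS class ships it and fall back to the local copy:

    import java.lang.reflect.Field;

    public final class LeaseLimits {
      /** Local copy of the HDFS lease soft limit (60s), used as a fallback. */
      public static final long LEASE_SOFTLIMIT_PERIOD = 60 * 1000;

      /**
       * Reads LEASE_SOFTLIMIT_PERIOD from whichever class ships it:
       * o.a.h.hdfs.protocol.HdfsConstants on Hadoop 0.23+ (HDFS-1620),
       * o.a.h.hdfs.protocol.FSConstants on earlier releases.
       */
      public static long resolveLeaseSoftLimit() {
        String[] candidates = {
            "org.apache.hadoop.hdfs.protocol.HdfsConstants",
            "org.apache.hadoop.hdfs.protocol.FSConstants"
        };
        for (String className : candidates) {
          try {
            Field f = Class.forName(className).getField("LEASE_SOFTLIMIT_PERIOD");
            return f.getLong(null);
          } catch (Exception e) {
            // Class or field absent on this Hadoop version; try the next one.
          }
        }
        return LEASE_SOFTLIMIT_PERIOD; // neither class found; use the local copy
      }

      private LeaseLimits() {
      }
    }

The commit's simpler choice avoids the reflection cost and the risk of silently binding to a renamed or repurposed field.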

src/main/java/org/apache/hadoop/hbase/util/FSUtils.java

@@ -29,6 +29,7 @@ import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.PathFilter;
+import org.apache.hadoop.fs.permission.FsPermission;
 import org.apache.hadoop.hbase.HConstants;
 import org.apache.hadoop.hbase.HDFSBlocksDistribution;
 import org.apache.hadoop.hbase.HRegionInfo;
@@ -38,6 +39,7 @@ import org.apache.hadoop.hbase.master.HMaster;
 import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hdfs.DistributedFileSystem;
 import org.apache.hadoop.hdfs.protocol.FSConstants;
+import org.apache.hadoop.hdfs.server.namenode.SafeModeException;
 import org.apache.hadoop.io.SequenceFile;
 import org.apache.hadoop.util.ReflectionUtils;
 import org.apache.hadoop.util.StringUtils;
@@ -150,22 +152,37 @@ public abstract class FSUtils {
       throw io;
     }
   }
 
+  /**
+   * Utility to check if the provided FS is in safemode.
+   * @return true if dfs is in safemode, false otherwise.
+   */
+  private static boolean isInSafeMode(FileSystem fs) throws IOException {
+    // Refactored safe-mode check for HBASE-4510
+    if (fs instanceof DistributedFileSystem) {
+      Path rootPath = new Path("/");
+      FsPermission rootPerm = fs.getFileStatus(rootPath).getPermission();
+      try {
+        // Should be harmless to set back the permission we just retrieved.
+        // The first check server-side is the safemode check, so if any
+        // other exception is thrown, we are not interested in it.
+        fs.setPermission(rootPath, rootPerm);
+      } catch (SafeModeException e) {
+        return true;
+      }
+    }
+    return false;
+  }
+
   /**
    * Check whether dfs is in safemode.
-   * @param conf
-   * @return true if dfs is in safemode.
-   * @throws IOException
+   * @param conf Configuration to use
+   * @throws IOException if dfs is in safemode
    */
   public static void checkDfsSafeMode(final Configuration conf)
   throws IOException {
-    boolean isInSafeMode = false;
     FileSystem fs = FileSystem.get(conf);
-    if (fs instanceof DistributedFileSystem) {
-      DistributedFileSystem dfs = (DistributedFileSystem)fs;
-      // Check whether dfs is on safemode.
-      isInSafeMode = dfs.setSafeMode(FSConstants.SafeModeAction.SAFEMODE_GET);
-    }
-    if (isInSafeMode) {
+    if (isInSafeMode(fs)) {
       throw new IOException("File system is in safemode, it can't be written now");
     }
   }
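
The probe above works because setPermission writes back exactly the permission that was just read, so it is a metadata no-op, while the NameNode checks safemode before anything else and fails fast with SafeModeException. With that in place, checkDfsSafeMode keeps its public behavior (throw IOException while HDFS is read-only) without touching the HDFS-internal setSafeMode API. A minimal caller sketch, assuming only the method shown above (the SafeModeGate class is hypothetical):

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.util.FSUtils;

    public class SafeModeGate {
      public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        try {
          // Throws IOException while the NameNode is still in safemode.
          FSUtils.checkDfsSafeMode(conf);
          System.out.println("HDFS is writable; proceeding.");
        } catch (IOException e) {
          System.err.println("HDFS is not writable yet: " + e.getMessage());
        }
      }
    }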
@@ -436,10 +453,8 @@ public abstract class FSUtils {
     final long wait)
   throws IOException {
     FileSystem fs = FileSystem.get(conf);
-    if (!(fs instanceof DistributedFileSystem)) return;
-    DistributedFileSystem dfs = (DistributedFileSystem)fs;
     // Make sure dfs is not in safe mode
-    while (dfs.setSafeMode(FSConstants.SafeModeAction.SAFEMODE_GET)) {
+    while (isInSafeMode(fs)) {
       LOG.info("Waiting for dfs to exit safe mode...");
       try {
         Thread.sleep(wait);
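
This last hunk swaps the same internal setSafeMode call out of the blocking wait loop. The hunk header truncates the enclosing signature; assuming it belongs to FSUtils' public blocking wait utility (waitOnSafeMode is the likely name, though it is not visible here), startup code could block until HDFS accepts writes:

    import java.io.IOException;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.util.FSUtils;

    public class WaitForHdfs {
      public static void main(String[] args) throws IOException {
        Configuration conf = HBaseConfiguration.create();
        // Re-probes every 10 seconds until the NameNode leaves safemode.
        FSUtils.waitOnSafeMode(conf, 10 * 1000);
        System.out.println("Safe mode is off; HDFS accepts writes.");
      }
    }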