HADOOP-1966 Make HBase unit tests more reliable in the Hudson environment.

Set hbase.rootdir in test/hbase-site.xml; when running a test, the default does not work consistently.

When a HBase mini cluster is started on top of an existing mini dfs cluster, it should not shut down the mini dfs cluster when the mini HBase cluster is shut down.

TestDFSAbort catches exceptions, prints the stack trace and re-throws the exception, so you can see when the exception happened in the log.

Catch runtime exceptions that were escaping from FSUtils.isFileSystemAvailable, enabling more reliable detection of dfs failure. HRegionServer also now checks to see if it is still accepting client requests.



git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@580745 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jim Kellerman 2007-09-30 16:09:38 +00:00
parent 354c848546
commit 8a3bc9c23e
8 changed files with 148 additions and 102 deletions

View File

@ -62,6 +62,7 @@ Trunk (unreleased changes)
down is inconsistent b) TestDFSAbort failed in nightly #242 down is inconsistent b) TestDFSAbort failed in nightly #242
HADOOP-1929 Add hbase-default.xml to hbase jar HADOOP-1929 Add hbase-default.xml to hbase jar
HADOOP-1941 StopRowFilter throws NPE when passed null row HADOOP-1941 StopRowFilter throws NPE when passed null row
HADOOP-1966 Make HBase unit tests more reliable in the Hudson environment.
IMPROVEMENTS IMPROVEMENTS
HADOOP-1737 Make HColumnDescriptor data publically members settable HADOOP-1737 Make HColumnDescriptor data publically members settable

View File

@ -71,9 +71,6 @@ public interface HConstants {
/** Used to construct the name of the directory in which a HRegion resides */ /** Used to construct the name of the directory in which a HRegion resides */
static final String HREGIONDIR_PREFIX = "hregion_"; static final String HREGIONDIR_PREFIX = "hregion_";
// TODO: Someone may try to name a column family 'log'. If they
// do, it will clash with the HREGION log dir subdirectory. FIX.
/** Used to construct the name of the log directory for a region server */ /** Used to construct the name of the log directory for a region server */
static final String HREGION_LOGDIR_NAME = "log"; static final String HREGION_LOGDIR_NAME = "log";

View File

@ -504,7 +504,7 @@ HMasterRegionInterface {
LOG.error("Scan ROOT region", e); LOG.error("Scan ROOT region", e);
if (tries == numRetries - 1) { if (tries == numRetries - 1) {
// We ran out of tries. Make sure the file system is still available // We ran out of tries. Make sure the file system is still available
if (checkFileSystem()) { if (!checkFileSystem()) {
continue; // Avoid sleeping. continue; // Avoid sleeping.
} }
} }
@ -654,7 +654,7 @@ HMasterRegionInterface {
if (tries == numRetries - 1) { if (tries == numRetries - 1) {
// We ran out of tries. Make sure the file system is still // We ran out of tries. Make sure the file system is still
// available // available
if (checkFileSystem()) { if (!checkFileSystem()) {
continue; // avoid sleeping continue; // avoid sleeping
} }
} }
@ -941,7 +941,7 @@ HMasterRegionInterface {
*/ */
protected boolean checkFileSystem() { protected boolean checkFileSystem() {
if (fsOk) { if (fsOk) {
if (!FSUtils.isFileSystemAvailable(fs, closed)) { if (!FSUtils.isFileSystemAvailable(fs)) {
LOG.fatal("Shutting down HBase cluster: file system not available"); LOG.fatal("Shutting down HBase cluster: file system not available");
closed.set(true); closed.set(true);
fsOk = false; fsOk = false;

View File

@ -139,6 +139,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
30 * 1000), stop); 30 * 1000), stop);
} }
/** {@inheritDoc} */
public void closing(final Text regionName) { public void closing(final Text regionName) {
lock.writeLock().lock(); lock.writeLock().lock();
try { try {
@ -154,6 +155,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
} }
} }
/** {@inheritDoc} */
public void closed(final Text regionName) { public void closed(final Text regionName) {
lock.writeLock().lock(); lock.writeLock().lock();
try { try {
@ -458,9 +460,17 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
// get it when the master is panicing because for instance // get it when the master is panicing because for instance
// the HDFS has been yanked out from under it. Be wary of // the HDFS has been yanked out from under it. Be wary of
// this message. // this message.
if (checkFileSystem()) { try {
closeAllRegions(); if (checkFileSystem()) {
restart = true; closeAllRegions();
restart = true;
}
} catch (Exception e) {
LOG.fatal("file system available check failed. " +
"Shutting down server.", e);
this.stopRequested.set(true);
this.fsOk = false;
this.abortRequested = true;
} }
break; break;
@ -945,6 +955,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
public byte [] get(final Text regionName, final Text row, public byte [] get(final Text regionName, final Text row,
final Text column) throws IOException { final Text column) throws IOException {
checkOpen();
requestCount.incrementAndGet(); requestCount.incrementAndGet();
try { try {
return getRegion(regionName).get(row, column); return getRegion(regionName).get(row, column);
@ -959,6 +970,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
public byte [][] get(final Text regionName, final Text row, public byte [][] get(final Text regionName, final Text row,
final Text column, final int numVersions) throws IOException { final Text column, final int numVersions) throws IOException {
checkOpen();
requestCount.incrementAndGet(); requestCount.incrementAndGet();
try { try {
return getRegion(regionName).get(row, column, numVersions); return getRegion(regionName).get(row, column, numVersions);
@ -973,6 +985,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
public byte [][] get(final Text regionName, final Text row, final Text column, public byte [][] get(final Text regionName, final Text row, final Text column,
final long timestamp, final int numVersions) throws IOException { final long timestamp, final int numVersions) throws IOException {
checkOpen();
requestCount.incrementAndGet(); requestCount.incrementAndGet();
try { try {
return getRegion(regionName).get(row, column, timestamp, numVersions); return getRegion(regionName).get(row, column, timestamp, numVersions);
@ -987,6 +1000,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
public MapWritable getRow(final Text regionName, final Text row) public MapWritable getRow(final Text regionName, final Text row)
throws IOException { throws IOException {
checkOpen();
requestCount.incrementAndGet(); requestCount.incrementAndGet();
try { try {
HRegion region = getRegion(regionName); HRegion region = getRegion(regionName);
@ -1007,6 +1021,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
/** {@inheritDoc} */ /** {@inheritDoc} */
public MapWritable next(final long scannerId) throws IOException { public MapWritable next(final long scannerId) throws IOException {
checkOpen();
requestCount.incrementAndGet(); requestCount.incrementAndGet();
try { try {
String scannerName = String.valueOf(scannerId); String scannerName = String.valueOf(scannerId);
@ -1044,7 +1059,9 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
/** {@inheritDoc} */ /** {@inheritDoc} */
public void batchUpdate(Text regionName, long timestamp, BatchUpdate b) public void batchUpdate(Text regionName, long timestamp, BatchUpdate b)
throws IOException { throws IOException {
checkOpen();
requestCount.incrementAndGet(); requestCount.incrementAndGet();
// If timestamp == LATEST_TIMESTAMP and we have deletes, then they need // If timestamp == LATEST_TIMESTAMP and we have deletes, then they need
// special treatment. For these we need to first find the latest cell so // special treatment. For these we need to first find the latest cell so
@ -1093,9 +1110,12 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
// remote scanner interface // remote scanner interface
// //
/** {@inheritDoc} */
public long openScanner(Text regionName, Text[] cols, Text firstRow, public long openScanner(Text regionName, Text[] cols, Text firstRow,
final long timestamp, final RowFilterInterface filter) final long timestamp, final RowFilterInterface filter)
throws IOException { throws IOException {
checkOpen();
requestCount.incrementAndGet(); requestCount.incrementAndGet();
try { try {
HRegion r = getRegion(regionName); HRegion r = getRegion(regionName);
@ -1110,7 +1130,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
leases.createLease(scannerId, scannerId, new ScannerListener(scannerName)); leases.createLease(scannerId, scannerId, new ScannerListener(scannerName));
return scannerId; return scannerId;
} catch (IOException e) { } catch (IOException e) {
LOG.error("Opening scanner (fsOk: " + this.fsOk + ")", LOG.error("Error opening scanner (fsOk: " + this.fsOk + ")",
RemoteExceptionHandler.checkIOException(e)); RemoteExceptionHandler.checkIOException(e));
checkFileSystem(); checkFileSystem();
throw e; throw e;
@ -1119,6 +1139,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
/** {@inheritDoc} */ /** {@inheritDoc} */
public void close(final long scannerId) throws IOException { public void close(final long scannerId) throws IOException {
checkOpen();
requestCount.incrementAndGet(); requestCount.incrementAndGet();
try { try {
String scannerName = String.valueOf(scannerId); String scannerName = String.valueOf(scannerId);
@ -1254,6 +1275,20 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
} }
} }
/**
* Called to verify that this server is up and running.
*
* @throws IOException
*/
private void checkOpen() throws IOException {
if (stopRequested.get() || abortRequested) {
throw new IOException("Server not running");
}
if (!fsOk) {
throw new IOException("File system not available");
}
}
/** /**
* Checks to see if the file system is still accessible. * Checks to see if the file system is still accessible.
* If not, sets abortRequested and stopRequested * If not, sets abortRequested and stopRequested
@ -1265,10 +1300,14 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
FileSystem fs = null; FileSystem fs = null;
try { try {
fs = FileSystem.get(this.conf); fs = FileSystem.get(this.conf);
} catch (IOException e) { if (fs != null && !FSUtils.isFileSystemAvailable(fs)) {
LOG.fatal("Shutting down HRegionServer: file system not available");
this.abortRequested = true;
this.stopRequested.set(true);
fsOk = false;
}
} catch (Exception e) {
LOG.error("Failed get of filesystem", e); LOG.error("Failed get of filesystem", e);
}
if (fs != null && !FSUtils.isFileSystemAvailable(fs, stopRequested)) {
LOG.fatal("Shutting down HRegionServer: file system not available"); LOG.fatal("Shutting down HRegionServer: file system not available");
this.abortRequested = true; this.abortRequested = true;
this.stopRequested.set(true); this.stopRequested.set(true);
@ -1301,6 +1340,7 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
return regionsToCheck; return regionsToCheck;
} }
/** {@inheritDoc} */
public long getProtocolVersion(final String protocol, public long getProtocolVersion(final String protocol,
@SuppressWarnings("unused") final long clientVersion) @SuppressWarnings("unused") final long clientVersion)
throws IOException { throws IOException {

View File

@ -26,7 +26,6 @@ import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory; import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path; import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.dfs.DistributedFileSystem; import org.apache.hadoop.dfs.DistributedFileSystem;
/** /**
@ -38,48 +37,37 @@ public class FSUtils {
/** /**
* Not instantiable * Not instantiable
*/ */
private FSUtils() {super();} private FSUtils() {}
/** /**
* Checks to see if the specified file system is available * Checks to see if the specified file system is available
* *
* @param fs * @param fs
* @param closed Optional flag. If non-null and set, will abort test of
* filesytem. Presumption is a flag shared by multiple threads. Another
* may have already determined the filesystem -- or something else -- bad.
* @return true if the specified file system is available. * @return true if the specified file system is available.
*/ */
public static boolean isFileSystemAvailable(final FileSystem fs, public static boolean isFileSystemAvailable(final FileSystem fs) {
final AtomicBoolean closed) {
if (!(fs instanceof DistributedFileSystem)) { if (!(fs instanceof DistributedFileSystem)) {
return true; return true;
} }
String exception = "";
boolean available = false; boolean available = false;
DistributedFileSystem dfs = (DistributedFileSystem) fs; DistributedFileSystem dfs = (DistributedFileSystem) fs;
int maxTries = dfs.getConf().getInt("hbase.client.retries.number", 3); try {
Path root = if (dfs.exists(new Path("/"))) {
fs.makeQualified(new Path(dfs.getConf().get(HConstants.HBASE_DIR, "/"))); available = true;
for (int i = 0; i < maxTries && (closed == null || !closed.get()); i++) {
IOException ex = null;
try {
if (dfs.exists(root)) {
available = true;
break;
}
} catch (IOException e) {
ex = e;
} }
String exception = (ex == null)? "": ": " + ex.getMessage(); } catch (IOException e) {
LOG.info("Failed exists test on " + root + " by thread " + exception = e.getMessage();
Thread.currentThread().getName() + " (Attempt " + i + " of " +
maxTries +"): " + exception);
} }
LOG.info("Failed file system available test. Thread: " +
Thread.currentThread().getName() + ": " + exception);
try { try {
if (!available) { if (!available) {
fs.close(); fs.close();
} }
} catch (IOException e) { } catch (Exception e) {
LOG.error("file system close failed: ", e); LOG.error("file system close failed: ", e);
} }
return available; return available;

View File

@ -75,4 +75,8 @@
the master will notice a dead region server sooner. The default is 15 seconds. the master will notice a dead region server sooner. The default is 15 seconds.
</description> </description>
</property> </property>
<property>
<name>hbase.rootdir</name>
<value>/hbase</value>
<description>location of HBase instance in dfs</description></property>
</configuration> </configuration>

View File

@ -48,6 +48,7 @@ public class MiniHBaseCluster implements HConstants {
private Configuration conf; private Configuration conf;
private MiniDFSCluster cluster; private MiniDFSCluster cluster;
private FileSystem fs; private FileSystem fs;
private boolean shutdownDFS;
private Path parentdir; private Path parentdir;
private MasterThread masterThread = null; private MasterThread masterThread = null;
ArrayList<RegionServerThread> regionThreads = ArrayList<RegionServerThread> regionThreads =
@ -84,8 +85,14 @@ public class MiniHBaseCluster implements HConstants {
/** /**
* Starts a MiniHBaseCluster on top of an existing HDFSCluster * Starts a MiniHBaseCluster on top of an existing HDFSCluster
* *
* Note that if you use this constructor, you should shut down the mini dfs ****************************************************************************
* cluster in your test case. * * * * * * N O T E * * * * *
*
* If you use this constructor, you should shut down the mini dfs cluster
* in your test case.
*
* * * * * * N O T E * * * * *
****************************************************************************
* *
* @param conf * @param conf
* @param nRegionNodes * @param nRegionNodes
@ -98,6 +105,7 @@ public class MiniHBaseCluster implements HConstants {
this.conf = conf; this.conf = conf;
this.fs = dfsCluster.getFileSystem(); this.fs = dfsCluster.getFileSystem();
this.cluster = dfsCluster; this.cluster = dfsCluster;
this.shutdownDFS = false;
init(nRegionNodes); init(nRegionNodes);
} }
@ -118,9 +126,11 @@ public class MiniHBaseCluster implements HConstants {
this.conf = conf; this.conf = conf;
this.deleteOnExit = deleteOnExit; this.deleteOnExit = deleteOnExit;
this.shutdownDFS = false;
if (miniHdfsFilesystem) { if (miniHdfsFilesystem) {
this.cluster = new MiniDFSCluster(this.conf, 2, format, (String[])null); this.cluster = new MiniDFSCluster(this.conf, 2, format, (String[])null);
this.fs = cluster.getFileSystem(); this.fs = cluster.getFileSystem();
this.shutdownDFS = true;
} else { } else {
this.cluster = null; this.cluster = null;
this.fs = FileSystem.get(conf); this.fs = FileSystem.get(conf);
@ -390,11 +400,14 @@ public class MiniHBaseCluster implements HConstants {
regionServerThreads.size() + " region server(s)"); regionServerThreads.size() + " region server(s)");
} }
/**
* Shut down the mini HBase cluster
*/
public void shutdown() { public void shutdown() {
MiniHBaseCluster.shutdown(this.masterThread, this.regionThreads); MiniHBaseCluster.shutdown(this.masterThread, this.regionThreads);
try { try {
if (cluster != null) { if (shutdownDFS && cluster != null) {
FileSystem fs = cluster.getFileSystem(); FileSystem fs = cluster.getFileSystem();
LOG.info("Shutting down Mini DFS cluster"); LOG.info("Shutting down Mini DFS cluster");

View File

@ -30,32 +30,35 @@ import org.apache.log4j.Logger;
*/ */
public class TestDFSAbort extends HBaseClusterTestCase { public class TestDFSAbort extends HBaseClusterTestCase {
/** constructor */
public TestDFSAbort() {
super();
Logger.getRootLogger().setLevel(Level.WARN);
Logger.getLogger(this.getClass().getPackage().getName()).setLevel(Level.DEBUG);
}
/** {@inheritDoc} */ /** {@inheritDoc} */
@Override @Override
public void setUp() throws Exception { public void setUp() throws Exception {
super.setUp(); try {
HTableDescriptor desc = new HTableDescriptor(getName()); super.setUp();
desc.addFamily(new HColumnDescriptor(HConstants.COLUMN_FAMILY_STR)); HTableDescriptor desc = new HTableDescriptor(getName());
HBaseAdmin admin = new HBaseAdmin(conf); desc.addFamily(new HColumnDescriptor(HConstants.COLUMN_FAMILY_STR));
admin.createTable(desc); HBaseAdmin admin = new HBaseAdmin(conf);
admin.createTable(desc);
} catch (Exception e) {
e.printStackTrace();
throw e;
}
} }
/** /**
* @throws Exception * @throws Exception
*/ */
public void testDFSAbort() throws Exception { public void testDFSAbort() throws Exception {
// By now the Mini DFS is running, Mini HBase is running and we have try {
// created a table. Now let's yank the rug out from HBase // By now the Mini DFS is running, Mini HBase is running and we have
cluster.getDFSCluster().shutdown(); // created a table. Now let's yank the rug out from HBase
// Now wait for Mini HBase Cluster to shut down cluster.getDFSCluster().shutdown();
cluster.join(); // Now wait for Mini HBase Cluster to shut down
cluster.join();
} catch (Exception e) {
e.printStackTrace();
throw e;
}
} }
/** /**