HADOOP-1990 Regression test instability affects nightly and patch builds

git-svn-id: https://svn.apache.org/repos/asf/lucene/hadoop/trunk/src/contrib/hbase@581995 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Jim Kellerman 2007-10-04 19:58:56 +00:00
parent c56582d19b
commit af8131b86d
10 changed files with 111 additions and 56 deletions

View File

@ -64,6 +64,7 @@ Trunk (unreleased changes)
HADOOP-1941 StopRowFilter throws NPE when passed null row
HADOOP-1966 Make HBase unit tests more reliable in the Hudson environment.
HADOOP-1975 HBase tests failing with java.lang.NumberFormatException
HADOOP-1990 Regression test instability affects nightly and patch builds
IMPROVEMENTS
HADOOP-1737 Make HColumnDescriptor data members publicly settable

View File

@ -1049,15 +1049,6 @@ HMasterRegionInterface {
} catch(Exception iex) {
LOG.warn("meta scanner", iex);
}
try {
// TODO: Maybe do in parallel in its own thread as is done in TaskTracker
// if it's taking a long time to go down.
server.join(); // Wait for server to finish.
} catch(InterruptedException iex) {
LOG.warn("server", iex);
}
LOG.info("HMaster main thread exiting");
}

View File

@ -729,12 +729,6 @@ public class HRegionServer implements HConstants, HRegionInterface, Runnable {
join(this.logRollerThread);
join(this.cacheFlusherThread);
join(this.splitOrCompactCheckerThread);
try {
this.server.join();
} catch (InterruptedException e) {
// No means of asking server if it's done... so just assume it is, even
// if interrupted.
}
}
private void join(final Thread t) {

View File

@ -87,6 +87,13 @@ public class HServerInfo implements Writable {
return startCode;
}
/**
 * Assigns the start code held by this HServerInfo.
 *
 * @param startCode the new start code value
 */
public void setStartCode(final long startCode) {
  this.startCode = startCode;
}
/** {@inheritDoc} */
@Override
public String toString() {

View File

@ -20,7 +20,6 @@
package org.apache.hadoop.hbase.util;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@ -59,11 +58,11 @@ public class FSUtils {
} catch (IOException e) {
exception = e.getMessage();
}
LOG.info("Failed file system available test. Thread: " +
Thread.currentThread().getName() + ": " + exception);
try {
if (!available) {
LOG.info("Failed file system available test. Thread: " +
Thread.currentThread().getName() + ": " + exception);
fs.close();
}

View File

@ -22,14 +22,27 @@ package org.apache.hadoop.hbase;
import junit.framework.TestSuite;
import junit.textui.TestRunner;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import java.io.PrintWriter;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
* Test ability of HBase to handle DFS failure
*/
public class TestDFSAbort extends HBaseClusterTestCase {
public class DFSAbort extends HBaseClusterTestCase {
private static final Log LOG =
LogFactory.getLog(DFSAbort.class.getName());
/** constructor */
public DFSAbort() {
super();
// For less frequently updated regions flush after every 2 flushes
conf.setInt("hbase.hregion.memcache.optionalflushcount", 2);
}
/** {@inheritDoc} */
@Override
public void setUp() throws Exception {
@ -54,17 +67,47 @@ public class TestDFSAbort extends HBaseClusterTestCase {
// created a table. Now let's yank the rug out from HBase
cluster.getDFSCluster().shutdown();
// Now wait for Mini HBase Cluster to shut down
cluster.join();
// cluster.join();
join();
} catch (Exception e) {
e.printStackTrace();
throw e;
}
}
/**
 * Waits for the mini cluster's threads to finish: every entry of
 * cluster.regionThreads first (when that collection is non-null), then the
 * thread returned by cluster.getMasterThread().
 */
private void join() {
  if (this.cluster.regionThreads != null) {
    // The collection's monitor is held for the whole iteration.
    synchronized (this.cluster.regionThreads) {
      for (Thread regionThread : this.cluster.regionThreads) {
        join(regionThread);
      }
    }
  }
  // Looked up only after the region threads have been waited on.
  final Thread master = this.cluster.getMasterThread();
  join(master);
}
/**
 * Blocks until the given thread has terminated.
 *
 * Waits in slices of at most one second using a timed {@link Thread#join(long)},
 * so the method returns as soon as the thread dies rather than after a fixed
 * sleep. On every 30th pass of the wait loop a thread dump is written to
 * standard out to help diagnose a shutdown that is hanging.
 *
 * An interrupt received while waiting does not abort the wait; it is logged,
 * remembered, and the calling thread's interrupt status is restored once the
 * target thread has finished.
 *
 * @param t thread to wait on; a null value is ignored
 */
private void join(final Thread t) {
  if (t == null) {
    return;
  }
  boolean interrupted = false;
  for (int i = 0; t.isAlive(); i++) {
    try {
      // Timed join wakes up immediately when t ends instead of always
      // sleeping out the full second.
      t.join(1000);
    } catch (InterruptedException e) {
      LOG.info("Continuing...", e);
      // Do not re-interrupt here: the next join() would throw at once and
      // the loop would spin. Restore the status after the loop instead.
      interrupted = true;
    }
    if (i != 0 && i % 30 == 0) {
      ReflectionUtils.printThreadInfo(new PrintWriter(System.out),
          "Automatic Stack Trace every 30 seconds waiting on " +
          t.getName());
    }
  }
  if (interrupted) {
    // Hand the interrupt back to the caller now that the wait is over.
    Thread.currentThread().interrupt();
  }
}
/**
* @param args unused
*/
public static void main(@SuppressWarnings("unused") String[] args) {
TestRunner.run(new TestSuite(TestDFSAbort.class));
TestRunner.run(new TestSuite(DFSAbort.class));
}
}

View File

@ -85,6 +85,7 @@ public class MiniHBaseCluster implements HConstants {
/**
* Starts a MiniHBaseCluster on top of an existing HDFSCluster
*
*<pre>
****************************************************************************
* * * * * * N O T E * * * * *
*
@ -93,6 +94,7 @@ public class MiniHBaseCluster implements HConstants {
*
* * * * * * N O T E * * * * *
****************************************************************************
*</pre>
*
* @param conf
* @param nRegionNodes
@ -286,6 +288,13 @@ public class MiniHBaseCluster implements HConstants {
return this.masterThread.getMaster().getMasterAddress();
}
/**
 * Accessor for the master thread held by this mini cluster.
 *
 * @return the thread running the HMaster
 */
public MasterThread getMasterThread() {
  return masterThread;
}
/**
* Cause a region server to exit without cleaning up
*

View File

@ -55,6 +55,10 @@ public class MultiRegionTable extends HBaseTestCase {
MiniHBaseCluster cluster, FileSystem localFs, String tableName,
String columnName) throws IOException {
final int retries = 10;
final long waitTime =
conf.getLong("hbase.master.meta.thread.rescanfrequency", 10L * 1000L);
// This size should make it so we always split using the addContent
// below. After adding all data, the first region is 1.3M. Should
// set max filesize to be <= 1M.
@ -62,7 +66,6 @@ public class MultiRegionTable extends HBaseTestCase {
assertTrue(conf.getLong("hbase.hregion.max.filesize",
HConstants.DEFAULT_MAX_FILE_SIZE) <= 1024 * 1024);
final int retries = 10;
FileSystem fs = (cluster.getDFSCluster() == null) ?
localFs : cluster.getDFSCluster().getFileSystem();
assertNotNull(fs);
@ -89,18 +92,18 @@ public class MultiRegionTable extends HBaseTestCase {
// Now, wait until split makes it into the meta table.
for (int i = 0;
i < retries && (count(meta, HConstants.COLUMN_FAMILY_STR) <= count);
i++) {
int oldCount = count;
for (int i = 0; i < retries; i++) {
count = count(meta, HConstants.COLUMN_FAMILY_STR);
if (count > oldCount) {
break;
}
try {
Thread.sleep(5000);
Thread.sleep(waitTime);
} catch (InterruptedException e) {
// continue
}
}
int oldCount = count;
count = count(meta, HConstants.COLUMN_FAMILY_STR);
if (count <= oldCount) {
throw new IOException("Failed waiting on splits to show up");
}
@ -126,7 +129,7 @@ public class MultiRegionTable extends HBaseTestCase {
// Recalibrate will cause us to wait on new regions' deployment
recalibrate(t, new Text(columnName), retries);
recalibrate(t, new Text(columnName), retries, waitTime);
// Compact a region at a time so we can test case where one region has
// no references but the other still has some
@ -138,7 +141,7 @@ public class MultiRegionTable extends HBaseTestCase {
while (getSplitParentInfo(meta, parent).size() == 3) {
try {
Thread.sleep(5000);
Thread.sleep(waitTime);
} catch (InterruptedException e) {
// continue
}
@ -153,12 +156,13 @@ public class MultiRegionTable extends HBaseTestCase {
// Now wait until parent disappears.
LOG.info("Waiting on parent " + parent.getRegionName() + " to disappear");
for (int i = 0;
i < retries && getSplitParentInfo(meta, parent) != null;
i++) {
for (int i = 0; i < retries; i++) {
if (getSplitParentInfo(meta, parent) == null) {
break;
}
try {
Thread.sleep(5000);
Thread.sleep(waitTime);
} catch (InterruptedException e) {
// continue
}
@ -167,9 +171,12 @@ public class MultiRegionTable extends HBaseTestCase {
// Assert cleaned up.
for (int i = 0; i < retries && fs.exists(parentDir); i++) {
for (int i = 0; i < retries; i++) {
if (!fs.exists(parentDir)) {
break;
}
try {
Thread.sleep(5000);
Thread.sleep(waitTime);
} catch (InterruptedException e) {
// continue
}
@ -243,7 +250,7 @@ public class MultiRegionTable extends HBaseTestCase {
* @param retries
*/
private static void recalibrate(final HTable t, final Text column,
final int retries) throws IOException {
final int retries, final long waitTime) throws IOException {
for (int i = 0; i < retries; i++) {
try {
@ -260,7 +267,7 @@ public class MultiRegionTable extends HBaseTestCase {
} catch (NotServingRegionException x) {
System.out.println("it's alright");
try {
Thread.sleep(5000);
Thread.sleep(waitTime);
} catch (InterruptedException e) {
// continue
}

View File

@ -86,25 +86,25 @@ public class StaticTestEnvironment {
} else if(value.equalsIgnoreCase("WARN")) {
logLevel = Level.WARN;
}
}
ConsoleAppender consoleAppender = null;
for(Enumeration<Appender> e = rootLogger.getAllAppenders();
e.hasMoreElements();) {
ConsoleAppender consoleAppender = null;
for(Enumeration<Appender> e = rootLogger.getAllAppenders();
e.hasMoreElements();) {
Appender a = e.nextElement();
if(a instanceof ConsoleAppender) {
consoleAppender = (ConsoleAppender)a;
break;
}
Appender a = e.nextElement();
if(a instanceof ConsoleAppender) {
consoleAppender = (ConsoleAppender)a;
break;
}
if(consoleAppender != null) {
Layout layout = consoleAppender.getLayout();
if(layout instanceof PatternLayout) {
PatternLayout consoleLayout = (PatternLayout)layout;
consoleLayout.setConversionPattern("%d %-5p [%t] %l: %m%n");
}
}
if(consoleAppender != null) {
Layout layout = consoleAppender.getLayout();
if(layout instanceof PatternLayout) {
PatternLayout consoleLayout = (PatternLayout)layout;
consoleLayout.setConversionPattern("%d %-5p [%t] %l: %m%n");
}
}
}
Logger.getLogger(
HBaseTestCase.class.getPackage().getName()).setLevel(logLevel);
}

View File

@ -68,6 +68,10 @@ public class TestLogRolling extends HBaseTestCase {
// Increase the amount of time between client retries
conf.setLong("hbase.client.pause", 15 * 1000);
// Reduce thread wake frequency so that other threads can get
// a chance to run.
conf.setInt(HConstants.THREAD_WAKE_FREQUENCY, 2 * 1000);
String className = this.getClass().getName();
StringBuilder v = new StringBuilder(className);
while (v.length() < 1000) {