From 421400c2561af04c72617b28a832bc9fdc8af2ae Mon Sep 17 00:00:00 2001
From: mbautin
Date: Wed, 18 Apr 2012 00:22:54 +0000
Subject: [PATCH] [jira] [HBASE-5763] Fix random failures in TestFSErrorsExposed

Summary: TestFSErrorsExposed frequently fails due to unclean mini-cluster
shutdown. The fix brings the datanodes back up, waits for some time, and
preemptively kills all regionservers and the master before shutdown. This is
the trunk fix; the 89-fb patch is at D2739.

Test Plan: Run TestFSErrorsExposed 100 times

Reviewers: stack, tedyu, jdcryans, lhofhansl, jmhsieh, JIRA

Reviewed By: tedyu

Differential Revision: https://reviews.facebook.net/D2793

git-svn-id: https://svn.apache.org/repos/asf/hbase/trunk@1327337 13f79535-47bb-0310-9956-ffa450edef68
---
 .../org/apache/hadoop/hbase/MiniHBaseCluster.java  | 15 +++++++++++++++
 .../hbase/regionserver/TestFSErrorsExposed.java    | 13 +++++++++----
 2 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java b/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
index 6af9188ca0e..72554cb0e56 100644
--- a/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
+++ b/src/test/java/org/apache/hadoop/hbase/MiniHBaseCluster.java
@@ -36,6 +36,8 @@ import org.apache.hadoop.hbase.regionserver.HRegion;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
 import org.apache.hadoop.hbase.security.User;
 import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.MasterThread;
+import org.apache.hadoop.hbase.util.JVMClusterUtil.RegionServerThread;
 import org.apache.hadoop.hbase.util.JVMClusterUtil;
 import org.apache.hadoop.hbase.util.Threads;
 import org.apache.hadoop.io.MapWritable;
@@ -529,4 +531,17 @@ public class MiniHBaseCluster {
     }
     return count;
   }
+
+  /**
+   * Do a simulated kill of all masters and regionservers. Useful when it is
+   * impossible to bring the mini-cluster back for a clean shutdown.
+   */
+  public void killAll() {
+    for (RegionServerThread rst : getRegionServerThreads()) {
+      rst.getRegionServer().abort("killAll");
+    }
+    for (MasterThread masterThread : getMasterThreads()) {
+      masterThread.getMaster().abort("killAll", new Throwable());
+    }
+  }
 }
diff --git a/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java b/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java
index 89dfbf74af8..3ed3eaf8785 100644
--- a/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java
+++ b/src/test/java/org/apache/hadoop/hbase/regionserver/TestFSErrorsExposed.java
@@ -50,7 +50,6 @@ import org.apache.hadoop.hbase.util.Bytes;
 import org.junit.Test;
 import org.junit.experimental.categories.Category;
 
-
 /**
  * Test cases that ensure that file system level errors are bubbled up
  * appropriately to clients, rather than swallowed.
@@ -163,13 +162,16 @@ public class TestFSErrorsExposed {
    * removes the data from HDFS underneath it, and ensures that
    * errors are bubbled to the client.
    */
-  @Test
+  @Test(timeout=5 * 60 * 1000)
   public void testFullSystemBubblesFSErrors() throws Exception {
     try {
       // We set it not to run or it will trigger server shutdown while sync'ing
       // because all the datanodes are bad
       util.getConfiguration().setInt(
           "hbase.regionserver.optionallogflushinterval", Integer.MAX_VALUE);
+
+      util.getConfiguration().setInt("hbase.client.retries.number", 3);
+
       util.startMiniCluster(1);
       byte[] tableName = Bytes.toBytes("table");
       byte[] fam = Bytes.toBytes("fam");
@@ -204,7 +206,11 @@ public class TestFSErrorsExposed {
         assertTrue(e.getMessage().contains("Could not seek"));
       }
 
+      // Restart data nodes so that HBase can shut down cleanly.
+      util.getDFSCluster().restartDataNodes();
+
     } finally {
+      util.getMiniHBaseCluster().killAll();
       util.shutdownMiniCluster();
     }
   }
@@ -232,7 +238,7 @@ public class TestFSErrorsExposed {
       for (SoftReference<FaultyInputStream> is: inStreams) {
         is.get().startFaults();
       }
-    } 
+    }
   }
 
   static class FaultyInputStream extends FSDataInputStream {
@@ -265,4 +271,3 @@
   public org.apache.hadoop.hbase.ResourceCheckerJUnitRule cu =
     new org.apache.hadoop.hbase.ResourceCheckerJUnitRule();
 }
-
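
For tests that want to reuse this shutdown pattern, the sketch below condenses the sequence the patch establishes: bound the test with a timeout, cap client retries, restart the datanodes once the fault assertions are done, and kill everything before the final shutdown. This is an illustrative sketch, not part of the patch; the class name and the elided fault-injection body are hypothetical, and it assumes a JUnit 4 test holding an HBaseTestingUtility field named util, as TestFSErrorsExposed does.

import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.junit.Test;

// Hypothetical test class illustrating the teardown ordering from this patch.
public class MiniClusterTeardownSketch {
  private final HBaseTestingUtility util = new HBaseTestingUtility();

  // Bound the whole test so a wedged shutdown fails fast instead of hanging.
  @Test(timeout=5 * 60 * 1000)
  public void testBubblesFSErrors() throws Exception {
    try {
      // Fail client calls quickly rather than retrying against dead datanodes.
      util.getConfiguration().setInt("hbase.client.retries.number", 3);
      util.startMiniCluster(1);

      // ... shut down datanodes, trigger reads, assert the errors surface ...

      // Bring the datanodes back so HBase has a live filesystem to flush
      // and sync against while shutting down.
      util.getDFSCluster().restartDataNodes();
    } finally {
      // Abort all regionservers and the master first; if the cluster is
      // still wedged, a graceful shutdown could hang or fail intermittently.
      util.getMiniHBaseCluster().killAll();
      util.shutdownMiniCluster();
    }
  }
}

The ordering is the point of the sketch: killAll() downgrades the stop to simulated aborts, so shutdownMiniCluster() mostly has to reap threads rather than coordinate a graceful distributed shutdown over a filesystem that may still be broken.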