From b906980e149353904392bd7461766fd138eda95e Mon Sep 17 00:00:00 2001 From: Ted Yu Date: Mon, 4 Dec 2017 14:12:59 +0800 Subject: [PATCH] HBASE-19056 TestCompactionInDeadRegionServer is top of the flakies charts! Signed-off-by: zhangduo --- .../hadoop/hbase/HBaseTestingUtility.java | 15 ++++++++++++ .../TestCompactionInDeadRegionServer.java | 24 ++++++++++++++++--- 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java index 4e65651b566..92da89a12a9 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/HBaseTestingUtility.java @@ -2510,6 +2510,21 @@ public class HBaseTestingUtility extends HBaseCommonTestingUtility { return rows; } + /** + * Find any region server in the mini cluster other than the given one (compared with ==). + * @param rs the region server to skip + * @return the first other region server found, or null if there is none + */ + public HRegionServer getOtherRegionServer(HRegionServer rs) { + for (JVMClusterUtil.RegionServerThread rst : + getMiniHBaseCluster().getRegionServerThreads()) { + if (!(rst.getRegionServer() == rs)) { + return rst.getRegionServer(); + } + } + return null; + } + /** * Tool to get the reference to the region server object that holds the * region of the specified user table. 
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactionInDeadRegionServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactionInDeadRegionServer.java index 9bd74d021a5..1b39a6d3630 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactionInDeadRegionServer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestCompactionInDeadRegionServer.java @@ -24,6 +24,8 @@ import java.io.IOException; import java.util.Arrays; import java.util.List; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HConstants; @@ -59,6 +61,7 @@ import org.junit.runners.Parameterized.Parameters; @RunWith(Parameterized.class) @Category({ RegionServerTests.class, LargeTests.class }) public class TestCompactionInDeadRegionServer { + private static final Log LOG = LogFactory.getLog(TestCompactionInDeadRegionServer.class); private static final HBaseTestingUtility UTIL = new HBaseTestingUtility(); @@ -91,7 +94,7 @@ public class TestCompactionInDeadRegionServer { @Parameters(name = "{index}: wal={0}") public static List params() { return Arrays.asList(new Object[] { FSHLogProvider.class }, - new Object[] { AsyncFSWALProvider.class }); + new Object[] { AsyncFSWALProvider.class }); } @Before @@ -119,12 +122,27 @@ public class TestCompactionInDeadRegionServer { @Test public void test() throws Exception { + HRegionServer regionSvr = UTIL.getRSForFirstRegionInTable(TABLE_NAME); + HRegion region = regionSvr.getRegions(TABLE_NAME).get(0); + String regName = region.getRegionInfo().getEncodedName(); + List metaRegs = regionSvr.getRegions(TableName.META_TABLE_NAME); + if (metaRegs != null && !metaRegs.isEmpty()) { + LOG.info("meta is on the same server: " + regionSvr); + // when region is on same server as hbase:meta, 
reassigning meta would abort the server + // since WAL is broken. + // so the region is moved to a different server + HRegionServer otherRs = UTIL.getOtherRegionServer(regionSvr); + UTIL.moveRegionAndWait(region.getRegionInfo(), otherRs.getServerName()); + LOG.info("Moved region: " + regName + " to " + otherRs.getServerName()); + } HRegionServer rsToSuspend = UTIL.getRSForFirstRegionInTable(TABLE_NAME); - HRegion region = (HRegion) rsToSuspend.getRegions(TABLE_NAME).get(0); + region = rsToSuspend.getRegions(TABLE_NAME).get(0); + ZKWatcher watcher = UTIL.getZooKeeperWatcher(); watcher.getRecoverableZooKeeper().delete( ZNodePaths.joinZNode(watcher.getZNodePaths().rsZNode, rsToSuspend.getServerName().toString()), -1); + LOG.info("suspending " + rsToSuspend); UTIL.waitFor(60000, 1000, new ExplainingPredicate() { @Override @@ -148,7 +166,7 @@ public class TestCompactionInDeadRegionServer { fail("Should fail as our wal file has already been closed, " + "and walDir has also been renamed"); } catch (Exception e) { - // expected + LOG.debug("expected exception: ", e); } Table table = UTIL.getConnection().getTable(TABLE_NAME); // should not hit FNFE