From 5e65da64e229b737b71541c8bbc4a5514760d525 Mon Sep 17 00:00:00 2001 From: niuyulin Date: Fri, 7 May 2021 18:58:45 +0800 Subject: [PATCH] HBASE-25837 TestRollingRestart is flaky (#3220) Signed-off-by: Duo Zhang --- .../hbase/master/TestRollingRestart.java | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRollingRestart.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRollingRestart.java index 7530121bbef..3ac054c217b 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRollingRestart.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestRollingRestart.java @@ -38,6 +38,7 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.RegionInfo; import org.apache.hadoop.hbase.client.RegionLocator; import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure; import org.apache.hadoop.hbase.testclassification.LargeTests; import org.apache.hadoop.hbase.testclassification.MasterTests; import org.apache.hadoop.hbase.util.Bytes; @@ -68,6 +69,7 @@ public class TestRollingRestart { private static final Logger LOG = LoggerFactory.getLogger(TestRollingRestart.class); + private static HBaseTestingUtility TEST_UTIL; @Rule public TestName name = new TestName(); @@ -89,7 +91,7 @@ public class TestRollingRestart { Configuration conf = HBaseConfiguration.create(); conf.setBoolean(HConstants.HBASE_SPLIT_WAL_COORDINATED_BY_ZK, splitWALCoordinatedByZK); - HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(conf); + TEST_UTIL = new HBaseTestingUtility(conf); StartMiniClusterOption option = StartMiniClusterOption.builder() .numMasters(NUM_MASTERS).numRegionServers(NUM_RS).numDataNodes(NUM_RS).build(); TEST_UTIL.startMiniCluster(option); @@ -218,8 +220,18 @@ public class TestRollingRestart { TEST_UTIL.shutdownMiniCluster(); } + /** + * Checks if the 
SCP of the specified dead server has been executed. + * @return true if the SCP of the specified server has been executed, false if not + */ + private boolean isDeadServerSCPExecuted(ServerName serverName) throws IOException { + return TEST_UTIL.getMiniHBaseCluster().getMaster().getProcedures().stream() + .anyMatch(p -> p instanceof ServerCrashProcedure + && ((ServerCrashProcedure) p).getServerName().equals(serverName)); + } + private void waitForRSShutdownToStartAndFinish(MasterThread activeMaster, - ServerName serverName) throws InterruptedException { + ServerName serverName) throws InterruptedException, IOException { ServerManager sm = activeMaster.getMaster().getServerManager(); // First wait for it to be in dead list while (!sm.getDeadServers().isDeadServer(serverName)) { @@ -228,6 +240,9 @@ public class TestRollingRestart { } log("Server [" + serverName + "] marked as dead, waiting for it to " + "finish dead processing"); + + TEST_UTIL.waitFor(60000, () -> isDeadServerSCPExecuted(serverName)); + while (sm.areDeadServersInProgress()) { log("Server [" + serverName + "] still being processed, waiting"); Thread.sleep(100);