From 98b7aa77655872bc972e97251d7884745975ccb2 Mon Sep 17 00:00:00 2001 From: haxl Date: Thu, 17 Dec 2020 05:21:53 +0800 Subject: [PATCH] HBASE-25334 TestRSGroupsFallback.testFallback is flaky (#2775) Signed-off-by: stack --- .../hbase/rsgroup/TestRSGroupsFallback.java | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsFallback.java b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsFallback.java index d9b35591635..15220c4b5b4 100644 --- a/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsFallback.java +++ b/hbase-rsgroup/src/test/java/org/apache/hadoop/hbase/rsgroup/TestRSGroupsFallback.java @@ -24,16 +24,17 @@ import java.util.Collections; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; import org.apache.hadoop.hbase.client.TableDescriptor; import org.apache.hadoop.hbase.client.TableDescriptorBuilder; import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil; import org.apache.hadoop.hbase.net.Address; +import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.JVMClusterUtil; -import org.apache.hadoop.hbase.util.Threads; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; @@ -59,6 +60,7 @@ public class TestRSGroupsFallback extends TestRSGroupsBase { public static void setUp() throws Exception { Configuration conf = TEST_UTIL.getConfiguration(); conf.setBoolean(RSGroupBasedLoadBalancer.FALLBACK_GROUP_ENABLE_KEY, true); + conf.setInt(HConstants.HBASE_BALANCER_MAX_BALANCING, 0); setUpTestBeforeClass(); master.balanceSwitch(true); } @@ -100,27 +102,32 @@ public class TestRSGroupsFallback extends TestRSGroupsBase { assertRegionsInGroup(tableName, FALLBACK_GROUP); // add a new server to default group, regions move to default group - TEST_UTIL.getMiniHBaseCluster().startRegionServerAndWait(60000); - master.balance(); + JVMClusterUtil.RegionServerThread t = + TEST_UTIL.getMiniHBaseCluster().startRegionServerAndWait(60000); + Address startRSAddress = t.getRegionServer().getServerName().getAddress(); + TEST_UTIL.waitFor(3000, () -> rsGroupAdmin.getRSGroupInfo(RSGroupInfo.DEFAULT_GROUP) + .containsServer(startRSAddress)); + assertTrue(master.balance()); assertRegionsInGroup(tableName, RSGroupInfo.DEFAULT_GROUP); // add a new server to test group, regions move back - JVMClusterUtil.RegionServerThread t = - TEST_UTIL.getMiniHBaseCluster().startRegionServerAndWait(60000); + t = TEST_UTIL.getMiniHBaseCluster().startRegionServerAndWait(60000); rsGroupAdmin.moveServers( Collections.singleton(t.getRegionServer().getServerName().getAddress()), groupName); - master.balance(); + assertTrue(master.balance()); assertRegionsInGroup(tableName, groupName); TEST_UTIL.deleteTable(tableName); } private void assertRegionsInGroup(TableName tableName, String group) throws IOException { - RSGroupInfo fallbackGroup = rsGroupAdmin.getRSGroupInfo(group); + ProcedureTestingUtility.waitAllProcedures( + TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor()); + RSGroupInfo groupInfo = rsGroupAdmin.getRSGroupInfo(group); master.getAssignmentManager().getRegionStates().getRegionsOfTable(tableName).forEach(region -> { Address regionOnServer = master.getAssignmentManager().getRegionStates() .getRegionAssignments().get(region).getAddress(); - assertTrue(fallbackGroup.getServers().contains(regionOnServer)); + assertTrue(groupInfo.getServers().contains(regionOnServer)); }); } @@ -128,7 +135,7 @@ public class TestRSGroupsFallback extends TestRSGroupsBase { for (Address server : rsGroupAdmin.getRSGroupInfo(groupName).getServers()) { AssignmentTestingUtil.crashRs(TEST_UTIL, getServerName(server), true); } - Threads.sleep(1000); + TEST_UTIL.waitFor(30000, () -> !master.getServerManager().areDeadServersInProgress()); TEST_UTIL.waitUntilNoRegionsInTransition(60000); } }