HBASE-25334 TestRSGroupsFallback.testFallback is flaky (#2728)

Signed-off-by: Duo Zhang <zhangduo@apache.org>
This commit is contained in:
haxl 2020-12-10 22:07:06 +08:00 committed by GitHub
parent 7a532f8328
commit a9b8c10f80
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 16 additions and 6 deletions

View File

@ -501,7 +501,8 @@ public class ServerManager {
* @return true if any RS are being processed as dead, false if not
*/
public boolean areDeadServersInProgress() throws IOException {
return master.getProcedures().stream().anyMatch(p -> p instanceof ServerCrashProcedure);
return master.getProcedures().stream()
.anyMatch(p -> !p.isFinished() && p instanceof ServerCrashProcedure);
}
void letRegionServersShutdown() {

View File

@ -25,6 +25,7 @@ import java.util.List;
import java.util.Set;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
@ -93,15 +94,19 @@ public class TestDeadServer {
}
@Test
public void testCrashProcedureReplay() throws IOException {
public void testCrashProcedureReplay() throws Exception {
HMaster master = TEST_UTIL.getHBaseCluster().getMaster();
final ProcedureExecutor<MasterProcedureEnv> pExecutor = master.getMasterProcedureExecutor();
ServerCrashProcedure proc = new ServerCrashProcedure(
pExecutor.getEnvironment(), hostname123, false, false);
pExecutor.stop();
ProcedureTestingUtility.submitAndWait(pExecutor, proc);
assertTrue(master.getServerManager().areDeadServersInProgress());
ProcedureTestingUtility.restart(pExecutor);
ProcedureTestingUtility.waitProcedure(pExecutor, proc);
assertFalse(master.getServerManager().areDeadServersInProgress());
}
@Test

View File

@ -25,12 +25,14 @@ import java.util.Collections;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.master.assignment.AssignmentTestingUtil;
import org.apache.hadoop.hbase.net.Address;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;
import org.apache.hadoop.hbase.testclassification.MediumTests;
import org.apache.hadoop.hbase.testclassification.RSGroupTests;
import org.apache.hadoop.hbase.util.Bytes;
@ -61,6 +63,7 @@ public class TestRSGroupsFallback extends TestRSGroupsBase {
public static void setUp() throws Exception {
Configuration conf = TEST_UTIL.getConfiguration();
conf.setBoolean(RSGroupBasedLoadBalancer.FALLBACK_GROUP_ENABLE_KEY, true);
conf.setInt(HConstants.HBASE_BALANCER_MAX_BALANCING, 0);
setUpTestBeforeClass();
MASTER.balanceSwitch(true);
}
@ -103,7 +106,7 @@ public class TestRSGroupsFallback extends TestRSGroupsBase {
// add a new server to default group, regions move to default group
TEST_UTIL.getMiniHBaseCluster().startRegionServerAndWait(60000);
MASTER.balance();
assertTrue(MASTER.balance());
assertRegionsInGroup(tableName, RSGroupInfo.DEFAULT_GROUP);
// add a new server to test group, regions move back
@ -111,14 +114,15 @@ public class TestRSGroupsFallback extends TestRSGroupsBase {
TEST_UTIL.getMiniHBaseCluster().startRegionServerAndWait(60000);
ADMIN.moveServersToRSGroup(
Collections.singleton(t.getRegionServer().getServerName().getAddress()), groupName);
MASTER.balance();
assertTrue(MASTER.balance());
assertRegionsInGroup(tableName, groupName);
TEST_UTIL.deleteTable(tableName);
}
private void assertRegionsInGroup(TableName table, String group) throws IOException {
TEST_UTIL.waitUntilAllRegionsAssigned(table);
ProcedureTestingUtility.waitAllProcedures(
TEST_UTIL.getMiniHBaseCluster().getMaster().getMasterProcedureExecutor());
RSGroupInfo rsGroup = ADMIN.getRSGroup(group);
MASTER.getAssignmentManager().getRegionStates().getRegionsOfTable(table).forEach(region -> {
Address regionOnServer = MASTER.getAssignmentManager().getRegionStates()