HBASE-19837 Flakey TestRegionLoad; ADDENDUM Report more often and wait less time on change (also add some debug on TestMetaShutdown test)

This commit is contained in:
Michael Stack 2018-02-05 08:39:46 -08:00
parent c5f86f2ce4
commit c245bd5c03
2 changed files with 21 additions and 16 deletions

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -64,6 +64,9 @@ public class TestRegionLoad {
@BeforeClass @BeforeClass
public static void beforeClass() throws Exception { public static void beforeClass() throws Exception {
// Make servers report eagerly. This test is about looking at the cluster status reported.
// Make it so we don't have to wait around too long to see change.
UTIL.getConfiguration().setInt("hbase.regionserver.msginterval", 500);
UTIL.startMiniCluster(4); UTIL.startMiniCluster(4);
admin = UTIL.getAdmin(); admin = UTIL.getAdmin();
admin.setBalancerRunning(false, true); admin.setBalancerRunning(false, true);
@ -114,10 +117,11 @@ public class TestRegionLoad {
} }
checkRegionsAndRegionLoads(tableRegions, regionLoads); checkRegionsAndRegionLoads(tableRegions, regionLoads);
} }
int pause = UTIL.getConfiguration().getInt("hbase.regionserver.msginterval", 3000);
// Just wait here. If this fixes the test, come back and do a better job. // Just wait here. If this fixes the test, come back and do a better job.
// Thought is that cluster status is stale. // Would have to redo the below so we can wait on the cluster status changing.
Threads.sleep(10000); Threads.sleep(2 * pause);
// Check RegionLoad matches the regionLoad from ClusterStatus // Check RegionLoad matches the regionLoad from ClusterStatus
ClusterStatus clusterStatus ClusterStatus clusterStatus

View File

@ -1,4 +1,4 @@
/** /*
* Licensed to the Apache Software Foundation (ASF) under one * Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file * or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information * distributed with this work for additional information
@ -23,7 +23,6 @@ import static org.junit.Assert.assertTrue;
import java.io.IOException; import java.io.IOException;
import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CoordinatedStateManager;
import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseClassTestRule;
import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HBaseTestingUtility;
import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionInfo;
@ -43,13 +42,15 @@ import org.junit.BeforeClass;
import org.junit.ClassRule; import org.junit.ClassRule;
import org.junit.Test; import org.junit.Test;
import org.junit.experimental.categories.Category; import org.junit.experimental.categories.Category;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/** /**
* Tests handling of meta-carrying region server failover. * Tests handling of meta-carrying region server failover.
*/ */
@Category(MediumTests.class) @Category(MediumTests.class)
public class TestMetaShutdownHandler { public class TestMetaShutdownHandler {
private static final Logger LOG = LoggerFactory.getLogger(TestMetaShutdownHandler.class);
@ClassRule @ClassRule
public static final HBaseClassTestRule CLASS_RULE = public static final HBaseClassTestRule CLASS_RULE =
HBaseClassTestRule.forClass(TestMetaShutdownHandler.class); HBaseClassTestRule.forClass(TestMetaShutdownHandler.class);
@ -80,7 +81,6 @@ public class TestMetaShutdownHandler {
@Test (timeout=180000) @Test (timeout=180000)
public void testExpireMetaRegionServer() throws Exception { public void testExpireMetaRegionServer() throws Exception {
MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster();
HMaster master = cluster.getMaster(); HMaster master = cluster.getMaster();
RegionStates regionStates = master.getAssignmentManager().getRegionStates(); RegionStates regionStates = master.getAssignmentManager().getRegionStates();
ServerName metaServerName = regionStates.getRegionServerOfRegion( ServerName metaServerName = regionStates.getRegionServerOfRegion(
@ -88,23 +88,23 @@ public class TestMetaShutdownHandler {
if (master.getServerName().equals(metaServerName) || metaServerName == null if (master.getServerName().equals(metaServerName) || metaServerName == null
|| !metaServerName.equals(cluster.getServerHoldingMeta())) { || !metaServerName.equals(cluster.getServerHoldingMeta())) {
// Move meta off master // Move meta off master
metaServerName = cluster.getLiveRegionServerThreads() metaServerName =
.get(0).getRegionServer().getServerName(); cluster.getLiveRegionServerThreads().get(0).getRegionServer().getServerName();
master.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(), master.move(HRegionInfo.FIRST_META_REGIONINFO.getEncodedNameAsBytes(),
Bytes.toBytes(metaServerName.getServerName())); Bytes.toBytes(metaServerName.getServerName()));
TEST_UTIL.waitUntilNoRegionsInTransition(60000); TEST_UTIL.waitUntilNoRegionsInTransition(60000);
metaServerName = regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO);
} }
RegionState metaState = RegionState metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
MetaTableLocator.getMetaRegionState(master.getZooKeeper()); assertEquals("Wrong state for meta!", RegionState.State.OPEN, metaState.getState());
assertEquals("Meta should be not in transition", RegionState.State.OPEN, metaState.getState()); assertNotEquals("Meta is on master!", metaServerName, master.getServerName());
assertNotEquals("Meta should be moved off master",
metaServerName, master.getServerName());
// Delete the ephemeral node of the meta-carrying region server. // Delete the ephemeral node of the meta-carrying region server.
// This triggers the expiry of this region server on the master. // This triggers the expiry of this region server on the master.
String rsEphemeralNodePath = String rsEphemeralNodePath =
ZNodePaths.joinZNode(master.getZooKeeper().znodePaths.rsZNode, metaServerName.toString()); ZNodePaths.joinZNode(master.getZooKeeper().znodePaths.rsZNode, metaServerName.toString());
ZKUtil.deleteNode(master.getZooKeeper(), rsEphemeralNodePath); ZKUtil.deleteNode(master.getZooKeeper(), rsEphemeralNodePath);
LOG.info("Deleted the znode for the RegionServer hosting hbase:meta; waiting on SSH");
// Wait for SSH to finish // Wait for SSH to finish
final ServerManager serverManager = master.getServerManager(); final ServerManager serverManager = master.getServerManager();
final ServerName priorMetaServerName = metaServerName; final ServerName priorMetaServerName = metaServerName;
@ -115,14 +115,15 @@ public class TestMetaShutdownHandler {
&& !serverManager.areDeadServersInProgress(); && !serverManager.areDeadServersInProgress();
} }
}); });
LOG.info("Past wait on RIT");
TEST_UTIL.waitUntilNoRegionsInTransition(60000); TEST_UTIL.waitUntilNoRegionsInTransition(60000);
// Now, make sure meta is assigned // Now, make sure meta is assigned
assertTrue("Meta should be assigned", assertTrue("Meta should be assigned",
regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO)); regionStates.isRegionOnline(HRegionInfo.FIRST_META_REGIONINFO));
// Now, make sure meta is registered in zk // Now, make sure meta is registered in zk
metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper()); metaState = MetaTableLocator.getMetaRegionState(master.getZooKeeper());
assertEquals("Meta should be not in transition", RegionState.State.OPEN, metaState.getState()); assertEquals("Meta should not be in transition", RegionState.State.OPEN,
metaState.getState());
assertEquals("Meta should be assigned", metaState.getServerName(), assertEquals("Meta should be assigned", metaState.getServerName(),
regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO)); regionStates.getRegionServerOfRegion(HRegionInfo.FIRST_META_REGIONINFO));
assertNotEquals("Meta should be assigned on a different server", assertNotEquals("Meta should be assigned on a different server",