HBASE-25774 TestSyncReplicationStandbyKillRS#testStandbyKillRegionServer is flaky (#3189)
Wait for the restarter thread to finish before checking the state. Add more detailed logs. Signed-off-by: meiyi <myimeiyi@gmail.com>
This commit is contained in:
parent
d5c5e48839
commit
50920ee306
|
@ -68,12 +68,14 @@ public class TestSyncReplicationStandbyKillRS extends SyncReplicationTestBase {
|
||||||
UTIL1.shutdownMiniCluster();
|
UTIL1.shutdownMiniCluster();
|
||||||
|
|
||||||
JVMClusterUtil.MasterThread activeMaster = UTIL2.getMiniHBaseCluster().getMasterThread();
|
JVMClusterUtil.MasterThread activeMaster = UTIL2.getMiniHBaseCluster().getMasterThread();
|
||||||
|
String threadName = "RegionServer-Restarter";
|
||||||
Thread t = new Thread(() -> {
|
Thread t = new Thread(() -> {
|
||||||
try {
|
try {
|
||||||
List<JVMClusterUtil.RegionServerThread> regionServers =
|
List<JVMClusterUtil.RegionServerThread> regionServers =
|
||||||
UTIL2.getMiniHBaseCluster().getLiveRegionServerThreads();
|
UTIL2.getMiniHBaseCluster().getLiveRegionServerThreads();
|
||||||
for (JVMClusterUtil.RegionServerThread rst : regionServers) {
|
for (JVMClusterUtil.RegionServerThread rst : regionServers) {
|
||||||
ServerName serverName = rst.getRegionServer().getServerName();
|
ServerName serverName = rst.getRegionServer().getServerName();
|
||||||
|
LOG.debug("Going to stop [{}]", serverName);
|
||||||
rst.getRegionServer().stop("Stop RS for test");
|
rst.getRegionServer().stop("Stop RS for test");
|
||||||
waitForRSShutdownToStartAndFinish(activeMaster, serverName);
|
waitForRSShutdownToStartAndFinish(activeMaster, serverName);
|
||||||
JVMClusterUtil.RegionServerThread restarted =
|
JVMClusterUtil.RegionServerThread restarted =
|
||||||
|
@ -83,9 +85,11 @@ public class TestSyncReplicationStandbyKillRS extends SyncReplicationTestBase {
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
LOG.error("Failed to kill RS", e);
|
LOG.error("Failed to kill RS", e);
|
||||||
}
|
}
|
||||||
});
|
}, threadName);
|
||||||
t.start();
|
t.start();
|
||||||
|
|
||||||
|
LOG.debug("Going to transit peer {} to {} state", PEER_ID,
|
||||||
|
SyncReplicationState.DOWNGRADE_ACTIVE);
|
||||||
// Transit standby to DA to replay logs
|
// Transit standby to DA to replay logs
|
||||||
try {
|
try {
|
||||||
UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
|
UTIL2.getAdmin().transitReplicationPeerSyncReplicationState(PEER_ID,
|
||||||
|
@ -94,11 +98,18 @@ public class TestSyncReplicationStandbyKillRS extends SyncReplicationTestBase {
|
||||||
LOG.error("Failed to transit standby cluster to " + SyncReplicationState.DOWNGRADE_ACTIVE, e);
|
LOG.error("Failed to transit standby cluster to " + SyncReplicationState.DOWNGRADE_ACTIVE, e);
|
||||||
}
|
}
|
||||||
|
|
||||||
while (UTIL2.getAdmin().getReplicationPeerSyncReplicationState(PEER_ID)
|
LOG.debug("Waiting for the restarter thread {} to quit", threadName);
|
||||||
!= SyncReplicationState.DOWNGRADE_ACTIVE) {
|
t.join();
|
||||||
|
|
||||||
|
while (UTIL2.getAdmin()
|
||||||
|
.getReplicationPeerSyncReplicationState(PEER_ID) != SyncReplicationState.DOWNGRADE_ACTIVE) {
|
||||||
|
LOG.debug("Waiting for peer {} to be in {} state", PEER_ID,
|
||||||
|
SyncReplicationState.DOWNGRADE_ACTIVE);
|
||||||
Thread.sleep(SLEEP_TIME);
|
Thread.sleep(SLEEP_TIME);
|
||||||
}
|
}
|
||||||
|
LOG.debug("Going to verify the result, {} records expected", COUNT);
|
||||||
verify(UTIL2, 0, COUNT);
|
verify(UTIL2, 0, COUNT);
|
||||||
|
LOG.debug("Verification successfully done");
|
||||||
}
|
}
|
||||||
|
|
||||||
private void waitForRSShutdownToStartAndFinish(JVMClusterUtil.MasterThread activeMaster,
|
private void waitForRSShutdownToStartAndFinish(JVMClusterUtil.MasterThread activeMaster,
|
||||||
|
@ -106,15 +117,14 @@ public class TestSyncReplicationStandbyKillRS extends SyncReplicationTestBase {
|
||||||
ServerManager sm = activeMaster.getMaster().getServerManager();
|
ServerManager sm = activeMaster.getMaster().getServerManager();
|
||||||
// First wait for it to be in dead list
|
// First wait for it to be in dead list
|
||||||
while (!sm.getDeadServers().isDeadServer(serverName)) {
|
while (!sm.getDeadServers().isDeadServer(serverName)) {
|
||||||
LOG.debug("Waiting for [" + serverName + "] to be listed as dead in master");
|
LOG.debug("Waiting for {} to be listed as dead in master", serverName);
|
||||||
Thread.sleep(SLEEP_TIME);
|
Thread.sleep(SLEEP_TIME);
|
||||||
}
|
}
|
||||||
LOG.debug("Server [" + serverName + "] marked as dead, waiting for it to " +
|
LOG.debug("Server {} marked as dead, waiting for it to finish dead processing", serverName);
|
||||||
"finish dead processing");
|
|
||||||
while (sm.areDeadServersInProgress()) {
|
while (sm.areDeadServersInProgress()) {
|
||||||
LOG.debug("Server [" + serverName + "] still being processed, waiting");
|
LOG.debug("Server {} still being processed, waiting", serverName);
|
||||||
Thread.sleep(SLEEP_TIME);
|
Thread.sleep(SLEEP_TIME);
|
||||||
}
|
}
|
||||||
LOG.debug("Server [" + serverName + "] done with server shutdown processing");
|
LOG.debug("Server {} done with server shutdown processing", serverName);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue