Drain all disruption events until all nodes are healthy on AbstractCoordinatorTestCase (#62987)

Backport of #62954
Closes #61711
This commit is contained in:
Francisco Fernández Castaño 2020-09-29 10:38:10 +02:00 committed by GitHub
parent f23603dafd
commit e0f6b296ff
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed file with 11 additions and 3 deletions

View File

@ -371,14 +371,20 @@ public class AbstractCoordinatorTestCase extends ESTestCase {
final int thisStep = step; // for lambdas
if (randomSteps <= step && finishTime == -1) {
finishTime = deterministicTaskQueue.getLatestDeferredExecutionTime();
if (coolDown) {
// Heal all nodes BEFORE finishTime is set so that it takes into account any pending disruption that
// would prevent the cluster from reaching a stable state after cooling down. Additionally, prevent any new
// disruptions from happening in this phase.
// See #61711 for a particular instance where having unhealthy nodes while cooling down can be problematic.
disconnectedNodes.clear();
blackholedNodes.clear();
deterministicTaskQueue.setExecutionDelayVariabilityMillis(DEFAULT_DELAY_VARIABILITY);
logger.debug("----> [runRandomly {}] reducing delay variability and running until [{}ms]", step, finishTime);
} else {
logger.debug("----> [runRandomly {}] running until [{}ms] with delay variability of [{}ms]", step, finishTime,
deterministicTaskQueue.getExecutionDelayVariabilityMillis());
}
finishTime = deterministicTaskQueue.getLatestDeferredExecutionTime();
}
try {
@ -445,12 +451,14 @@ public class AbstractCoordinatorTestCase extends ESTestCase {
}
break;
case 1:
if (clusterNode.disconnect()) {
// Avoid disruptions during cool down period
if (finishTime == -1 && clusterNode.disconnect()) {
logger.debug("----> [runRandomly {}] disconnecting {}", step, clusterNode.getId());
}
break;
case 2:
if (clusterNode.blackhole()) {
// Avoid disruptions during cool down period
if (finishTime == -1 && clusterNode.blackhole()) {
logger.debug("----> [runRandomly {}] blackholing {}", step, clusterNode.getId());
}
break;