Drain all disruption events until all nodes are healthy on AbstractCoordinatorTestCase (#62987)
Backport of #62954. Closes #61711.
This commit is contained in:
parent
f23603dafd
commit
e0f6b296ff
|
@ -371,14 +371,20 @@ public class AbstractCoordinatorTestCase extends ESTestCase {
|
|||
final int thisStep = step; // for lambdas
|
||||
|
||||
if (randomSteps <= step && finishTime == -1) {
|
||||
finishTime = deterministicTaskQueue.getLatestDeferredExecutionTime();
|
||||
if (coolDown) {
|
||||
// Heal all nodes BEFORE finishTime is set so it can take into account any pending disruption that
|
||||
// would prevent the cluster from reaching a stable state after cooling down. Additionally, prevent any new disruptions
|
||||
// from happening in this phase.
|
||||
// See #61711 for a particular instance where having unhealthy nodes while cooling down can be problematic.
|
||||
disconnectedNodes.clear();
|
||||
blackholedNodes.clear();
|
||||
deterministicTaskQueue.setExecutionDelayVariabilityMillis(DEFAULT_DELAY_VARIABILITY);
|
||||
logger.debug("----> [runRandomly {}] reducing delay variability and running until [{}ms]", step, finishTime);
|
||||
} else {
|
||||
logger.debug("----> [runRandomly {}] running until [{}ms] with delay variability of [{}ms]", step, finishTime,
|
||||
deterministicTaskQueue.getExecutionDelayVariabilityMillis());
|
||||
}
|
||||
finishTime = deterministicTaskQueue.getLatestDeferredExecutionTime();
|
||||
}
|
||||
|
||||
try {
|
||||
|
@ -445,12 +451,14 @@ public class AbstractCoordinatorTestCase extends ESTestCase {
|
|||
}
|
||||
break;
|
||||
case 1:
|
||||
if (clusterNode.disconnect()) {
|
||||
// Avoid disruptions during cool down period
|
||||
if (finishTime == -1 && clusterNode.disconnect()) {
|
||||
logger.debug("----> [runRandomly {}] disconnecting {}", step, clusterNode.getId());
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
if (clusterNode.blackhole()) {
|
||||
// Avoid disruptions during cool down period
|
||||
if (finishTime == -1 && clusterNode.blackhole()) {
|
||||
logger.debug("----> [runRandomly {}] blackholing {}", step, clusterNode.getId());
|
||||
}
|
||||
break;
|
||||
|
|
Loading…
Reference in New Issue