Fix RareClusterStateIT (#42430) (#42580)

* It looks like, with very low likelihood, we might be cancelling a previous
publication instead of the one triggered by the given request.
   * Fixed by adding a wait for no in-progress publications
   * Also added debug logging that would've identified this problem
* Closes #36813
This commit is contained in:
Armin Braun 2019-05-27 13:57:17 +02:00 committed by GitHub
parent c4f44024af
commit a94d24ae5a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 11 additions and 3 deletions

View File

@ -1198,10 +1198,14 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery
*/
boolean cancelCommittedPublication() {
synchronized (mutex) {
if (currentPublication.isPresent() && currentPublication.get().isCommitted()) {
currentPublication.get().cancel("cancelCommittedPublication");
if (currentPublication.isPresent()) {
final CoordinatorPublication publication = currentPublication.get();
if (publication.isCommitted()) {
publication.cancel("cancelCommittedPublication");
logger.debug("Cancelled publication of [{}].", publication);
return true;
}
}
return false;
}
}

View File

@ -141,6 +141,10 @@ public class RareClusterStateIT extends ESIntegTestCase {
private <Req extends ActionRequest, Res extends ActionResponse> ActionFuture<Res> executeAndCancelCommittedPublication(
ActionRequestBuilder<Req, Res> req) throws Exception {
// Wait until no publication is in progress so that we do not accidentally cancel a publication other than the one triggered by
// the given request.
assertBusy(
() -> assertFalse(((Coordinator) internalCluster().getCurrentMasterNodeInstance(Discovery.class)).publicationInProgress()));
ActionFuture<Res> future = req.execute();
assertBusy(
() -> assertTrue(((Coordinator)internalCluster().getCurrentMasterNodeInstance(Discovery.class)).cancelCommittedPublication()));