Fix RareClusterStateIT (#42430) (#42580)

* It looks like, with very low likelihood, we might be cancelling a previous publication instead of
the one triggered by the given request.
   * Fixed by waiting until no publication is in progress before executing the request
   * Also added debug logging that would've identified this problem
* Closes #36813
This commit is contained in:
Armin Braun 2019-05-27 13:57:17 +02:00 committed by GitHub
parent c4f44024af
commit a94d24ae5a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 11 additions and 3 deletions

View File

@ -1198,9 +1198,13 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery
*/ */
boolean cancelCommittedPublication() { boolean cancelCommittedPublication() {
synchronized (mutex) { synchronized (mutex) {
if (currentPublication.isPresent() && currentPublication.get().isCommitted()) { if (currentPublication.isPresent()) {
currentPublication.get().cancel("cancelCommittedPublication"); final CoordinatorPublication publication = currentPublication.get();
return true; if (publication.isCommitted()) {
publication.cancel("cancelCommittedPublication");
logger.debug("Cancelled publication of [{}].", publication);
return true;
}
} }
return false; return false;
} }

View File

@ -141,6 +141,10 @@ public class RareClusterStateIT extends ESIntegTestCase {
private <Req extends ActionRequest, Res extends ActionResponse> ActionFuture<Res> executeAndCancelCommittedPublication( private <Req extends ActionRequest, Res extends ActionResponse> ActionFuture<Res> executeAndCancelCommittedPublication(
ActionRequestBuilder<Req, Res> req) throws Exception { ActionRequestBuilder<Req, Res> req) throws Exception {
// Wait for no publication in progress to not accidentally cancel a publication different from the one triggered by the given
// request.
assertBusy(
() -> assertFalse(((Coordinator) internalCluster().getCurrentMasterNodeInstance(Discovery.class)).publicationInProgress()));
ActionFuture<Res> future = req.execute(); ActionFuture<Res> future = req.execute();
assertBusy( assertBusy(
() -> assertTrue(((Coordinator)internalCluster().getCurrentMasterNodeInstance(Discovery.class)).cancelCommittedPublication())); () -> assertTrue(((Coordinator)internalCluster().getCurrentMasterNodeInstance(Discovery.class)).cancelCommittedPublication()));