From a94d24ae5ad2f7a1695cf83c2ad191c456c7dab4 Mon Sep 17 00:00:00 2001 From: Armin Braun Date: Mon, 27 May 2019 13:57:17 +0200 Subject: [PATCH] Fix RareClusterStateIT (#42430) (#42580) * It looks like we might be cancelling a previous publication instead of the one triggered by the given request with a very low likelihood. * Fixed by adding a wait for no in-progress publications * Also added debug logging that would've identified this problem * Closes #36813 --- .../cluster/coordination/Coordinator.java | 10 +++++++--- .../cluster/coordination/RareClusterStateIT.java | 4 ++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java index e6acac17da9..af9a38bec49 100644 --- a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java +++ b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java @@ -1198,9 +1198,13 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery */ boolean cancelCommittedPublication() { synchronized (mutex) { - if (currentPublication.isPresent() && currentPublication.get().isCommitted()) { - currentPublication.get().cancel("cancelCommittedPublication"); - return true; + if (currentPublication.isPresent()) { + final CoordinatorPublication publication = currentPublication.get(); + if (publication.isCommitted()) { + publication.cancel("cancelCommittedPublication"); + logger.debug("Cancelled publication of [{}].", publication); + return true; + } } return false; } diff --git a/server/src/test/java/org/elasticsearch/cluster/coordination/RareClusterStateIT.java b/server/src/test/java/org/elasticsearch/cluster/coordination/RareClusterStateIT.java index 4c0168c6e3a..614bede3288 100644 --- a/server/src/test/java/org/elasticsearch/cluster/coordination/RareClusterStateIT.java +++ b/server/src/test/java/org/elasticsearch/cluster/coordination/RareClusterStateIT.java @@ -141,6 +141,10 @@ public class RareClusterStateIT extends ESIntegTestCase { private ActionFuture executeAndCancelCommittedPublication( ActionRequestBuilder req) throws Exception { + // Wait for no publication in progress to not accidentally cancel a publication different from the one triggered by the given + // request. + assertBusy( + () -> assertFalse(((Coordinator) internalCluster().getCurrentMasterNodeInstance(Discovery.class)).publicationInProgress())); ActionFuture future = req.execute(); assertBusy( () -> assertTrue(((Coordinator)internalCluster().getCurrentMasterNodeInstance(Discovery.class)).cancelCommittedPublication()));