Fix RareClusterStateIT (#42430) (#42580)

* It looks like, with very low likelihood, we might be cancelling a previous publication instead of the one triggered by the given request.
* Fixed by adding a wait for no in-progress publications before executing the request.
* Also added debug logging that would have identified this problem.
* Closes #36813
2019-05-27 13:57:17 +02:00 · 2019-05-27 13:57:17 +02:00 · a94d24ae5a
parent c4f44024af
commit a94d24ae5a
2 changed files with 11 additions and 3 deletions
--- a/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java
+++ b/server/src/main/java/org/elasticsearch/cluster/coordination/Coordinator.java
@@ -1198,9 +1198,13 @@ public class Coordinator extends AbstractLifecycleComponent implements Discovery
     */
    boolean cancelCommittedPublication() {
        synchronized (mutex) {
-            if (currentPublication.isPresent() && currentPublication.get().isCommitted()) {
-                currentPublication.get().cancel("cancelCommittedPublication");
-                return true;
+            if (currentPublication.isPresent()) {
+                final CoordinatorPublication publication = currentPublication.get();
+                if (publication.isCommitted()) {
+                    publication.cancel("cancelCommittedPublication");
+                    logger.debug("Cancelled publication of [{}].", publication);
+                    return true;
+                }
            }
            return false;
        }
--- a/server/src/test/java/org/elasticsearch/cluster/coordination/RareClusterStateIT.java
+++ b/server/src/test/java/org/elasticsearch/cluster/coordination/RareClusterStateIT.java
@@ -141,6 +141,10 @@ public class RareClusterStateIT extends ESIntegTestCase {

    private <Req extends ActionRequest, Res extends ActionResponse> ActionFuture<Res> executeAndCancelCommittedPublication(
            ActionRequestBuilder<Req, Res> req) throws Exception {
+        // Wait for no publication in progress to not accidentally cancel a publication different from the one triggered by the given
+        // request.
+        assertBusy(
+            () -> assertFalse(((Coordinator) internalCluster().getCurrentMasterNodeInstance(Discovery.class)).publicationInProgress()));
        ActionFuture<Res> future = req.execute();
        assertBusy(
            () -> assertTrue(((Coordinator)internalCluster().getCurrentMasterNodeInstance(Discovery.class)).cancelCommittedPublication()));