diff --git a/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SnapshotLifecycleServiceTests.java b/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SnapshotLifecycleServiceTests.java index 874c2ce452d..d55d25d3e9c 100644 --- a/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SnapshotLifecycleServiceTests.java +++ b/x-pack/plugin/ilm/src/test/java/org/elasticsearch/xpack/slm/SnapshotLifecycleServiceTests.java @@ -31,6 +31,7 @@ import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.concurrent.CopyOnWriteArrayList; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; @@ -40,6 +41,7 @@ import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.containsString; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.greaterThan; +import static org.hamcrest.Matchers.hasItem; public class SnapshotLifecycleServiceTests extends ESTestCase { @@ -150,7 +152,6 @@ public class SnapshotLifecycleServiceTests extends ESTestCase { * Test new policies getting scheduled correctly, updated policies also being scheduled, * and deleted policies having their schedules cancelled. */ - @AwaitsFix(bugUrl = "https://github.com/elastic/elasticsearch/issues/44997") public void testPolicyCRUD() throws Exception { ClockMock clock = new ClockMock(); final AtomicInteger triggerCount = new AtomicInteger(0); @@ -206,13 +207,20 @@ public class SnapshotLifecycleServiceTests extends ESTestCase { sls.clusterChanged(event); assertThat(sls.getScheduler().scheduledJobIds(), equalTo(Collections.singleton("foo-2"))); + CopyOnWriteArrayList triggeredJobs = new CopyOnWriteArrayList<>(); trigger.set(e -> { - // Make sure the job got updated - assertThat(e.getJobName(), equalTo("foo-2")); + triggeredJobs.add(e.getJobName()); triggerCount.incrementAndGet(); }); clock.fastForwardSeconds(1); + // Let's make sure the job got updated + // We don't simply assert that triggeredJobs has one element with value "foo-2" because of a race condition that can see the + // list containing . This can happen because when we update the policy to version 2 (ie. to foo-2) we will + // cancel the existing policy (foo-1) without waiting for the thread executing foo-1 to actually interrupt + // (see org.elasticsearch.common.util.concurrent.FutureUtils#cancel) which means that foo-1 could actually get to be + // rescheduled and re-run before it is indeed cancelled. + assertBusy(() -> assertThat(triggeredJobs, hasItem("foo-2"))); assertBusy(() -> assertThat(triggerCount.get(), greaterThan(currentCount))); final int currentCount2 = triggerCount.get();