SOLR-12923: add a latch to TestTriggerListener to harden tests that use it so they can deterministically know when all events have been processed

This hardens several flaky tests and allows the removal of several arbitrary sleep calls
This commit is contained in:
Chris Hostetter 2019-01-14 18:16:40 -07:00
parent d965b3547e
commit d970375cd2
1 changed file with 48 additions and 36 deletions

View File

@ -142,10 +142,13 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
triggerFinishedCount = new AtomicInteger();
failDummyAction = false;
listenerCreated = new CountDownLatch(1);
listenerEventLatch = new CountDownLatch(0);
public void testTriggerThrottling() throws Exception {
// for this test we want to create two triggers so we must assert that the actions were created twice
actionInitCalled = new CountDownLatch(2);
@ -441,7 +444,6 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
public void testNodeAddedTrigger() throws Exception {
SolrClient solrClient = cluster.simGetSolrClient();
@ -495,7 +497,6 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
public void testNodeLostTrigger() throws Exception {
SolrClient solrClient = cluster.simGetSolrClient();
@ -727,7 +728,6 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
// @BadApple(bugUrl="")
public void testEventFromRestoredState() throws Exception {
SolrClient solrClient = cluster.simGetSolrClient();
@ -835,7 +835,6 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
public void testNodeMarkersRegistration() throws Exception {
// for this test we want to create two triggers so we must assert that the actions were created twice
actionInitCalled = new CountDownLatch(2);
@ -972,6 +971,7 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
static final Map<String, List<CapturedEvent>> listenerEvents = new ConcurrentHashMap<>();
static final List<CapturedEvent> allListenerEvents = Collections.synchronizedList(new ArrayList<>());
static volatile CountDownLatch listenerCreated = new CountDownLatch(1);
static volatile CountDownLatch listenerEventLatch = new CountDownLatch(0);
static volatile boolean failDummyAction = false;
public static class TestTriggerListener extends TriggerListenerBase {
@ -984,10 +984,18 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
public synchronized void onEvent(TriggerEvent event, TriggerEventProcessorStage stage, String actionName,
ActionContext context, Throwable error, String message) {
List<CapturedEvent> lst = listenerEvents.computeIfAbsent(, s -> new ArrayList<>());
CapturedEvent ev = new CapturedEvent(cluster.getTimeSource().getTimeNs(), context, config, stage, actionName, event, message);
final CountDownLatch latch = listenerEventLatch;
synchronized (latch) {
if (0 == latch.getCount()) {
log.warn("Ignoring captured event since latch is 'full': {}", ev);
} else {
List<CapturedEvent> lst = listenerEvents.computeIfAbsent(, s -> new ArrayList<>());
@ -1004,6 +1012,8 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
public void testListeners() throws Exception {
listenerEventLatch = new CountDownLatch(4 + 5);
SolrClient solrClient = cluster.simGetSolrClient();
("{" +
@ -1018,10 +1028,6 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
"]" +
if (!actionInitCalled.await(3000 / SPEED, TimeUnit.MILLISECONDS)) {
fail("The TriggerAction should have been created by now");
("{" +
"'set-listener' : " +
@ -1049,6 +1055,8 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
assertTrue("The TriggerAction should have been init'ed w/in a reasonable amount of time",
actionInitCalled.await(10, TimeUnit.SECONDS));
failDummyAction = false;
@ -1058,9 +1066,9 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
assertTrue("The trigger did not fire at all", await);
assertEquals("both listeners should have fired", 2, listenerEvents.size());
assertTrue("the listeners shou;d have recorded all events w/in a reasonable amount of time",
listenerEventLatch.await(10, TimeUnit.SECONDS));
assertEquals("at least 2 event types should have been recorded", 2, listenerEvents.size());
// check foo events
List<CapturedEvent> testEvents = listenerEvents.get("foo");
@ -1118,12 +1126,15 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
triggerFiredLatch = new CountDownLatch(1);
failDummyAction = true;
listenerEventLatch = new CountDownLatch(4 + 4); // fewer total due to failDummyAction
newNode = cluster.simAddNode();
await = triggerFiredLatch.await(20000 / SPEED, TimeUnit.MILLISECONDS);
assertTrue("The trigger did not fire at all", await);
assertTrue("the listeners shoud have recorded all events w/in a reasonable amount of time",
listenerEventLatch.await(10, TimeUnit.SECONDS));
assertEquals("at least 2 event types should have been recorded", 2, listenerEvents.size());
// check foo events
testEvents = listenerEvents.get("foo");
@ -1160,10 +1171,10 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
public void testCooldown() throws Exception {
SolrClient solrClient = cluster.simGetSolrClient();
failDummyAction = false;
listenerEventLatch = new CountDownLatch(1);
waitForSeconds = 1;
("{" +
@ -1189,6 +1200,8 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
assertTrue("The TriggerAction should have been init'ed w/in a reasonable amount of time",
actionInitCalled.await(10, TimeUnit.SECONDS));
listenerCreated = new CountDownLatch(1);
@ -1197,16 +1210,17 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
boolean await = triggerFiredLatch.await(45000 / SPEED, TimeUnit.MILLISECONDS);
assertTrue("The trigger did not fire at all", await);
// wait for listener to capture the SUCCEEDED stage
assertTrue("the listener should have recorded all events w/in a reasonable amount of time",
listenerEventLatch.await(10, TimeUnit.SECONDS));
List<CapturedEvent> capturedEvents = listenerEvents.get("bar");
assertNotNull("no events for 'bar'!", capturedEvents);
// we may get a few IGNORED events if other tests caused events within cooldown period
assertTrue(capturedEvents.toString(), capturedEvents.size() > 0);
long prevTimestamp = capturedEvents.get(capturedEvents.size() - 1).timestamp;
assertEquals(capturedEvents.toString(), 1, capturedEvents.size());
long prevTimestamp = capturedEvents.get(0).timestamp;
// reset the trigger and captured events
listenerEventLatch = new CountDownLatch(1);
triggerFiredLatch = new CountDownLatch(1);
triggerFired.compareAndSet(true, false);
@ -1214,14 +1228,14 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
String newNode2 = cluster.simAddNode();
await = triggerFiredLatch.await(20000 / SPEED, TimeUnit.MILLISECONDS);
assertTrue("The trigger did not fire at all", await);
// wait for listener to capture the SUCCEEDED stage
assertTrue("the listener should have recorded all events w/in a reasonable amount of time",
listenerEventLatch.await(10, TimeUnit.SECONDS));
// there must be exactly one SUCCEEDED event
capturedEvents = listenerEvents.get("bar");
assertTrue(capturedEvents.toString(), capturedEvents.size() >= 1);
CapturedEvent ev = capturedEvents.get(capturedEvents.size() - 1);
assertEquals(capturedEvents.toString(), 1, capturedEvents.size());
CapturedEvent ev = capturedEvents.get(0);
assertEquals(ev.toString(), TriggerEventProcessorStage.SUCCEEDED, ev.stage);
// the difference between timestamps of the first SUCCEEDED and the last SUCCEEDED
// must be larger than cooldown period
@ -1267,7 +1281,6 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
//@AwaitsFix(bugUrl="") // this test is way too sensitive to timing, must be beasted before returned
public void testSearchRate() throws Exception {
SolrClient solrClient = cluster.simGetSolrClient();
String COLL1 = "collection1";
@ -1276,6 +1289,8 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
CloudTestUtils.waitForState(cluster, COLL1, 10, TimeUnit.SECONDS, CloudTestUtils.clusterShape(1, 2, false, true));
listenerEventLatch = new CountDownLatch(4);
("{" +
"'set-trigger' : {" +
@ -1308,10 +1323,6 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
// SolrParams query = params(CommonParams.Q, "*:*");
// for (int i = 0; i < 500; i++) {
// solrClient.query(COLL1, query);
// }
cluster.getSimClusterStateProvider().simSetCollectionValue(COLL1, "QUERY./select.requestTimes:1minRate", 500, false, true);
@ -1319,12 +1330,13 @@ public class TestSimTriggerIntegration extends SimSolrCloudTestCase {
assertTrue("The trigger did not start in time", await);
await = triggerFinishedLatch.await(60000 / SPEED, TimeUnit.MILLISECONDS);
assertTrue("The trigger did not finish in time", await);
// wait for listener to capture the SUCCEEDED stage
List<CapturedEvent> events = listenerEvents.get("srt");
assertTrue("the listener should have recorded all events w/in a reasonable amount of time",
listenerEventLatch.await(10, TimeUnit.SECONDS));
List<CapturedEvent> events = new ArrayList<>(listenerEvents.get("srt"));
assertNotNull("Could not find events for srt", events);
assertEquals(listenerEvents.toString(), 4, events.size());
assertEquals(events.toString(), 4, events.size());
assertEquals("AFTER_ACTION", events.get(0).stage.toString());
assertEquals("compute", events.get(0).actionName);
assertEquals("AFTER_ACTION", events.get(1).stage.toString());