From 83cca5cd06aa0fdd9ce98415d6e9a0b1ecdfc7f5 Mon Sep 17 00:00:00 2001 From: Shalin Shekhar Mangar Date: Fri, 30 Mar 2018 16:42:35 +0530 Subject: [PATCH] SOLR-12133: Fix failures in TriggerIntegrationTest.testEventQueue due to race conditions --- .../cloud/autoscaling/ScheduledTriggers.java | 13 +++++++++ .../autoscaling/TriggerIntegrationTest.java | 28 ++++++++++--------- .../apache/solr/common/util/ExecutorUtil.java | 4 +++ 3 files changed, 32 insertions(+), 13 deletions(-) diff --git a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java index 0e21b049f6e..28efe92de8f 100644 --- a/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java +++ b/solr/core/src/java/org/apache/solr/cloud/autoscaling/ScheduledTriggers.java @@ -66,6 +66,7 @@ import static org.apache.solr.common.params.AutoScalingParams.ACTION_THROTTLE_PE import static org.apache.solr.common.params.AutoScalingParams.TRIGGER_COOLDOWN_PERIOD_SECONDS; import static org.apache.solr.common.params.AutoScalingParams.TRIGGER_CORE_POOL_SIZE; import static org.apache.solr.common.params.AutoScalingParams.TRIGGER_SCHEDULE_DELAY_SECONDS; +import static org.apache.solr.common.util.ExecutorUtil.awaitTermination; /** * Responsible for scheduling active triggers, starting and stopping them and @@ -497,9 +498,21 @@ public class ScheduledTriggers implements Closeable { } // shutdown and interrupt all running tasks because there's no longer any // guarantee about cluster state + log.debug("Shutting down scheduled thread pool executor now"); scheduledThreadPoolExecutor.shutdownNow(); + + log.debug("Shutting down action executor now"); actionExecutor.shutdownNow(); + listeners.close(); + + log.debug("Awaiting termination for action executor"); + awaitTermination(actionExecutor); + + log.debug("Awaiting termination for scheduled thread pool executor"); + awaitTermination(scheduledThreadPoolExecutor); + + log.debug("ScheduledTriggers closed completely"); } private class TriggerWrapper implements Runnable, Closeable { diff --git a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java index 5dfe34c35bb..2902c488886 100644 --- a/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java +++ b/solr/core/src/test/org/apache/solr/cloud/autoscaling/TriggerIntegrationTest.java @@ -64,15 +64,16 @@ import static org.apache.solr.common.cloud.ZkStateReader.SOLR_AUTOSCALING_CONF_P public class TriggerIntegrationTest extends SolrCloudTestCase { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); - private static CountDownLatch actionConstructorCalled; - private static CountDownLatch actionInitCalled; - private static CountDownLatch triggerFiredLatch; - private static int waitForSeconds = 1; - private static CountDownLatch actionStarted; - private static CountDownLatch actionInterrupted; - private static CountDownLatch actionCompleted; + private static volatile CountDownLatch actionConstructorCalled; + private static volatile CountDownLatch actionInitCalled; + private static volatile CountDownLatch triggerFiredLatch; + private static volatile int waitForSeconds = 1; + private static volatile CountDownLatch actionStarted; + private static volatile CountDownLatch actionInterrupted; + private static volatile CountDownLatch actionCompleted; private static AtomicBoolean triggerFired; private static Set events = ConcurrentHashMap.newKeySet(); + public static volatile long eventQueueActionWait = 5000; private static SolrCloudManager cloudManager; // use the same time source as triggers use @@ -166,6 +167,7 @@ public class TriggerIntegrationTest extends SolrCloudTestCase { events.clear(); listenerEvents.clear(); lastActionExecutedAt.set(0); + eventQueueActionWait = 5000; while (cluster.getJettySolrRunners().size() < 2) { // perhaps a test stopped a node but didn't start it back // lets start a node @@ -415,14 +417,17 @@ public class TriggerIntegrationTest extends SolrCloudTestCase { public void process(TriggerEvent event, ActionContext actionContext) { log.info("-- event: " + event); events.add(event); + long eventQueueActionWaitCopy = eventQueueActionWait; getActionStarted().countDown(); try { - Thread.sleep(eventQueueActionWait); + log.info("-- Going to sleep for {} ms", eventQueueActionWaitCopy); + Thread.sleep(eventQueueActionWaitCopy); + log.info("-- Woke up after sleeping for {} ms", eventQueueActionWaitCopy); triggerFired.compareAndSet(false, true); getActionCompleted().countDown(); } catch (InterruptedException e) { + log.info("-- Interrupted"); getActionInterrupted().countDown(); - return; } } @@ -434,10 +439,7 @@ public class TriggerIntegrationTest extends SolrCloudTestCase { } } - public static long eventQueueActionWait = 5000; - @Test - @BadApple(bugUrl="https://issues.apache.org/jira/browse/SOLR-12028") public void testEventQueue() throws Exception { waitForSeconds = 1; CloudSolrClient solrClient = cluster.getSolrClient(); @@ -471,6 +473,7 @@ public class TriggerIntegrationTest extends SolrCloudTestCase { JettySolrRunner newNode = cluster.startJettySolrRunner(); boolean await = actionStarted.await(60, TimeUnit.SECONDS); assertTrue("action did not start", await); + eventQueueActionWait = 1; // event should be there NodeAddedTrigger.NodeAddedEvent nodeAddedEvent = (NodeAddedTrigger.NodeAddedEvent) events.iterator().next(); assertNotNull(nodeAddedEvent); @@ -478,7 +481,6 @@ public class TriggerIntegrationTest extends SolrCloudTestCase { assertFalse(triggerFired.get()); events.clear(); actionStarted = new CountDownLatch(1); - eventQueueActionWait = 1; // kill overseer leader cluster.stopJettySolrRunner(overseerLeaderIndex); Thread.sleep(5000); diff --git a/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java b/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java index a0457262053..74580167984 100644 --- a/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java +++ b/solr/solrj/src/java/org/apache/solr/common/util/ExecutorUtil.java @@ -73,6 +73,10 @@ public class ExecutorUtil { public static void shutdownAndAwaitTermination(ExecutorService pool) { pool.shutdown(); // Disable new tasks from being submitted + awaitTermination(pool); + } + + public static void awaitTermination(ExecutorService pool) { boolean shutdown = false; while (!shutdown) { try {