diff --git a/solr/core/src/test/org/apache/solr/handler/admin/ThreadDumpHandlerTest.java b/solr/core/src/test/org/apache/solr/handler/admin/ThreadDumpHandlerTest.java index aab2b776384..1828b90629f 100644 --- a/solr/core/src/test/org/apache/solr/handler/admin/ThreadDumpHandlerTest.java +++ b/solr/core/src/test/org/apache/solr/handler/admin/ThreadDumpHandlerTest.java @@ -36,7 +36,6 @@ import org.junit.BeforeClass; * Fixing the ownershipt should be possible using latches, but fixing the '*-blocked' threads may not be possible * w/o polling */ -@SolrTestCaseJ4.AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-14635") public class ThreadDumpHandlerTest extends SolrTestCaseJ4 { private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); @@ -47,6 +46,16 @@ public class ThreadDumpHandlerTest extends SolrTestCaseJ4 { } public void testMonitor() throws Exception { + doTestMonitor(false); + } + + /* checking for the BLOCKED thread requires some polling, so only do it nightly */ + @Nightly + public void testMonitorBlocked() throws Exception { + doTestMonitor(true); + } + + public void doTestMonitor(final boolean checkBlockedThreadViaPolling) throws Exception { assumeTrue("monitor checking not supported on this JVM", ManagementFactory.getThreadMXBean().isObjectMonitorUsageSupported()); @@ -54,51 +63,92 @@ public class ThreadDumpHandlerTest extends SolrTestCaseJ4 { final class TestMonitorStruct { /* empty */ } final List failures = new ArrayList<>(); - final CountDownLatch latch = new CountDownLatch(1); + final CountDownLatch lockIsHeldLatch = new CountDownLatch(1); + final CountDownLatch doneWithTestLatch = new CountDownLatch(1); final Object monitor = new TestMonitorStruct(); - final Thread owner = new Thread(() -> { + final Thread ownerT = new Thread(() -> { synchronized (monitor) { - log.info("monitor owner waiting for latch to release me..."); + lockIsHeldLatch.countDown(); + log.info("monitor ownerT waiting for doneWithTestLatch to release me..."); try { - if ( ! latch.await(5, TimeUnit.SECONDS ) ){ - failures.add("owner: never saw latch release"); + if ( ! doneWithTestLatch.await(30, TimeUnit.SECONDS ) ){ + failures.add("ownerT: never saw doneWithTestLatch released"); } } catch (InterruptedException ie) { - failures.add("owner: " + ie.toString()); + failures.add("ownerT: " + ie.toString()); } } }, "test-thread-monitor-owner"); - final Thread blocked = new Thread(() -> { - log.info("blocked waiting for monitor..."); + + // only used if checkBlockedThreadViaPolling + // don't start until after lockIsHeldLatch fires + final Thread blockedT = new Thread(() -> { + log.info("blockedT waiting for monitor..."); synchronized (monitor) { log.info("monitor now unblocked"); } }, "test-thread-monitor-blocked"); + try { - owner.start(); - blocked.start(); - + ownerT.start(); + if ( ! lockIsHeldLatch.await(30, TimeUnit.SECONDS ) ){ + failures.add("never saw lockIsHeldLatch released"); + return; + } assertQ(req("qt", "/admin/threads", "indent", "true") - // monitor owner thread (which is also currently waiting on CountDownLatch) + // monitor owner 'ownerT' + // (which *MAY* also be waiting on doneWithTestLatch, but may not have reached that line yet) , "//lst[@name='thread'][str[@name='name'][.='test-thread-monitor-owner']]" - + " [lst[@name='lock-waiting'][null[@name='owner']]]" // latch + " [arr[@name='monitors-locked']/str[contains(.,'TestMonitorStruct')]]" - // blocked thread, waiting on the monitor - , "//lst[@name='thread'][str[@name='name'][.='test-thread-monitor-blocked']]" - + " [lst[@name='lock-waiting'][lst[@name='owner']/str[.='test-thread-monitor-owner']]]" ); - + + if (checkBlockedThreadViaPolling) { + log.info("Also checking with blockedT thread setup via polling..."); + try { + blockedT.setPriority(Thread.MAX_PRIORITY); + } catch (Exception e) { + log.warn("Couldn't set blockedT priority", e); + } + blockedT.start(); + // there is no way to "await" on the situation of the 'blockedT' thread actually reaching the + // "synchronized" block and becoming BLOCKED ... we just have to Poll for it... + for (int i = 0; i < 500 && (! Thread.State.BLOCKED.equals(blockedT.getState())); i++) { + Thread.sleep(10); // 10ms at a time, at most 5 sec total + } + if (Thread.State.BLOCKED.equals(blockedT.getState())) { + assertQ(req("qt", "/admin/threads", "indent", "true") + // same monitor owner 'ownerT' + , "//lst[@name='thread'][str[@name='name'][.='test-thread-monitor-owner']]" + + " [arr[@name='monitors-locked']/str[contains(.,'TestMonitorStruct')]]" + // blocked thread 'blockedT', waiting on the monitor + , "//lst[@name='thread'][str[@name='name'][.='test-thread-monitor-blocked']]" + + " [str[@name='state'][.='BLOCKED']]" + + " [lst[@name='lock-waiting'][lst[@name='owner']/str[.='test-thread-monitor-owner']]]" + ); + } + } } finally { - latch.countDown(); - owner.join(1000); - assertFalse("owner is still alive", owner.isAlive()); - blocked.join(1000); - assertFalse("blocked is still alive", blocked.isAlive()); + lockIsHeldLatch.countDown(); + doneWithTestLatch.countDown(); + ownerT.join(1000); + assertFalse("ownerT is still alive", ownerT.isAlive()); + blockedT.join(1000); + assertFalse("blockedT is still alive", blockedT.isAlive()); } } public void testOwnableSync() throws Exception { + doTestOwnableSync(false); + } + + /* checking for the WAITING thread requires some polling, so only do it nightly */ + @Nightly + public void testOwnableSyncWaiting() throws Exception { + doTestOwnableSync(true); + } + + public void doTestOwnableSync(final boolean checkWaitingThreadViaPolling) throws Exception { assumeTrue("ownable sync checking not supported on this JVM", ManagementFactory.getThreadMXBean().isSynchronizerUsageSupported()); @@ -106,25 +156,30 @@ public class ThreadDumpHandlerTest extends SolrTestCaseJ4 { final class TestReentrantLockStruct extends ReentrantLock { /* empty */ } final List failures = new ArrayList<>(); - final CountDownLatch latch = new CountDownLatch(1); + final CountDownLatch lockIsHeldLatch = new CountDownLatch(1); + final CountDownLatch doneWithTestLatch = new CountDownLatch(1); final ReentrantLock lock = new ReentrantLock(); - final Thread owner = new Thread(() -> { + final Thread ownerT = new Thread(() -> { lock.lock(); try { - log.info("lock owner waiting for latch to release me..."); + lockIsHeldLatch.countDown(); + log.info("lock ownerT waiting for doneWithTestLatch to release me..."); try { - if ( ! latch.await(5, TimeUnit.SECONDS ) ){ - failures.add("owner: never saw latch release"); + if ( ! doneWithTestLatch.await(5, TimeUnit.SECONDS ) ){ + failures.add("ownerT: never saw doneWithTestLatch release"); } } catch (InterruptedException ie) { - failures.add("owner: " + ie.toString()); + failures.add("ownerT: " + ie.toString()); } } finally { lock.unlock(); } }, "test-thread-sync-lock-owner"); - final Thread blocked = new Thread(() -> { - log.info("blocked waiting for lock..."); + + // only used if checkWaitingThreadViaPolling + // don't start until after lockIsHeldLatch fires + final Thread blockedT = new Thread(() -> { + log.info("blockedT waiting for lock..."); lock.lock(); try { log.info("lock now unblocked"); @@ -133,25 +188,51 @@ public class ThreadDumpHandlerTest extends SolrTestCaseJ4 { } }, "test-thread-sync-lock-blocked"); try { - owner.start(); - blocked.start(); - + ownerT.start(); + if ( ! lockIsHeldLatch.await(30, TimeUnit.SECONDS ) ){ + failures.add("never saw lockIsHeldLatch released"); + return; + } assertQ(req("qt", "/admin/threads", "indent", "true") - // lock owner thread (which is also currently waiting on CountDownLatch) + // lock owner 'ownerT' + // (which *MAY* also be waiting on doneWithTestLatch, but may not have reached that line yet) , "//lst[@name='thread'][str[@name='name'][.='test-thread-sync-lock-owner']]" - + " [lst[@name='lock-waiting'][null[@name='owner']]]" // latch + " [arr[@name='synchronizers-locked']/str[contains(.,'ReentrantLock')]]" - // blocked thread, waiting on the lock - , "//lst[@name='thread'][str[@name='name'][.='test-thread-sync-lock-blocked']]" - + " [lst[@name='lock-waiting'][lst[@name='owner']/str[.='test-thread-sync-lock-owner']]]" ); + if (checkWaitingThreadViaPolling) { + log.info("Also checking with blockedT thread setup via polling..."); + try { + blockedT.setPriority(Thread.MAX_PRIORITY); + } catch (Exception e) { + log.warn("Couldn't set blockedT priority", e); + } + blockedT.start(); + // there is no way to "await" on the situation of the 'blockedT' thread actually reaches the lock() + // call and WAITING in the queue ... we just have to Poll for it... + for (int i = 0; i < 500 && (! lock.hasQueuedThread(blockedT)); i++) { + Thread.sleep(10); // 10ms at a time, at most 5 sec total + } + if (lock.hasQueuedThread(blockedT)) { + assertQ(req("qt", "/admin/threads", "indent", "true") + // same lock owner 'ownerT' + , "//lst[@name='thread'][str[@name='name'][.='test-thread-sync-lock-owner']]" + + " [arr[@name='synchronizers-locked']/str[contains(.,'ReentrantLock')]]" + // blocked thread 'blockedT', waiting on the lock + , "//lst[@name='thread'][str[@name='name'][.='test-thread-sync-lock-blocked']]" + + " [str[@name='state'][.='WAITING']]" + + " [lst[@name='lock-waiting'][lst[@name='owner']/str[.='test-thread-sync-lock-owner']]]" + ); + + } + } } finally { - latch.countDown(); - owner.join(1000); - assertFalse("owner is still alive", owner.isAlive()); - blocked.join(1000); - assertFalse("blocked is still alive", blocked.isAlive()); + lockIsHeldLatch.countDown(); + doneWithTestLatch.countDown(); + ownerT.join(1000); + assertFalse("ownerT is still alive", ownerT.isAlive()); + blockedT.join(1000); + assertFalse("blockedT is still alive", blockedT.isAlive()); } }