SOLR-14635: re-enable test with better gatekeeping of expected 'ownership threads' and (nightly only) polling of 'locked/waiting threads' before making asserts

This commit is contained in:
Chris Hostetter 2020-07-13 12:29:04 -07:00
parent e9e2ff9b12
commit e3c2a93edb
1 changed files with 125 additions and 44 deletions

View File

@ -36,7 +36,6 @@ import org.junit.BeforeClass;
* Fixing the ownershipt should be possible using latches, but fixing the '*-blocked' threads may not be possible
* w/o polling
*/
@SolrTestCaseJ4.AwaitsFix(bugUrl="https://issues.apache.org/jira/browse/SOLR-14635")
public class ThreadDumpHandlerTest extends SolrTestCaseJ4 {
private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
@ -47,6 +46,16 @@ public class ThreadDumpHandlerTest extends SolrTestCaseJ4 {
}
public void testMonitor() throws Exception {
doTestMonitor(false);
}
/* checking for the BLOCKED thread requires some polling, so only do it nightly */
@Nightly
public void testMonitorBlocked() throws Exception {
doTestMonitor(true);
}
public void doTestMonitor(final boolean checkBlockedThreadViaPolling) throws Exception {
assumeTrue("monitor checking not supported on this JVM",
ManagementFactory.getThreadMXBean().isObjectMonitorUsageSupported());
@ -54,51 +63,92 @@ public class ThreadDumpHandlerTest extends SolrTestCaseJ4 {
final class TestMonitorStruct { /* empty */ }
final List<String> failures = new ArrayList<>();
final CountDownLatch latch = new CountDownLatch(1);
final CountDownLatch lockIsHeldLatch = new CountDownLatch(1);
final CountDownLatch doneWithTestLatch = new CountDownLatch(1);
final Object monitor = new TestMonitorStruct();
final Thread owner = new Thread(() -> {
final Thread ownerT = new Thread(() -> {
synchronized (monitor) {
log.info("monitor owner waiting for latch to release me...");
lockIsHeldLatch.countDown();
log.info("monitor ownerT waiting for doneWithTestLatch to release me...");
try {
if ( ! latch.await(5, TimeUnit.SECONDS ) ){
failures.add("owner: never saw latch release");
if ( ! doneWithTestLatch.await(30, TimeUnit.SECONDS ) ){
failures.add("ownerT: never saw doneWithTestLatch released");
}
} catch (InterruptedException ie) {
failures.add("owner: " + ie.toString());
failures.add("ownerT: " + ie.toString());
}
}
}, "test-thread-monitor-owner");
final Thread blocked = new Thread(() -> {
log.info("blocked waiting for monitor...");
// only used if checkBlockedThreadViaPolling
// don't start until after lockIsHeldLatch fires
final Thread blockedT = new Thread(() -> {
log.info("blockedT waiting for monitor...");
synchronized (monitor) {
log.info("monitor now unblocked");
}
}, "test-thread-monitor-blocked");
try {
owner.start();
blocked.start();
try {
ownerT.start();
if ( ! lockIsHeldLatch.await(30, TimeUnit.SECONDS ) ){
failures.add("never saw lockIsHeldLatch released");
return;
}
assertQ(req("qt", "/admin/threads", "indent", "true")
// monitor owner thread (which is also currently waiting on CountDownLatch)
// monitor owner 'ownerT'
// (which *MAY* also be waiting on doneWithTestLatch, but may not have reached that line yet)
, "//lst[@name='thread'][str[@name='name'][.='test-thread-monitor-owner']]"
+ " [lst[@name='lock-waiting'][null[@name='owner']]]" // latch
+ " [arr[@name='monitors-locked']/str[contains(.,'TestMonitorStruct')]]"
// blocked thread, waiting on the monitor
, "//lst[@name='thread'][str[@name='name'][.='test-thread-monitor-blocked']]"
+ " [lst[@name='lock-waiting'][lst[@name='owner']/str[.='test-thread-monitor-owner']]]"
);
if (checkBlockedThreadViaPolling) {
log.info("Also checking with blockedT thread setup via polling...");
try {
blockedT.setPriority(Thread.MAX_PRIORITY);
} catch (Exception e) {
log.warn("Couldn't set blockedT priority", e);
}
blockedT.start();
// there is no way to "await" on the situation of the 'blockedT' thread actually reaching the
// "synchronized" block and becoming BLOCKED ... we just have to Poll for it...
for (int i = 0; i < 500 && (! Thread.State.BLOCKED.equals(blockedT.getState())); i++) {
Thread.sleep(10); // 10ms at a time, at most 5 sec total
}
if (Thread.State.BLOCKED.equals(blockedT.getState())) {
assertQ(req("qt", "/admin/threads", "indent", "true")
// same monitor owner 'ownerT'
, "//lst[@name='thread'][str[@name='name'][.='test-thread-monitor-owner']]"
+ " [arr[@name='monitors-locked']/str[contains(.,'TestMonitorStruct')]]"
// blocked thread 'blockedT', waiting on the monitor
, "//lst[@name='thread'][str[@name='name'][.='test-thread-monitor-blocked']]"
+ " [str[@name='state'][.='BLOCKED']]"
+ " [lst[@name='lock-waiting'][lst[@name='owner']/str[.='test-thread-monitor-owner']]]"
);
}
}
} finally {
latch.countDown();
owner.join(1000);
assertFalse("owner is still alive", owner.isAlive());
blocked.join(1000);
assertFalse("blocked is still alive", blocked.isAlive());
lockIsHeldLatch.countDown();
doneWithTestLatch.countDown();
ownerT.join(1000);
assertFalse("ownerT is still alive", ownerT.isAlive());
blockedT.join(1000);
assertFalse("blockedT is still alive", blockedT.isAlive());
}
}
public void testOwnableSync() throws Exception {
doTestOwnableSync(false);
}
/* checking for the WAITING thread requires some polling, so only do it nightly */
@Nightly
public void testOwnableSyncWaiting() throws Exception {
doTestOwnableSync(true);
}
public void doTestOwnableSync(final boolean checkWaitingThreadViaPolling) throws Exception {
assumeTrue("ownable sync checking not supported on this JVM",
ManagementFactory.getThreadMXBean().isSynchronizerUsageSupported());
@ -106,25 +156,30 @@ public class ThreadDumpHandlerTest extends SolrTestCaseJ4 {
final class TestReentrantLockStruct extends ReentrantLock { /* empty */ }
final List<String> failures = new ArrayList<>();
final CountDownLatch latch = new CountDownLatch(1);
final CountDownLatch lockIsHeldLatch = new CountDownLatch(1);
final CountDownLatch doneWithTestLatch = new CountDownLatch(1);
final ReentrantLock lock = new ReentrantLock();
final Thread owner = new Thread(() -> {
final Thread ownerT = new Thread(() -> {
lock.lock();
try {
log.info("lock owner waiting for latch to release me...");
lockIsHeldLatch.countDown();
log.info("lock ownerT waiting for doneWithTestLatch to release me...");
try {
if ( ! latch.await(5, TimeUnit.SECONDS ) ){
failures.add("owner: never saw latch release");
if ( ! doneWithTestLatch.await(5, TimeUnit.SECONDS ) ){
failures.add("ownerT: never saw doneWithTestLatch release");
}
} catch (InterruptedException ie) {
failures.add("owner: " + ie.toString());
failures.add("ownerT: " + ie.toString());
}
} finally {
lock.unlock();
}
}, "test-thread-sync-lock-owner");
final Thread blocked = new Thread(() -> {
log.info("blocked waiting for lock...");
// only used if checkWaitingThreadViaPolling
// don't start until after lockIsHeldLatch fires
final Thread blockedT = new Thread(() -> {
log.info("blockedT waiting for lock...");
lock.lock();
try {
log.info("lock now unblocked");
@ -133,25 +188,51 @@ public class ThreadDumpHandlerTest extends SolrTestCaseJ4 {
}
}, "test-thread-sync-lock-blocked");
try {
owner.start();
blocked.start();
ownerT.start();
if ( ! lockIsHeldLatch.await(30, TimeUnit.SECONDS ) ){
failures.add("never saw lockIsHeldLatch released");
return;
}
assertQ(req("qt", "/admin/threads", "indent", "true")
// lock owner thread (which is also currently waiting on CountDownLatch)
// lock owner 'ownerT'
// (which *MAY* also be waiting on doneWithTestLatch, but may not have reached that line yet)
, "//lst[@name='thread'][str[@name='name'][.='test-thread-sync-lock-owner']]"
+ " [lst[@name='lock-waiting'][null[@name='owner']]]" // latch
+ " [arr[@name='synchronizers-locked']/str[contains(.,'ReentrantLock')]]"
// blocked thread, waiting on the lock
, "//lst[@name='thread'][str[@name='name'][.='test-thread-sync-lock-blocked']]"
+ " [lst[@name='lock-waiting'][lst[@name='owner']/str[.='test-thread-sync-lock-owner']]]"
);
if (checkWaitingThreadViaPolling) {
log.info("Also checking with blockedT thread setup via polling...");
try {
blockedT.setPriority(Thread.MAX_PRIORITY);
} catch (Exception e) {
log.warn("Couldn't set blockedT priority", e);
}
blockedT.start();
// there is no way to "await" on the situation of the 'blockedT' thread actually reaches the lock()
// call and WAITING in the queue ... we just have to Poll for it...
for (int i = 0; i < 500 && (! lock.hasQueuedThread(blockedT)); i++) {
Thread.sleep(10); // 10ms at a time, at most 5 sec total
}
if (lock.hasQueuedThread(blockedT)) {
assertQ(req("qt", "/admin/threads", "indent", "true")
// same lock owner 'ownerT'
, "//lst[@name='thread'][str[@name='name'][.='test-thread-sync-lock-owner']]"
+ " [arr[@name='synchronizers-locked']/str[contains(.,'ReentrantLock')]]"
// blocked thread 'blockedT', waiting on the lock
, "//lst[@name='thread'][str[@name='name'][.='test-thread-sync-lock-blocked']]"
+ " [str[@name='state'][.='WAITING']]"
+ " [lst[@name='lock-waiting'][lst[@name='owner']/str[.='test-thread-sync-lock-owner']]]"
);
}
}
} finally {
latch.countDown();
owner.join(1000);
assertFalse("owner is still alive", owner.isAlive());
blocked.join(1000);
assertFalse("blocked is still alive", blocked.isAlive());
lockIsHeldLatch.countDown();
doneWithTestLatch.countDown();
ownerT.join(1000);
assertFalse("ownerT is still alive", ownerT.isAlive());
blockedT.join(1000);
assertFalse("blockedT is still alive", blockedT.isAlive());
}
}