HADOOP-8755. Print thread dump when tests fail due to timeout. Contributed by Andrey Klochkov.
git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1384627 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
86635168fe
commit
d8e386f01a
|
@ -226,6 +226,9 @@ Release 2.0.3-alpha - Unreleased
|
|||
HADOOP-8789. Tests setLevel(Level.OFF) should be Level.ERROR.
|
||||
(Andy Isaacson via eli)
|
||||
|
||||
HADOOP-8755. Print thread dump when tests fail due to timeout. (Andrey
|
||||
Klochkov via atm)
|
||||
|
||||
OPTIMIZATIONS
|
||||
|
||||
BUG FIXES
|
||||
|
|
|
@ -0,0 +1,181 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.test;
|
||||
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.util.concurrent.BrokenBarrierException;
|
||||
import java.util.concurrent.CyclicBarrier;
|
||||
import java.util.concurrent.locks.Lock;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
import org.junit.Assert;
|
||||
import org.junit.Test;
|
||||
import org.junit.runner.notification.Failure;
|
||||
|
||||
public class TestTimedOutTestsListener {
|
||||
|
||||
public static class Deadlock {
|
||||
private CyclicBarrier barrier = new CyclicBarrier(6);
|
||||
|
||||
public Deadlock() {
|
||||
DeadlockThread[] dThreads = new DeadlockThread[6];
|
||||
|
||||
Monitor a = new Monitor("a");
|
||||
Monitor b = new Monitor("b");
|
||||
Monitor c = new Monitor("c");
|
||||
dThreads[0] = new DeadlockThread("MThread-1", a, b);
|
||||
dThreads[1] = new DeadlockThread("MThread-2", b, c);
|
||||
dThreads[2] = new DeadlockThread("MThread-3", c, a);
|
||||
|
||||
Lock d = new ReentrantLock();
|
||||
Lock e = new ReentrantLock();
|
||||
Lock f = new ReentrantLock();
|
||||
|
||||
dThreads[3] = new DeadlockThread("SThread-4", d, e);
|
||||
dThreads[4] = new DeadlockThread("SThread-5", e, f);
|
||||
dThreads[5] = new DeadlockThread("SThread-6", f, d);
|
||||
|
||||
// make them daemon threads so that the test will exit
|
||||
for (int i = 0; i < 6; i++) {
|
||||
dThreads[i].setDaemon(true);
|
||||
dThreads[i].start();
|
||||
}
|
||||
}
|
||||
|
||||
class DeadlockThread extends Thread {
|
||||
private Lock lock1 = null;
|
||||
|
||||
private Lock lock2 = null;
|
||||
|
||||
private Monitor mon1 = null;
|
||||
|
||||
private Monitor mon2 = null;
|
||||
|
||||
private boolean useSync;
|
||||
|
||||
DeadlockThread(String name, Lock lock1, Lock lock2) {
|
||||
super(name);
|
||||
this.lock1 = lock1;
|
||||
this.lock2 = lock2;
|
||||
this.useSync = true;
|
||||
}
|
||||
|
||||
DeadlockThread(String name, Monitor mon1, Monitor mon2) {
|
||||
super(name);
|
||||
this.mon1 = mon1;
|
||||
this.mon2 = mon2;
|
||||
this.useSync = false;
|
||||
}
|
||||
|
||||
public void run() {
|
||||
if (useSync) {
|
||||
syncLock();
|
||||
} else {
|
||||
monitorLock();
|
||||
}
|
||||
}
|
||||
|
||||
private void syncLock() {
|
||||
lock1.lock();
|
||||
try {
|
||||
try {
|
||||
barrier.await();
|
||||
} catch (Exception e) {
|
||||
}
|
||||
goSyncDeadlock();
|
||||
} finally {
|
||||
lock1.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
private void goSyncDeadlock() {
|
||||
try {
|
||||
barrier.await();
|
||||
} catch (Exception e) {
|
||||
}
|
||||
lock2.lock();
|
||||
throw new RuntimeException("should not reach here.");
|
||||
}
|
||||
|
||||
private void monitorLock() {
|
||||
synchronized (mon1) {
|
||||
try {
|
||||
barrier.await();
|
||||
} catch (Exception e) {
|
||||
}
|
||||
goMonitorDeadlock();
|
||||
}
|
||||
}
|
||||
|
||||
private void goMonitorDeadlock() {
|
||||
try {
|
||||
barrier.await();
|
||||
} catch (Exception e) {
|
||||
}
|
||||
synchronized (mon2) {
|
||||
throw new RuntimeException(getName() + " should not reach here.");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
class Monitor {
|
||||
String name;
|
||||
|
||||
Monitor(String name) {
|
||||
this.name = name;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Test(timeout=500)
|
||||
public void testThreadDumpAndDeadlocks() throws Exception {
|
||||
new Deadlock();
|
||||
String s = null;
|
||||
while (true) {
|
||||
s = TimedOutTestsListener.buildDeadlockInfo();
|
||||
if (s != null)
|
||||
break;
|
||||
Thread.sleep(100);
|
||||
}
|
||||
|
||||
Assert.assertEquals(3, countStringOccurrences(s, "BLOCKED"));
|
||||
|
||||
Failure failure = new Failure(
|
||||
null, new Exception(TimedOutTestsListener.TEST_TIMED_OUT_PREFIX));
|
||||
StringWriter writer = new StringWriter();
|
||||
new TimedOutTestsListener(new PrintWriter(writer)).testFailure(failure);
|
||||
String out = writer.toString();
|
||||
|
||||
Assert.assertTrue(out.contains("THREAD DUMP"));
|
||||
Assert.assertTrue(out.contains("DEADLOCKS DETECTED"));
|
||||
|
||||
System.out.println(out);
|
||||
}
|
||||
|
||||
private int countStringOccurrences(String s, String substr) {
|
||||
int n = 0;
|
||||
int index = 0;
|
||||
while ((index = s.indexOf(substr, index) + 1) != 0) {
|
||||
n++;
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,166 @@
|
|||
/**
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.apache.hadoop.test;
|
||||
|
||||
import java.io.PrintWriter;
|
||||
import java.io.StringWriter;
|
||||
import java.lang.management.LockInfo;
|
||||
import java.lang.management.ManagementFactory;
|
||||
import java.lang.management.MonitorInfo;
|
||||
import java.lang.management.ThreadInfo;
|
||||
import java.lang.management.ThreadMXBean;
|
||||
import java.text.DateFormat;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.Date;
|
||||
import java.util.Map;
|
||||
|
||||
import org.junit.runner.notification.Failure;
|
||||
import org.junit.runner.notification.RunListener;
|
||||
|
||||
/**
|
||||
* JUnit run listener which prints full thread dump into System.err
|
||||
* in case a test is failed due to timeout.
|
||||
*/
|
||||
public class TimedOutTestsListener extends RunListener {
|
||||
|
||||
static final String TEST_TIMED_OUT_PREFIX = "test timed out after";
|
||||
|
||||
private static String INDENT = " ";
|
||||
|
||||
private final PrintWriter output;
|
||||
|
||||
public TimedOutTestsListener() {
|
||||
this.output = new PrintWriter(System.err);
|
||||
}
|
||||
|
||||
public TimedOutTestsListener(PrintWriter output) {
|
||||
this.output = output;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void testFailure(Failure failure) throws Exception {
|
||||
if (failure != null && failure.getMessage() != null
|
||||
&& failure.getMessage().startsWith(TEST_TIMED_OUT_PREFIX)) {
|
||||
output.println("====> TEST TIMED OUT. PRINTING THREAD DUMP. <====");
|
||||
output.println();
|
||||
DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
|
||||
output.println(String.format("Timestamp: %s", dateFormat.format(new Date())));
|
||||
output.println();
|
||||
output.println(buildThreadDump());
|
||||
|
||||
String deadlocksInfo = buildDeadlockInfo();
|
||||
if (deadlocksInfo != null) {
|
||||
output.println("====> DEADLOCKS DETECTED <====");
|
||||
output.println();
|
||||
output.println(deadlocksInfo);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static String buildThreadDump() {
|
||||
StringBuilder dump = new StringBuilder();
|
||||
Map<Thread, StackTraceElement[]> stackTraces = Thread.getAllStackTraces();
|
||||
for (Map.Entry<Thread, StackTraceElement[]> e : stackTraces.entrySet()) {
|
||||
Thread thread = e.getKey();
|
||||
dump.append(String.format(
|
||||
"\"%s\" %s prio=%d tid=%d %s\njava.lang.Thread.State: %s",
|
||||
thread.getName(),
|
||||
(thread.isDaemon() ? "daemon" : ""),
|
||||
thread.getPriority(),
|
||||
thread.getId(),
|
||||
Thread.State.WAITING.equals(thread.getState()) ?
|
||||
"in Object.wait()" : thread.getState().name().toLowerCase(),
|
||||
Thread.State.WAITING.equals(thread.getState()) ?
|
||||
"WAITING (on object monitor)" : thread.getState()));
|
||||
for (StackTraceElement stackTraceElement : e.getValue()) {
|
||||
dump.append("\n at ");
|
||||
dump.append(stackTraceElement);
|
||||
}
|
||||
dump.append("\n");
|
||||
}
|
||||
return dump.toString();
|
||||
}
|
||||
|
||||
static String buildDeadlockInfo() {
|
||||
ThreadMXBean threadBean = ManagementFactory.getThreadMXBean();
|
||||
long[] threadIds = threadBean.findMonitorDeadlockedThreads();
|
||||
if (threadIds != null && threadIds.length > 0) {
|
||||
StringWriter stringWriter = new StringWriter();
|
||||
PrintWriter out = new PrintWriter(stringWriter);
|
||||
|
||||
ThreadInfo[] infos = threadBean.getThreadInfo(threadIds, true, true);
|
||||
for (ThreadInfo ti : infos) {
|
||||
printThreadInfo(ti, out);
|
||||
printLockInfo(ti.getLockedSynchronizers(), out);
|
||||
out.println();
|
||||
}
|
||||
|
||||
out.close();
|
||||
return stringWriter.toString();
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private static void printThreadInfo(ThreadInfo ti, PrintWriter out) {
|
||||
// print thread information
|
||||
printThread(ti, out);
|
||||
|
||||
// print stack trace with locks
|
||||
StackTraceElement[] stacktrace = ti.getStackTrace();
|
||||
MonitorInfo[] monitors = ti.getLockedMonitors();
|
||||
for (int i = 0; i < stacktrace.length; i++) {
|
||||
StackTraceElement ste = stacktrace[i];
|
||||
out.println(INDENT + "at " + ste.toString());
|
||||
for (MonitorInfo mi : monitors) {
|
||||
if (mi.getLockedStackDepth() == i) {
|
||||
out.println(INDENT + " - locked " + mi);
|
||||
}
|
||||
}
|
||||
}
|
||||
out.println();
|
||||
}
|
||||
|
||||
private static void printThread(ThreadInfo ti, PrintWriter out) {
|
||||
out.print("\"" + ti.getThreadName() + "\"" + " Id="
|
||||
+ ti.getThreadId() + " in " + ti.getThreadState());
|
||||
if (ti.getLockName() != null) {
|
||||
out.print(" on lock=" + ti.getLockName());
|
||||
}
|
||||
if (ti.isSuspended()) {
|
||||
out.print(" (suspended)");
|
||||
}
|
||||
if (ti.isInNative()) {
|
||||
out.print(" (running in native)");
|
||||
}
|
||||
out.println();
|
||||
if (ti.getLockOwnerName() != null) {
|
||||
out.println(INDENT + " owned by " + ti.getLockOwnerName() + " Id="
|
||||
+ ti.getLockOwnerId());
|
||||
}
|
||||
}
|
||||
|
||||
private static void printLockInfo(LockInfo[] locks, PrintWriter out) {
|
||||
out.println(INDENT + "Locked synchronizers: count = " + locks.length);
|
||||
for (LockInfo li : locks) {
|
||||
out.println(INDENT + " - " + li);
|
||||
}
|
||||
out.println();
|
||||
}
|
||||
|
||||
}
|
|
@ -301,6 +301,12 @@
|
|||
<java.security.krb5.conf>${project.build.directory}/test-classes/krb5.conf</java.security.krb5.conf>
|
||||
<kerberos.realm>${kerberos.realm}</kerberos.realm>
|
||||
</systemPropertyVariables>
|
||||
<properties>
|
||||
<property>
|
||||
<name>listener</name>
|
||||
<value>org.apache.hadoop.test.TimedOutTestsListener</value>
|
||||
</property>
|
||||
</properties>
|
||||
<excludes>
|
||||
<exclude>**/${test.exclude}.java</exclude>
|
||||
<exclude>${test.exclude.pattern}</exclude>
|
||||
|
|
|
@ -189,6 +189,12 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
|||
<startKdc>${startKdc}</startKdc>
|
||||
<kdc.resource.dir>${kdc.resource.dir}</kdc.resource.dir>
|
||||
</systemPropertyVariables>
|
||||
<properties>
|
||||
<property>
|
||||
<name>listener</name>
|
||||
<value>org.apache.hadoop.test.TimedOutTestsListener</value>
|
||||
</property>
|
||||
</properties>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
|
|
|
@ -172,6 +172,18 @@
|
|||
<effort>Max</effort>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<configuration>
|
||||
<properties>
|
||||
<property>
|
||||
<name>listener</name>
|
||||
<value>org.apache.hadoop.test.TimedOutTestsListener</value>
|
||||
</property>
|
||||
</properties>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
|
|
|
@ -220,6 +220,18 @@
|
|||
</includes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<configuration>
|
||||
<properties>
|
||||
<property>
|
||||
<name>listener</name>
|
||||
<value>org.apache.hadoop.test.TimedOutTestsListener</value>
|
||||
</property>
|
||||
</properties>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
|
|
|
@ -154,6 +154,18 @@
|
|||
</includes>
|
||||
</configuration>
|
||||
</plugin>
|
||||
<plugin>
|
||||
<groupId>org.apache.maven.plugins</groupId>
|
||||
<artifactId>maven-surefire-plugin</artifactId>
|
||||
<configuration>
|
||||
<properties>
|
||||
<property>
|
||||
<name>listener</name>
|
||||
<value>org.apache.hadoop.test.TimedOutTestsListener</value>
|
||||
</property>
|
||||
</properties>
|
||||
</configuration>
|
||||
</plugin>
|
||||
</plugins>
|
||||
</build>
|
||||
|
||||
|
@ -187,4 +199,4 @@
|
|||
<module>hadoop-yarn-site</module>
|
||||
<module>hadoop-yarn-client</module>
|
||||
</modules>
|
||||
</project>
|
||||
</project>
|
||||
|
|
Loading…
Reference in New Issue