HADOOP-8755. Print thread dump when tests fail due to timeout. Contributed by Andrey Klochkov.

git-svn-id: https://svn.apache.org/repos/asf/hadoop/common/trunk@1384627 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Aaron Myers 2012-09-14 01:44:23 +00:00
parent 86635168fe
commit d8e386f01a
8 changed files with 399 additions and 1 deletions

View File

@ -226,6 +226,9 @@ Release 2.0.3-alpha - Unreleased
HADOOP-8789. Tests setLevel(Level.OFF) should be Level.ERROR.
(Andy Isaacson via eli)
HADOOP-8755. Print thread dump when tests fail due to timeout. (Andrey
Klochkov via atm)
OPTIMIZATIONS
BUG FIXES

View File

@ -0,0 +1,181 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.test;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.concurrent.BrokenBarrierException;
import java.util.concurrent.CyclicBarrier;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import org.junit.Assert;
import org.junit.Test;
import org.junit.runner.notification.Failure;
public class TestTimedOutTestsListener {
public static class Deadlock {
private CyclicBarrier barrier = new CyclicBarrier(6);
public Deadlock() {
DeadlockThread[] dThreads = new DeadlockThread[6];
Monitor a = new Monitor("a");
Monitor b = new Monitor("b");
Monitor c = new Monitor("c");
dThreads[0] = new DeadlockThread("MThread-1", a, b);
dThreads[1] = new DeadlockThread("MThread-2", b, c);
dThreads[2] = new DeadlockThread("MThread-3", c, a);
Lock d = new ReentrantLock();
Lock e = new ReentrantLock();
Lock f = new ReentrantLock();
dThreads[3] = new DeadlockThread("SThread-4", d, e);
dThreads[4] = new DeadlockThread("SThread-5", e, f);
dThreads[5] = new DeadlockThread("SThread-6", f, d);
// make them daemon threads so that the test will exit
for (int i = 0; i < 6; i++) {
dThreads[i].setDaemon(true);
dThreads[i].start();
}
}
class DeadlockThread extends Thread {
private Lock lock1 = null;
private Lock lock2 = null;
private Monitor mon1 = null;
private Monitor mon2 = null;
private boolean useSync;
DeadlockThread(String name, Lock lock1, Lock lock2) {
super(name);
this.lock1 = lock1;
this.lock2 = lock2;
this.useSync = true;
}
DeadlockThread(String name, Monitor mon1, Monitor mon2) {
super(name);
this.mon1 = mon1;
this.mon2 = mon2;
this.useSync = false;
}
public void run() {
if (useSync) {
syncLock();
} else {
monitorLock();
}
}
private void syncLock() {
lock1.lock();
try {
try {
barrier.await();
} catch (Exception e) {
}
goSyncDeadlock();
} finally {
lock1.unlock();
}
}
private void goSyncDeadlock() {
try {
barrier.await();
} catch (Exception e) {
}
lock2.lock();
throw new RuntimeException("should not reach here.");
}
private void monitorLock() {
synchronized (mon1) {
try {
barrier.await();
} catch (Exception e) {
}
goMonitorDeadlock();
}
}
private void goMonitorDeadlock() {
try {
barrier.await();
} catch (Exception e) {
}
synchronized (mon2) {
throw new RuntimeException(getName() + " should not reach here.");
}
}
}
class Monitor {
String name;
Monitor(String name) {
this.name = name;
}
}
}
@Test(timeout=500)
public void testThreadDumpAndDeadlocks() throws Exception {
new Deadlock();
String s = null;
while (true) {
s = TimedOutTestsListener.buildDeadlockInfo();
if (s != null)
break;
Thread.sleep(100);
}
Assert.assertEquals(3, countStringOccurrences(s, "BLOCKED"));
Failure failure = new Failure(
null, new Exception(TimedOutTestsListener.TEST_TIMED_OUT_PREFIX));
StringWriter writer = new StringWriter();
new TimedOutTestsListener(new PrintWriter(writer)).testFailure(failure);
String out = writer.toString();
Assert.assertTrue(out.contains("THREAD DUMP"));
Assert.assertTrue(out.contains("DEADLOCKS DETECTED"));
System.out.println(out);
}
private int countStringOccurrences(String s, String substr) {
int n = 0;
int index = 0;
while ((index = s.indexOf(substr, index) + 1) != 0) {
n++;
}
return n;
}
}

View File

@ -0,0 +1,166 @@
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.test;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.lang.management.LockInfo;
import java.lang.management.ManagementFactory;
import java.lang.management.MonitorInfo;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Map;
import org.junit.runner.notification.Failure;
import org.junit.runner.notification.RunListener;
/**
* JUnit run listener which prints full thread dump into System.err
* in case a test is failed due to timeout.
*/
public class TimedOutTestsListener extends RunListener {
static final String TEST_TIMED_OUT_PREFIX = "test timed out after";
private static String INDENT = " ";
private final PrintWriter output;
public TimedOutTestsListener() {
this.output = new PrintWriter(System.err);
}
public TimedOutTestsListener(PrintWriter output) {
this.output = output;
}
@Override
public void testFailure(Failure failure) throws Exception {
if (failure != null && failure.getMessage() != null
&& failure.getMessage().startsWith(TEST_TIMED_OUT_PREFIX)) {
output.println("====> TEST TIMED OUT. PRINTING THREAD DUMP. <====");
output.println();
DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss,SSS");
output.println(String.format("Timestamp: %s", dateFormat.format(new Date())));
output.println();
output.println(buildThreadDump());
String deadlocksInfo = buildDeadlockInfo();
if (deadlocksInfo != null) {
output.println("====> DEADLOCKS DETECTED <====");
output.println();
output.println(deadlocksInfo);
}
}
}
static String buildThreadDump() {
StringBuilder dump = new StringBuilder();
Map<Thread, StackTraceElement[]> stackTraces = Thread.getAllStackTraces();
for (Map.Entry<Thread, StackTraceElement[]> e : stackTraces.entrySet()) {
Thread thread = e.getKey();
dump.append(String.format(
"\"%s\" %s prio=%d tid=%d %s\njava.lang.Thread.State: %s",
thread.getName(),
(thread.isDaemon() ? "daemon" : ""),
thread.getPriority(),
thread.getId(),
Thread.State.WAITING.equals(thread.getState()) ?
"in Object.wait()" : thread.getState().name().toLowerCase(),
Thread.State.WAITING.equals(thread.getState()) ?
"WAITING (on object monitor)" : thread.getState()));
for (StackTraceElement stackTraceElement : e.getValue()) {
dump.append("\n at ");
dump.append(stackTraceElement);
}
dump.append("\n");
}
return dump.toString();
}
static String buildDeadlockInfo() {
ThreadMXBean threadBean = ManagementFactory.getThreadMXBean();
long[] threadIds = threadBean.findMonitorDeadlockedThreads();
if (threadIds != null && threadIds.length > 0) {
StringWriter stringWriter = new StringWriter();
PrintWriter out = new PrintWriter(stringWriter);
ThreadInfo[] infos = threadBean.getThreadInfo(threadIds, true, true);
for (ThreadInfo ti : infos) {
printThreadInfo(ti, out);
printLockInfo(ti.getLockedSynchronizers(), out);
out.println();
}
out.close();
return stringWriter.toString();
} else {
return null;
}
}
private static void printThreadInfo(ThreadInfo ti, PrintWriter out) {
// print thread information
printThread(ti, out);
// print stack trace with locks
StackTraceElement[] stacktrace = ti.getStackTrace();
MonitorInfo[] monitors = ti.getLockedMonitors();
for (int i = 0; i < stacktrace.length; i++) {
StackTraceElement ste = stacktrace[i];
out.println(INDENT + "at " + ste.toString());
for (MonitorInfo mi : monitors) {
if (mi.getLockedStackDepth() == i) {
out.println(INDENT + " - locked " + mi);
}
}
}
out.println();
}
private static void printThread(ThreadInfo ti, PrintWriter out) {
out.print("\"" + ti.getThreadName() + "\"" + " Id="
+ ti.getThreadId() + " in " + ti.getThreadState());
if (ti.getLockName() != null) {
out.print(" on lock=" + ti.getLockName());
}
if (ti.isSuspended()) {
out.print(" (suspended)");
}
if (ti.isInNative()) {
out.print(" (running in native)");
}
out.println();
if (ti.getLockOwnerName() != null) {
out.println(INDENT + " owned by " + ti.getLockOwnerName() + " Id="
+ ti.getLockOwnerId());
}
}
private static void printLockInfo(LockInfo[] locks, PrintWriter out) {
out.println(INDENT + "Locked synchronizers: count = " + locks.length);
for (LockInfo li : locks) {
out.println(INDENT + " - " + li);
}
out.println();
}
}

View File

@ -301,6 +301,12 @@
<java.security.krb5.conf>${project.build.directory}/test-classes/krb5.conf</java.security.krb5.conf>
<kerberos.realm>${kerberos.realm}</kerberos.realm>
</systemPropertyVariables>
<properties>
<property>
<name>listener</name>
<value>org.apache.hadoop.test.TimedOutTestsListener</value>
</property>
</properties>
<excludes>
<exclude>**/${test.exclude}.java</exclude>
<exclude>${test.exclude.pattern}</exclude>

View File

@ -189,6 +189,12 @@ http://maven.apache.org/xsd/maven-4.0.0.xsd">
<startKdc>${startKdc}</startKdc>
<kdc.resource.dir>${kdc.resource.dir}</kdc.resource.dir>
</systemPropertyVariables>
<properties>
<property>
<name>listener</name>
<value>org.apache.hadoop.test.TimedOutTestsListener</value>
</property>
</properties>
</configuration>
</plugin>
<plugin>

View File

@ -172,6 +172,18 @@
<effort>Max</effort>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<properties>
<property>
<name>listener</name>
<value>org.apache.hadoop.test.TimedOutTestsListener</value>
</property>
</properties>
</configuration>
</plugin>
</plugins>
</build>

View File

@ -220,6 +220,18 @@
</includes>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<properties>
<property>
<name>listener</name>
<value>org.apache.hadoop.test.TimedOutTestsListener</value>
</property>
</properties>
</configuration>
</plugin>
</plugins>
</build>

View File

@ -154,6 +154,18 @@
</includes>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-surefire-plugin</artifactId>
<configuration>
<properties>
<property>
<name>listener</name>
<value>org.apache.hadoop.test.TimedOutTestsListener</value>
</property>
</properties>
</configuration>
</plugin>
</plugins>
</build>
@ -187,4 +199,4 @@
<module>hadoop-yarn-site</module>
<module>hadoop-yarn-client</module>
</modules>
</project>
</project>