352684 Implemented extra stack trace logging mode

This commit is contained in:
Michael Gorovoy 2011-08-11 20:54:45 -04:00
parent f39ffad9a9
commit 157e1c3fe5
4 changed files with 293 additions and 69 deletions

View File

@ -9,12 +9,13 @@
<Set name="scanInterval">2000</Set>
<Set name="busyThreshold">90</Set>
<Set name="stackDepth">3</Set>
<Set name="trailLength">2</Set>
<!-- To enable logging CPU utilization for threads above specified threshold, -->
<!-- uncomment the following lines, changing dump interval (in milliseconds) -->
<!-- and dump threshold (in percent) as desired. -->
<!-- uncomment the following lines, changing log interval (in milliseconds) -->
<!-- and log threshold (in percent) as desired. -->
<!--
<Set name="dumpInterval">10000</Arg>
<Set name="dumpThreshold">1</Arg>
<Set name="logInterval">10000</Arg>
<Set name="logThreshold">1</Arg>
-->
</New>
</Arg>

View File

@ -36,6 +36,7 @@ public class ThreadMonitor extends AbstractLifeCycle implements Runnable
private int _busyThreshold;
private int _logThreshold;
private int _stackDepth;
private int _trailLength;
private ThreadMXBean _threadBean;
@ -53,7 +54,32 @@ public class ThreadMonitor extends AbstractLifeCycle implements Runnable
*/
public ThreadMonitor() throws Exception
{
this(5000, 95, 3);
this(5000);
}
/* ------------------------------------------------------------ */
/**
* Instantiates a new thread monitor.
*
* @param intervalMs scan interval
* @throws Exception
*/
public ThreadMonitor(int intervalMs) throws Exception
{
this(intervalMs, 95);
}
/* ------------------------------------------------------------ */
/**
* Instantiates a new thread monitor.
*
* @param intervalMs scan interval
* @param threshold busy threshold
* @throws Exception
*/
public ThreadMonitor(int intervalMs, int threshold) throws Exception
{
this(intervalMs, threshold, 3);
}
/* ------------------------------------------------------------ */
@ -66,10 +92,26 @@ public class ThreadMonitor extends AbstractLifeCycle implements Runnable
* @throws Exception
*/
public ThreadMonitor(int intervalMs, int threshold, int depth) throws Exception
{
this(intervalMs, threshold, depth, 3);
}
/* ------------------------------------------------------------ */
/**
* Instantiates a new thread monitor.
*
* @param intervalMs scan interval
* @param threshold busy threshold
* @param depth stack compare depth
* @param trail length of stack trail
* @throws Exception
*/
public ThreadMonitor(int intervalMs, int threshold, int depth, int trail) throws Exception
{
_scanInterval = intervalMs;
_busyThreshold = threshold;
_stackDepth = depth;
_trailLength = trail;
_logger = Log.getLogger(ThreadMonitor.class.getName());
_monitorInfo = new HashMap<Long, ThreadMonitorInfo>();
@ -78,66 +120,144 @@ public class ThreadMonitor extends AbstractLifeCycle implements Runnable
}
/* ------------------------------------------------------------ */
/**
* Gets the scan interval.
*
* @return the scan interval
*/
public int getScanInterval()
{
return _scanInterval;
}
/* ------------------------------------------------------------ */
/**
* Sets the scan interval.
*
* @param ms the new scan interval
*/
public void setScanInterval(int ms)
{
_scanInterval = ms;
}
/* ------------------------------------------------------------ */
/**
* Gets the log interval.
*
* @return the log interval
*/
public int getLogInterval()
{
return _logInterval;
}
/* ------------------------------------------------------------ */
/**
* Sets the log interval.
*
* @param ms the new log interval
*/
public void setLogInterval(int ms)
{
_logInterval = ms;
}
/* ------------------------------------------------------------ */
/**
* Gets the busy threshold.
*
* @return the busy threshold
*/
public int getBusyThreshold()
{
return _busyThreshold;
}
/* ------------------------------------------------------------ */
/**
* Sets the busy threshold.
*
* @param percent the new busy threshold
*/
public void setBusyThreshold(int percent)
{
_busyThreshold = percent;
}
/* ------------------------------------------------------------ */
/**
* Gets the log threshold.
*
* @return the log threshold
*/
public int getLogThreshold()
{
return _logThreshold;
}
/* ------------------------------------------------------------ */
/**
* Sets the log threshold.
*
* @param percent the new log threshold
*/
public void setLogThreshold(int percent)
{
_logThreshold = percent;
}
/* ------------------------------------------------------------ */
/**
* Gets the stack depth.
*
* @return the stack depth
*/
public int getStackDepth()
{
return _stackDepth;
}
/* ------------------------------------------------------------ */
/**
* Sets the stack depth.
*
* @param stackDepth the new stack depth
*/
public void setStackDepth(int stackDepth)
{
_stackDepth = stackDepth;
}
/* ------------------------------------------------------------ */
/**
* Sets the stack trace trail length.
*
* @param trailLength the new trail length
*/
public void setTrailLength(int trailLength)
{
_trailLength = trailLength;
}
/* ------------------------------------------------------------ */
/**
* Gets the stack trace trail length.
*
* @return the trail length
*/
public int getTrailLength()
{
return _trailLength;
}
/* ------------------------------------------------------------ */
/**
* Enable logging of CPU usage.
*
* @param frequencyMs the logging frequency
* @param thresholdPercent the logging threshold
*/
public void logCpuUsage(int frequencyMs, int thresholdPercent)
{
setLogInterval(frequencyMs);
@ -218,13 +338,36 @@ public class ThreadMonitor extends AbstractLifeCycle implements Runnable
*/
public void run()
{
long currTime = System.currentTimeMillis();
long lastTime = currTime;
long lastDumpTime = currTime;
// Initialize repeat flag
boolean repeat = false;
boolean scanNow, logNow;
// Set next scan time and log time
long nextScanTime = System.currentTimeMillis();
long nextLogTime = nextScanTime + _logInterval;
while (!_done)
{
currTime = System.currentTimeMillis();
if (currTime < lastTime + _scanInterval)
long currTime = System.currentTimeMillis();
scanNow = (currTime > nextScanTime);
logNow = (_logInterval > 0 && currTime > nextLogTime);
if (repeat || scanNow || logNow)
{
repeat = collectThreadInfo();
logThreadInfo(logNow);
if (scanNow)
{
nextScanTime = System.currentTimeMillis() + _scanInterval;
}
if (logNow)
{
nextLogTime = System.currentTimeMillis() + _logInterval;
}
}
// Sleep only if not going to repeat scanning immediately
if (!repeat)
{
try
{
@ -234,18 +377,7 @@ public class ThreadMonitor extends AbstractLifeCycle implements Runnable
{
Log.ignore(ex);
}
continue;
}
collectThreadInfo();
lastTime = System.currentTimeMillis();
if (_logInterval > 0 && lastTime > lastDumpTime + _logInterval)
{
logCpuUsage();
lastDumpTime = lastTime;
}
logThreadState();
}
}
@ -254,16 +386,22 @@ public class ThreadMonitor extends AbstractLifeCycle implements Runnable
/**
* Collect thread info.
*/
private void collectThreadInfo()
private boolean collectThreadInfo()
{
boolean repeat = false;
try
{
Map<Thread,StackTraceElement[]> all = Thread.getAllStackTraces();
// Retrieve stack traces for all threads at once as it
// was proven to be an order of magnitude faster when
// retrieving a single thread stack trace.
Map<Thread,StackTraceElement[]> all = Thread.getAllStackTraces();
for (Map.Entry<Thread,StackTraceElement[]> entry : all.entrySet())
{
Thread thread = entry.getKey();
long threadId = thread.getId();
// Skip our own runner thread
if (threadId == _runner.getId())
{
continue;
@ -272,6 +410,7 @@ public class ThreadMonitor extends AbstractLifeCycle implements Runnable
ThreadMonitorInfo currMonitorInfo = _monitorInfo.get(Long.valueOf(threadId));
if (currMonitorInfo == null)
{
// Create thread info object for a new thread
currMonitorInfo = new ThreadMonitorInfo(thread);
currMonitorInfo.setStackTrace(entry.getValue());
currMonitorInfo.setCpuTime(getThreadCpuTime(threadId));
@ -280,19 +419,46 @@ public class ThreadMonitor extends AbstractLifeCycle implements Runnable
}
else
{
// Update the existing thread info object
currMonitorInfo.setStackTrace(entry.getValue());
currMonitorInfo.setCpuTime(getThreadCpuTime(threadId));
currMonitorInfo.setSampleTime(System.nanoTime());
currMonitorInfo.setSpinning(false);
// Stack trace count holds logging state
int count = currMonitorInfo.getTraceCount();
if (count >= 0 && currMonitorInfo.isSpinning())
{
// Thread was spinning and was logged before
if (count < _trailLength)
{
// Log another stack trace
currMonitorInfo.setTraceCount(count+1);
repeat = true;
continue;
}
// Reset spin flag and trace count
currMonitorInfo.setSpinning(false);
currMonitorInfo.setTraceCount(-1);
}
if (currMonitorInfo.getCpuUtilization() > _busyThreshold)
{
// Thread is busy
StackTraceElement[] lastStackTrace = currMonitorInfo.getStackTrace();
if (lastStackTrace != null
&& matchStackTraces(lastStackTrace, entry.getValue()))
{
// Thread is spinning
currMonitorInfo.setSpinning(true);
if (count < 0)
{
// Enable logging of spin status and stack traces
// only if the incoming trace count is negative
// that indicates a new scan for this thread
currMonitorInfo.setTraceCount(0);
repeat = (_trailLength > 0);
}
}
}
}
@ -302,13 +468,15 @@ public class ThreadMonitor extends AbstractLifeCycle implements Runnable
{
Log.debug(ex);
}
return repeat;
}
/* ------------------------------------------------------------ */
protected void logCpuUsage()
protected void logThreadInfo(boolean logAll)
{
if (_monitorInfo.size() > 0)
{
// Select thread objects for all live threads
long[] running = getAllThreadIds();
List<ThreadMonitorInfo> all = new ArrayList<ThreadMonitorInfo>();
for (int idx=0; idx<running.length; idx++)
@ -319,7 +487,8 @@ public class ThreadMonitor extends AbstractLifeCycle implements Runnable
all.add(info);
}
}
// Sort selected thread objects by their CPU utilization
Collections.sort(all, new Comparator<ThreadMonitorInfo>()
{
/* ------------------------------------------------------------ */
@ -329,37 +498,32 @@ public class ThreadMonitor extends AbstractLifeCycle implements Runnable
}
});
String format = "Thread %1$s[id:%2$d,%3$s] - %4$.2f%%";
String format = "Thread '%2$s'[%3$s,id:%1$d,cpu:%4$.2f%%]%5$s";
// Log thread information for threads that exceed logging threshold
// or log spinning threads if their trace count is zero
for (ThreadMonitorInfo info : all)
{
if (info.getCpuUtilization() > _logThreshold)
if (logAll && info.getCpuUtilization() > _logThreshold
|| info.isSpinning() && info.getTraceCount() == 0)
{
String message = String.format(format, info.getThreadName(),
info.getThreadId(), info.getThreadState(), info.getCpuUtilization());
String message = String.format(format,
info.getThreadId(), info.getThreadName(),
info.getThreadState(), info.getCpuUtilization(),
info.isSpinning() ? " SPINNING" : "");
_logger.info(message);
}
}
}
}
/* ------------------------------------------------------------ */
/**
* Output thread info to log.
*
* @param detected thread info list
*/
protected void logThreadState()
{
if (_monitorInfo.size() > 0)
{
long[] all = getAllThreadIds();
for (int idx=0; idx<all.length; idx++)
// Log stack traces for spinning threads with positive trace count
for (ThreadMonitorInfo info : all)
{
ThreadMonitorInfo info = _monitorInfo.get(all[idx]);
if (info != null && info.isSpinning())
if (info.isSpinning() && info.getTraceCount() >= 0)
{
String message = String.format("%1$s[id:%2$d,%3$s] is SPINNING",
info.getThreadName(), info.getThreadId(), info.getThreadState());
String message = String.format(format,
info.getThreadId(), info.getThreadName(),
info.getThreadState(), info.getCpuUtilization(),
" STACK TRACE");
_logger.warn(new ThreadMonitorException(message, info.getStackTrace()));
}
}

View File

@ -15,16 +15,16 @@
package org.eclipse.jetty.monitor;
/* ------------------------------------------------------------ */
/**
*/
public class ThreadMonitorInfo
{
private Thread _thread;
private StackTraceElement[] _stackTrace;
private boolean _threadSpinning;
private boolean _threadSpinning = false;
private int _traceCount = -1;
private long _prevCpuTime;
private long _prevSampleTime;
@ -118,6 +118,28 @@ public class ThreadMonitorInfo
_threadSpinning = value;
}
/* ------------------------------------------------------------ */
/**
* Sets the trace count.
*
* @param traceCount the new trace count
*/
public void setTraceCount(int traceCount)
{
_traceCount = traceCount;
}
/* ------------------------------------------------------------ */
/**
* Gets the trace count.
*
* @return the trace count
*/
public int getTraceCount()
{
return _traceCount;
}
/* ------------------------------------------------------------ */
/**
* @return the CPU time of the thread

View File

@ -16,6 +16,7 @@ package org.eclipse.jetty.monitor;
import static org.junit.Assert.assertTrue;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;
import org.junit.Test;
@ -26,32 +27,39 @@ import org.junit.Test;
*/
public class ThreadMonitorTest
{
public final static int DURATION=9000;
public final static int DURATION=4000;
@Test
public void monitorTest() throws Exception
{
final AtomicInteger countCpuLogs=new AtomicInteger(0);
final AtomicInteger countStateLogs=new AtomicInteger(0);
final AtomicInteger countLogs=new AtomicInteger(0);
final AtomicInteger countSpin=new AtomicInteger(0);
ThreadMonitor monitor = new ThreadMonitor(1000,50,1)
ThreadMonitor monitor = new ThreadMonitor(1000,50,1,1)
{
@Override
protected void logCpuUsage()
protected void logThreadInfo(boolean logAll)
{
countCpuLogs.incrementAndGet();
super.logCpuUsage();
}
@Override
protected void logThreadState()
{
countStateLogs.incrementAndGet();
super.logThreadState();
if (logAll)
countLogs.incrementAndGet();
else
countSpin.incrementAndGet();
super.logThreadInfo(logAll);
}
};
monitor.logCpuUsage(2000,1);
monitor.logCpuUsage(2000,0);
monitor.start();
Random rnd = new Random();
for (long cnt=0; cnt<100; cnt++)
{
long value = rnd.nextLong() % 50 + 50;
Sleeper sleeper = new Sleeper(value);
Thread runner = new Thread(sleeper);
runner.setDaemon(true);
runner.start();
}
Spinner spinner = new Spinner();
Thread runner = new Thread(spinner);
runner.start();
@ -61,8 +69,8 @@ public class ThreadMonitorTest
spinner.setDone();
monitor.stop();
assertTrue(countCpuLogs.get() >= 1);
assertTrue(countStateLogs.get() >= 1);
assertTrue(countLogs.get() >= 1);
assertTrue(countSpin.get() >= 2);
}
@ -71,8 +79,6 @@ public class ThreadMonitorTest
private volatile boolean done = false;
/* ------------------------------------------------------------ */
/**
*/
public void setDone()
{
done = true;
@ -99,4 +105,35 @@ public class ThreadMonitorTest
System.err.println("Bingo!");
}
}
private class Sleeper implements Runnable
{
private long _value;
/* ------------------------------------------------------------ */
public Sleeper(long value)
{
_value = value;
}
/* ------------------------------------------------------------ */
public void run()
{
try
{
fn(_value);
}
catch (InterruptedException e) {}
}
/* ------------------------------------------------------------ */
public long fn(long value) throws InterruptedException
{
long result = value > 1 ? fn(value-1) : 1;
Thread.sleep(50);
return result;
}
}
}