HBASE-15917 Flaky tests dashboard. (Apekshit)

Change-Id: If0e4299b173d3d76e0bdb12a3de080e5b51d8748

Signed-off-by: stack <stack@apache.org>
This commit is contained in:
Apekshit 2016-05-30 20:02:33 -07:00 committed by stack
parent e6d613de70
commit 0923346b61
2 changed files with 168 additions and 27 deletions

View File

@ -23,7 +23,15 @@ import re
import requests
import sys
def get_hanging_tests(console_url):
# Returns [[all tests], [failed tests], [timeout tests], [hanging tests]]
# Definitions:
# All tests: All testcases which were run.
# Hanging test: A testcase which started but never finished.
# Failed test: A testcase which encountered any kind of failure, e.g. failing atomic tests,
# timed-out tests, etc.
# Timeout test: A testcase which encountered a timeout. Naturally, all timeout tests are also
# included in failed tests.
def get_bad_tests(console_url):
response = requests.get(console_url)
if response.status_code != 200:
print "Error getting consoleText. Response = {} {}".format(
@ -33,6 +41,7 @@ def get_hanging_tests(console_url):
all_tests = set()
hanging_tests = set()
failed_tests = set()
timeout_tests = set()
for line in response.content.splitlines():
result1 = re.match("^Running org.apache.hadoop.hbase.(\w*\.)*(\w*)", line)
if result1:
@ -45,9 +54,13 @@ def get_hanging_tests(console_url):
if "FAILURE!" in line:
print "Result > total tests: {:4} hanging : {:4} failed : {:4}".format(
len(all_tests), len(hanging_tests), len(failed_tests))
return [all_tests, hanging_tests, failed_tests]
result3 = re.match("^\s+(\w*).*\sTestTimedOut", line)
if result3:
test_case = result3.group(1)
print "Result > total tests: {:4} failed : {:4} timedout : {:4} hanging : {:4}".format(
len(all_tests), len(failed_tests), len(timeout_tests), len(hanging_tests))
return [all_tests, failed_tests, timeout_tests, hanging_tests]
if __name__ == "__main__":
if len(sys.argv) != 2 :
@ -55,11 +68,15 @@ if __name__ == "__main__":
print "Fetching {}".format(sys.argv[1])
[all_tests, hanging_tests, failed_tests] = get_hanging_tests(sys.argv[1])
[all_tests, failed_tests, timedout_tests, hanging_tests] = get_bad_tests(sys.argv[1])
print "Found {} hanging tests:".format(len(hanging_tests))
for test in hanging_tests:
print test
print "\n"
print "Found {} failed tests:".format(len(failed_tests))
print "Found {} failed tests of which {} timed out:".format(
len(failed_tests), len(timedout_tests))
for test in failed_tests:
print test
print "{0} {1}".format(test, ("(Timed Out)" if test in timedout_tests else ""))
print ("\nA test may have had 0 or more atomic test failures before it timed out. So a "
"'Timed Out' test may have other errors too.")

View File

@ -21,9 +21,9 @@
# Print help: report-flakies.py -h
import argparse
import findHangingTests
from jinja2 import Template
import logging
import requests
import sys
parser = argparse.ArgumentParser()
parser.add_argument("--urls", metavar="url[ max-builds]", action="append", required=True,
@ -54,7 +54,7 @@ def get_bad_tests(build_url):
logger.info("Skipping this build since it is in progress.")
return {}
console_url = build_url + "/consoleText"
return findHangingTests.get_hanging_tests(console_url)
return findHangingTests.get_bad_tests(console_url)
# If any url is of type multi-configuration project (i.e. has key 'activeConfigurations'),
@ -79,6 +79,10 @@ def expand_multi_configuration_projects(urls_list):
# Set of timeout/failed tests across all given urls.
all_timeout_tests = set()
all_failed_tests = set()
all_hanging_tests = set()
# Contains { <url> : { <bad_test> : { 'all': [<build ids>], 'failed': [<build ids>],
# 'timeout': [<build ids>], 'hanging': [<build ids>] } } }
url_to_bad_test_results = {}
# Iterates over each url, gets test results and prints flaky tests.
expanded_urls = expand_multi_configuration_projects(args.urls)
@ -112,33 +116,41 @@ for url_max_build in expanded_urls:
# Collect list of bad tests.
bad_tests = set()
for build in build_id_to_results:
[_, timeout_tests, failed_tests] = build_id_to_results[build]
[_, failed_tests, timeout_tests, hanging_tests] = build_id_to_results[build]
# Note that timed-out tests are already included in failed tests.
# Get total and failed/timeout times for each bad test.
build_counts = {key : {'total': 0, 'timeout': 0, 'fail': 0 } for key in bad_tests}
# For each bad test, get build ids where it ran, timed out, failed or hung.
test_to_build_ids = {key : {'all' : set(), 'timeout': set(), 'failed': set(), 'hanging' : set()}
for key in bad_tests}
for build in build_id_to_results:
[all_tests, timeout_tests, failed_tests] = build_id_to_results[build]
for bad_test in bad_tests:
[all_tests, failed_tests, timeout_tests, hanging_tests] = build_id_to_results[build]
for bad_test in test_to_build_ids:
if all_tests.issuperset([bad_test]):
build_counts[bad_test]["total"] += 1
if timeout_tests.issuperset([bad_test]):
build_counts[bad_test]['timeout'] += 1
if failed_tests.issuperset([bad_test]):
build_counts[bad_test]['fail'] += 1
if hanging_tests.issuperset([bad_test]):
url_to_bad_test_results[url] = test_to_build_ids
if len(bad_tests) > 0:
if len(test_to_build_ids) > 0:
print "URL: {}".format(url)
print "{:>60} {:25} {:10} {}".format(
"Test Name", "Bad Runs(failed/timeout)", "Total Runs", "Flakyness")
for bad_test in bad_tests:
fail = build_counts[bad_test]['fail']
timeout = build_counts[bad_test]['timeout']
total = build_counts[bad_test]['total']
print "{:>60} {:10} ({:4} / {:4}) {:10} {:2.0f}%".format(
bad_test, fail + timeout, fail, timeout, total, (fail + timeout) * 100.0 / total)
print "{:>60} {:10} {:25} {}".format(
"Test Name", "Total Runs", "Bad Runs(failed/timeout/hanging)", "Flakyness")
for bad_test in test_to_build_ids:
failed = len(test_to_build_ids[bad_test]['failed'])
timeout = len(test_to_build_ids[bad_test]['timeout'])
hanging = len(test_to_build_ids[bad_test]['hanging'])
total = len(test_to_build_ids[bad_test]['all'])
print "{:>60} {:10} {:7} ( {:4} / {:5} / {:5} ) {:2.0f}%".format(
bad_test, total, failed + timeout, failed, timeout, hanging,
(failed + timeout) * 100.0 / total)
print "No flaky tests founds."
if len(build_ids) == len(build_ids_without_tests_run):
@ -164,3 +176,115 @@ if args.mvn:
with open("./failed", "w") as file:
template = Template("""
<!DOCTYPE html>
<title>Apache HBase Flaky Dashboard</title>
<style type="text/css">
table {
table-layout: fixed;
th {
font-size: 15px;
td {
font-size: 18px;
vertical-align: text-top;
overflow: hidden;
white-space: nowrap;
.show_hide_button {
font-size: 100%;
padding: .5em 1em;
border: 0 rgba(0,0,0,0);
border-radius: 10px;
<img style="vertical-align:middle; display:inline-block;" height="80px"
<span style="font-size:50px; vertical-align:middle; display:inline-block;">
Apache HBase Flaky Tests Dashboard
{% set counter = 0 %}
{% for url in results %}
{% set result = results[url] %}
{# Dedup ids since test names may duplicate across urls #}
{% set counter = counter + 1 %}
<span style="font-size:20px; font-weight:bold;">Job : {{ url |e }}
<a href="{{ url |e }}" style="text-decoration:none;">&#x1f517;</a></span>
<th width="400px">Test Name</th>
<th width="150px">Flakyness</th>
<th width="200px">Failed/Timeout/Hanging</th>
<th>Run Ids</th>
{% for test in result %}
{% set all = result[test]['all'] %}
{% set failed = result[test]['failed'] %}
{% set timeout = result[test]['timeout'] %}
{% set hanging = result[test]['hanging'] %}
{% set success = all.difference(failed).difference(hanging) %}
<td>{{ test |e }}</td>
{% set flakyness =
(failed|length + hanging|length) * 100 / all|length %}
{% if flakyness == 100 %}
<td align="middle" style="background-color:#FF9999;">
{% else %}
<td align="middle">
{% endif %}
{{ "{:.1f}% ({} / {})".format(
flakyness, failed|length + hanging|length, all|length) }}
<td align="middle">
{{ failed|length }} / {{ timeout|length }} / {{ hanging|length }}
{% set id = "details_" ~ test ~ "_" ~ counter %}
<button class="show_hide_button" onclick="toggle('{{ id }}')">
<div id="{{ id }}"
style="display: none; width:500px; white-space: normal">
{% macro print_run_ids(url, run_ids) -%}
{% for i in run_ids %}
<a href="{{ url }}/{{ i }}">{{ i }}</a>&nbsp;
{% endfor %}
{%- endmacro %}
Failed : {{ print_run_ids(url, failed) }}<br/>
Timed Out : {{ print_run_ids(url, timeout) }}<br/>
Hanging : {{ print_run_ids(url, hanging) }}<br/>
Succeeded : {{ print_run_ids(url, success) }}
{% endfor %}
{% endfor %}
<script type="text/javascript">
function toggle(id) {
if (document.getElementById(id).style["display"] == "none") {
document.getElementById(id).style["display"] = "block";
} else {
document.getElementById(id).style["display"] = "none";
with open("dashboard.html", "w") as f: