From 0923346b6117e5dc0f9d18f4c00d7fd5fe816367 Mon Sep 17 00:00:00 2001 From: Apekshit Date: Mon, 30 May 2016 20:02:33 -0700 Subject: [PATCH] HBASE-15917 Flaky tests dashboard. (Apekshit) Change-Id: If0e4299b173d3d76e0bdb12a3de080e5b51d8748 Signed-off-by: stack --- dev-support/findHangingTests.py | 31 ++++-- dev-support/report-flakies.py | 164 ++++++++++++++++++++++++++++---- 2 files changed, 168 insertions(+), 27 deletions(-) diff --git a/dev-support/findHangingTests.py b/dev-support/findHangingTests.py index ce49f48a946..9ef87080494 100755 --- a/dev-support/findHangingTests.py +++ b/dev-support/findHangingTests.py @@ -23,7 +23,15 @@ import re import requests import sys -def get_hanging_tests(console_url): +# Returns [[all tests], [failed tests], [timeout tests], [hanging tests]] +# Definitions: +# All tests: All testcases which were run. +# Hanging test: A testcase which started but never finished. +# Failed test: Testcase which encountered any kind of failure. It can be failing atomic tests, +# timed out tests, etc +# Timeout test: A Testcase which encountered timeout. Naturally, all timeout tests will be +# included in failed tests. +def get_bad_tests(console_url): response = requests.get(console_url) if response.status_code != 200: print "Error getting consoleText. Response = {} {}".format( @@ -33,6 +41,7 @@ def get_hanging_tests(console_url): all_tests = set() hanging_tests = set() failed_tests = set() + timeout_tests = set() for line in response.content.splitlines(): result1 = re.match("^Running org.apache.hadoop.hbase.(\w*\.)*(\w*)", line) if result1: @@ -45,9 +54,13 @@ def get_hanging_tests(console_url): hanging_tests.remove(test_case) if "FAILURE!" 
in line: failed_tests.add(test_case) - print "Result > total tests: {:4} hanging : {:4} failed : {:4}".format( - len(all_tests), len(hanging_tests), len(failed_tests)) - return [all_tests, hanging_tests, failed_tests] + result3 = re.match("^\s+(\w*).*\sTestTimedOut", line) + if result3: + test_case = result3.group(1) + timeout_tests.add(test_case) + print "Result > total tests: {:4} failed : {:4} timedout : {:4} hanging : {:4}".format( + len(all_tests), len(failed_tests), len(timeout_tests), len(hanging_tests)) + return [all_tests, failed_tests, timeout_tests, hanging_tests] if __name__ == "__main__": if len(sys.argv) != 2 : @@ -55,11 +68,15 @@ if __name__ == "__main__": sys.exit(1) print "Fetching {}".format(sys.argv[1]) - [all_tests, hanging_tests, failed_tests] = get_hanging_tests(sys.argv[1]) + [all_tests, failed_tests, timedout_tests, hanging_tests] = get_bad_tests(sys.argv[1]) print "Found {} hanging tests:".format(len(hanging_tests)) for test in hanging_tests: print test print "\n" - print "Found {} failed tests:".format(len(failed_tests)) + print "Found {} failed tests of which {} timed out:".format( + len(failed_tests), len(timedout_tests)) for test in failed_tests: - print test + print "{0} {1}".format(test, ("(Timed Out)" if test in timedout_tests else "")) + + print ("\nA test may have had 0 or more atomic test failures before it timed out. 
So a " + "'Timed Out' test may have other errors too.") diff --git a/dev-support/report-flakies.py b/dev-support/report-flakies.py index 1199afefec6..bdc88dc9f94 100755 --- a/dev-support/report-flakies.py +++ b/dev-support/report-flakies.py @@ -21,9 +21,9 @@ # Print help: report-flakies.py -h import argparse import findHangingTests +from jinja2 import Template import logging import requests -import sys parser = argparse.ArgumentParser() parser.add_argument("--urls", metavar="url[ max-builds]", action="append", required=True, @@ -54,7 +54,7 @@ def get_bad_tests(build_url): logger.info("Skipping this build since it is in progress.") return {} console_url = build_url + "/consoleText" - return findHangingTests.get_hanging_tests(console_url) + return findHangingTests.get_bad_tests(console_url) # If any url is of type multi-configuration project (i.e. has key 'activeConfigurations'), @@ -79,6 +79,10 @@ def expand_multi_configuration_projects(urls_list): # Set of timeout/failed tests across all given urls. all_timeout_tests = set() all_failed_tests = set() +all_hanging_tests = set() +# Contains { url : { bad_test : { 'all': [build ids], 'failed': [build ids], +# 'timeout': [build ids], 'hanging': [build ids] } } } +url_to_bad_test_results = {} # Iterates over each url, gets test results and prints flaky tests. expanded_urls = expand_multi_configuration_projects(args.urls) @@ -112,33 +116,41 @@ for url_max_build in expanded_urls: # Collect list of bad tests. bad_tests = set() for build in build_id_to_results: - [_, timeout_tests, failed_tests] = build_id_to_results[build] + [_, failed_tests, timeout_tests, hanging_tests] = build_id_to_results[build] all_timeout_tests.update(timeout_tests) all_failed_tests.update(failed_tests) - bad_tests.update(timeout_tests.union(failed_tests)) + all_hanging_tests.update(hanging_tests) + # Note that timedout tests are already included in failed tests. + bad_tests.update(failed_tests.union(hanging_tests)) - # Get total and failed/timeout times for each bad test. 
- build_counts = {key : {'total': 0, 'timeout': 0, 'fail': 0 } for key in bad_tests} + # For each bad test, get build ids where it ran, timed out, failed or hung. + test_to_build_ids = {key : {'all' : set(), 'timeout': set(), 'failed': set(), 'hanging' : set()} + for key in bad_tests} for build in build_id_to_results: - [all_tests, timeout_tests, failed_tests] = build_id_to_results[build] - for bad_test in bad_tests: + [all_tests, failed_tests, timeout_tests, hanging_tests] = build_id_to_results[build] + for bad_test in test_to_build_ids: if all_tests.issuperset([bad_test]): - build_counts[bad_test]["total"] += 1 + test_to_build_ids[bad_test]["all"].add(build) if timeout_tests.issuperset([bad_test]): - build_counts[bad_test]['timeout'] += 1 + test_to_build_ids[bad_test]['timeout'].add(build) if failed_tests.issuperset([bad_test]): - build_counts[bad_test]['fail'] += 1 + test_to_build_ids[bad_test]['failed'].add(build) + if hanging_tests.issuperset([bad_test]): + test_to_build_ids[bad_test]['hanging'].add(build) + url_to_bad_test_results[url] = test_to_build_ids - if len(bad_tests) > 0: + if len(test_to_build_ids) > 0: print "URL: {}".format(url) - print "{:>60} {:25} {:10} {}".format( - "Test Name", "Bad Runs(failed/timeout)", "Total Runs", "Flakyness") - for bad_test in bad_tests: - fail = build_counts[bad_test]['fail'] - timeout = build_counts[bad_test]['timeout'] - total = build_counts[bad_test]['total'] - print "{:>60} {:10} ({:4} / {:4}) {:10} {:2.0f}%".format( - bad_test, fail + timeout, fail, timeout, total, (fail + timeout) * 100.0 / total) + print "{:>60} {:10} {:25} {}".format( + "Test Name", "Total Runs", "Bad Runs(failed/timeout/hanging)", "Flakyness") + for bad_test in test_to_build_ids: + failed = len(test_to_build_ids[bad_test]['failed']) + timeout = len(test_to_build_ids[bad_test]['timeout']) + hanging = len(test_to_build_ids[bad_test]['hanging']) + total = len(test_to_build_ids[bad_test]['all']) + print "{:>60} {:10} {:7} ( {:4} / {:5} / {:5} ) 
{:2.0f}%".format( + bad_test, total, failed + timeout, failed, timeout, hanging, + (failed + timeout) * 100.0 / total) else: print "No flaky tests founds." if len(build_ids) == len(build_ids_without_tests_run): @@ -164,3 +176,115 @@ if args.mvn: with open("./failed", "w") as file: file.write(",".join(all_failed_tests)) + + +template = Template(""" + + + + Apache HBase Flaky Dashboard + + + +

+ +      + + Apache HBase Flaky Tests Dashboard + +

+

+ {% set counter = 0 %} + {% for url in results %} + {% set result = results[url] %} + {# Dedup ids since test names may duplicate across urls #} + {% set counter = counter + 1 %} + Job : {{ url |e }} + 🔗 +

+ + + + + + + + {% for test in result %} + {% set all = result[test]['all'] %} + {% set failed = result[test]['failed'] %} + {% set timeout = result[test]['timeout'] %} + {% set hanging = result[test]['hanging'] %} + {% set success = all.difference(failed).difference(hanging) %} + + + {% set flakyness = + (failed|length + hanging|length) * 100 / all|length %} + {% if flakyness == 100 %} + + + + + {% endfor %} +
Test NameFlakynessFailed/Timeout/HangingRun Ids
{{ test |e }} + {% else %} + + {% endif %} + {{ "{:.1f}% ({} / {})".format( + flakyness, failed|length + hanging|length, all|length) }} + + {{ failed|length }} / {{ timeout|length }} / {{ hanging|length }} + + {% set id = "details_" ~ test ~ "_" ~ counter %} + +
+ +
+


+ {% endfor %} + + + + """) + +with open("dashboard.html", "w") as f: + f.write(template.render(results=url_to_bad_test_results))