hbase/dev-support/flaky-tests/findHangingTests.py

#!/usr/bin/env python2
##
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# pylint: disable=invalid-name
# To disable 'invalid constant name' warnings.

"""
# Script to find hanging test from Jenkins build output
# usage: ./findHangingTests.py <url of Jenkins build console>
"""

import re
import sys
import requests

# Substrings of console lines that indicate the build itself broke, as opposed to its tests.
# If any of these strings appear in the console output, it's a build one should probably ignore
# for analyzing failed/hanging tests. get_bad_tests() checks every console line against them.
BAD_RUN_STRINGS = [
    "Slave went offline during the build",  # Machine went down, can't do anything about it.
    "The forked VM terminated without properly saying goodbye",  # JVM crashed.
]


def get_bad_tests(console_url):
    """
    Fetches the console text of a Jenkins build and classifies the test cases found in it.

    Args:
        console_url: URL of the Jenkins build console text; it is fetched with an HTTP GET.

    Returns:
        [all tests, failed tests, timeout tests, hanging tests], each a set of test names, if
        the build information was fetched and the build is usable.
        None if the response status is not 200, or if any console line contains one of
        BAD_RUN_STRINGS (such a build should be ignored for analyzing failed/hanging tests).

    Names in the all/failed/hanging sets are whatever follows "org.apache.hadoop.hbase." on the
    matching console line. Names in the timeout set are the leading word of a "TestTimedOut"
    line, so they never contain a '.'.
    """
    response = requests.get(console_url)
    if response.status_code != 200:
        print("Error getting consoleText. Response = {} {}".format(
            response.status_code, response.reason))
        return None

    # Compiled once rather than once per console line. The dots of the package name are
    # escaped so that they only match literal dots.
    test_started = re.compile(r"Running org\.apache\.hadoop\.hbase\.(.*)")
    test_finished = re.compile(r"Tests run:.*?- in org\.apache\.hadoop\.hbase\.(.*)")
    test_timed_out = re.compile(r"^\s+(\w*).*\sTestTimedOut")

    # All tests: All testcases which were run.
    # Hanging test: A testcase which started but never finished.
    # Failed test: Testcase which encountered any kind of failure. It can be failing atomic tests,
    #   timed out tests, etc
    # Timeout test: A Testcase which encountered timeout. Naturally, all timeout tests will be
    #   included in failed tests.
    all_tests_set = set()
    hanging_tests_set = set()
    failed_tests_set = set()
    timeout_tests_set = set()
    for line in response.content.splitlines():
        # A bad-run marker means the whole build must be ignored: return None, as the docstring
        # promises, instead of reporting results computed from a broken build.
        for bad_string in BAD_RUN_STRINGS:
            if bad_string in line:
                print("Bad string found in build:\n > {}".format(line))
                return None

        # A test case starts: until its summary line shows up it is considered hanging.
        started = test_started.search(line)
        if started:
            test_case = started.group(1)
            if test_case in all_tests_set:
                print("ERROR! Multiple tests with same name '{}'. Might get wrong results "
                      "for this test.".format(test_case))
            else:
                hanging_tests_set.add(test_case)
                all_tests_set.add(test_case)

        # A test case reports its summary: it finished, so it is no longer hanging.
        finished = test_finished.search(line)
        if finished:
            test_case = finished.group(1)
            if "FAILURE!" in line:
                failed_tests_set.add(test_case)
            if test_case not in hanging_tests_set:
                print("ERROR! No test '{}' found in hanging_tests. Might get wrong results "
                      "for this test. This may also happen if maven is set to retry failing "
                      "tests.".format(test_case))
            else:
                hanging_tests_set.remove(test_case)

        # Indented line mentioning TestTimedOut: its leading word is recorded as the test name.
        timed_out = test_timed_out.match(line)
        if timed_out:
            timeout_tests_set.add(timed_out.group(1))
    print("Result > total tests: {:4}   failed : {:4}  timedout : {:4}  hanging : {:4}".format(
        len(all_tests_set), len(failed_tests_set), len(timeout_tests_set), len(hanging_tests_set)))
    return [all_tests_set, failed_tests_set, timeout_tests_set, hanging_tests_set]

if __name__ == "__main__":
    # Command-line entry point: takes the Jenkins console URL as the only argument, prints the
    # hanging tests and then the failed tests. Exits with status 1 on a usage error or when
    # get_bad_tests() returns no result.
    if len(sys.argv) != 2:
        print("ERROR : Provide the jenkins job console URL as the only argument.")
        sys.exit(1)

    print("Fetching {}".format(sys.argv[1]))
    result = get_bad_tests(sys.argv[1])
    if not result:
        sys.exit(1)
    [all_tests, failed_tests, timedout_tests, hanging_tests] = result

    print("Found {} hanging tests:".format(len(hanging_tests)))
    # Sorted so that the report is stable from one run to the next (sets have no fixed order).
    for test in sorted(hanging_tests):
        print(test)
    print("\n")
    print("Found {} failed tests of which {} timed out:".format(
        len(failed_tests), len(timedout_tests)))
    for test in sorted(failed_tests):
        # Failed test names may carry a sub-package prefix ("client.TestFoo") whereas timed-out
        # names are captured with \w* and never contain a dot ("TestFoo"). Compare on the simple
        # class name too, otherwise tests outside the root package are never marked.
        simple_name = test.rsplit(".", 1)[-1]
        has_timed_out = test in timedout_tests or simple_name in timedout_tests
        print("{0} {1}".format(test, ("(Timed Out)" if has_timed_out else "")))

    print("\nA test may have had 0 or more atomic test failures before it timed out. So a "
          "'Timed Out' test may have other errors too.")
HBASE-26747 Use python2 instead of python in our python scripts (#4102) Signed-off-by: Xin Sun <ddupgs@gmail.com> 2022-02-09 01:55:16 -05:00			`#!/usr/bin/env python2`
HBASE-12195 Fix dev-support/findHangingTests (Manukranth Kolloju) 2014-10-07 18:03:50 -04:00			`##`
			`# Licensed to the Apache Software Foundation (ASF) under one`
			`# or more contributor license agreements. See the NOTICE file`
			`# distributed with this work for additional information`
			`# regarding copyright ownership. The ASF licenses this file`
			`# to you under the Apache License, Version 2.0 (the`
			`# "License"); you may not use this file except in compliance`
			`# with the License. You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
HBASE-16434 - Add date and count of flaky tests - Sort tests by decreasing order of flakyness - Internal links to each job's results - Correct calculation of total bad runs for a test - Fixes pylint errors Change-Id: I12ebc32ccec14c5ff389464b4de8ae93653c876c 2016-08-17 03:47:16 -04:00
			`# pylint: disable=invalid-name`
			`# To disable 'invalid constant name' warnings.`

			`"""`
			`# Script to find hanging test from Jenkins build output`
HBASE-12195 Fix dev-support/findHangingTests (Manukranth Kolloju) 2014-10-07 18:03:50 -04:00			`# usage: ./findHangingTests.py <url of Jenkins build console>`
HBASE-16434 - Add date and count of flaky tests - Sort tests by decreasing order of flakyness - Internal links to each job's results - Correct calculation of total bad runs for a test - Fixes pylint errors Change-Id: I12ebc32ccec14c5ff389464b4de8ae93653c876c 2016-08-17 03:47:16 -04:00			`"""`

HBASE-15896 Add timeout tests to flaky list from report-flakies.py - Adds timed-out tests to flaky list. Dumpes two new files for reference, 'timeout' and 'failed' for corresponding list of bad tests. - Set --max-builds for different urls separately. This is needed so that we can turn the knobs for post-commit job and flaky-tests job separately. (Apekshit) Change-Id: I88e1f9a8924eed1b5010106e73edede3aff34b0b Signed-off-by: stack <stack@apache.org> 2016-05-24 23:39:54 -04:00			`import re`
HBASE-12195 Fix dev-support/findHangingTests (Manukranth Kolloju) 2014-10-07 18:03:50 -04:00			`import sys`
HBASE-16434 - Add date and count of flaky tests - Sort tests by decreasing order of flakyness - Internal links to each job's results - Correct calculation of total bad runs for a test - Fixes pylint errors Change-Id: I12ebc32ccec14c5ff389464b4de8ae93653c876c 2016-08-17 03:47:16 -04:00			`import requests`
HBASE-16038 Ignore JVM crashes or machine shutdown failures in report-flakies. Change-Id: If49acd704e827b289c75f449a6180038b297d647 2016-06-15 18:39:49 -04:00
			`# If any of these strings appear in the console output, it's a build one should probably ignore`
			`# for analyzing failed/hanging tests.`
			`BAD_RUN_STRINGS = [`
			`"Slave went offline during the build", # Machine went down, can't do anything about it.`
			`"The forked VM terminated without properly saying goodbye", # JVM crashed.`
			`]`

HBASE-16434 - Add date and count of flaky tests - Sort tests by decreasing order of flakyness - Internal links to each job's results - Correct calculation of total bad runs for a test - Fixes pylint errors Change-Id: I12ebc32ccec14c5ff389464b4de8ae93653c876c 2016-08-17 03:47:16 -04:00
HBASE-15917 Flaky tests dashboard. (Apekshit) Change-Id: If0e4299b173d3d76e0bdb12a3de080e5b51d8748 Signed-off-by: stack <stack@apache.org> 2016-05-30 23:02:33 -04:00			`def get_bad_tests(console_url):`
HBASE-16434 - Add date and count of flaky tests - Sort tests by decreasing order of flakyness - Internal links to each job's results - Correct calculation of total bad runs for a test - Fixes pylint errors Change-Id: I12ebc32ccec14c5ff389464b4de8ae93653c876c 2016-08-17 03:47:16 -04:00			`"""`
			`Returns [[all tests], [failed tests], [timeout tests], [hanging tests]] if successfully gets`
			`the build information.`
			`If there is error getting console text or if there are blacklisted strings in console text,`
			`then returns None.`
			`"""`
HBASE-15896 Add timeout tests to flaky list from report-flakies.py - Adds timed-out tests to flaky list. Dumpes two new files for reference, 'timeout' and 'failed' for corresponding list of bad tests. - Set --max-builds for different urls separately. This is needed so that we can turn the knobs for post-commit job and flaky-tests job separately. (Apekshit) Change-Id: I88e1f9a8924eed1b5010106e73edede3aff34b0b Signed-off-by: stack <stack@apache.org> 2016-05-24 23:39:54 -04:00			`response = requests.get(console_url)`
			`if response.status_code != 200:`
			`print "Error getting consoleText. Response = {} {}".format(`
			`response.status_code, response.reason)`
HBASE-16384 Update report-flakies.py script to allow specifying a list of build ids to be excluded. Also fixes some pylint errors. Change-Id: I4620756c277c36a1ddb6d6cbd4d3e380da8442d7 2016-08-09 17:50:25 -04:00			`return`
HBASE-15896 Add timeout tests to flaky list from report-flakies.py - Adds timed-out tests to flaky list. Dumpes two new files for reference, 'timeout' and 'failed' for corresponding list of bad tests. - Set --max-builds for different urls separately. This is needed so that we can turn the knobs for post-commit job and flaky-tests job separately. (Apekshit) Change-Id: I88e1f9a8924eed1b5010106e73edede3aff34b0b Signed-off-by: stack <stack@apache.org> 2016-05-24 23:39:54 -04:00
HBASE-16434 - Add date and count of flaky tests - Sort tests by decreasing order of flakyness - Internal links to each job's results - Correct calculation of total bad runs for a test - Fixes pylint errors Change-Id: I12ebc32ccec14c5ff389464b4de8ae93653c876c 2016-08-17 03:47:16 -04:00			`# All tests: All testcases which were run.`
			`# Hanging test: A testcase which started but never finished.`
			`# Failed test: Testcase which encountered any kind of failure. It can be failing atomic tests,`
			`# timed out tests, etc`
			`# Timeout test: A Testcase which encountered timeout. Naturally, all timeout tests will be`
			`# included in failed tests.`
			`all_tests_set = set()`
			`hanging_tests_set = set()`
			`failed_tests_set = set()`
			`timeout_tests_set = set()`
HBASE-15896 Add timeout tests to flaky list from report-flakies.py - Adds timed-out tests to flaky list. Dumpes two new files for reference, 'timeout' and 'failed' for corresponding list of bad tests. - Set --max-builds for different urls separately. This is needed so that we can turn the knobs for post-commit job and flaky-tests job separately. (Apekshit) Change-Id: I88e1f9a8924eed1b5010106e73edede3aff34b0b Signed-off-by: stack <stack@apache.org> 2016-05-24 23:39:54 -04:00			`for line in response.content.splitlines():`
HBASE-18341 Generalize regex matchers in findHangingTests.py script to match new consoleText of trunk build. Change-Id: I0a4215827d3d561eef3f583da666c617f690d934 2017-07-08 03:49:31 -04:00			`result1 = re.findall("Running org.apache.hadoop.hbase.(.*)", line)`
			`if len(result1) == 1:`
			`test_case = result1[0]`
HBASE-16434 - Add date and count of flaky tests - Sort tests by decreasing order of flakyness - Internal links to each job's results - Correct calculation of total bad runs for a test - Fixes pylint errors Change-Id: I12ebc32ccec14c5ff389464b4de8ae93653c876c 2016-08-17 03:47:16 -04:00			`if test_case in all_tests_set:`
			`print ("ERROR! Multiple tests with same name '{}'. Might get wrong results "`
HBASE-15938 submit-patch.py: Don't crash if there are tests with same name. Refactor: Split out flaky dashboard html template to separate file. (Apekshit) Change-Id: Ie5875bdefbf886984a57dfc85661be2ac9592a7b Signed-off-by: stack <stack@apache.org> 2016-06-01 22:12:50 -04:00			`"for this test.".format(test_case))`
			`else:`
HBASE-16434 - Add date and count of flaky tests - Sort tests by decreasing order of flakyness - Internal links to each job's results - Correct calculation of total bad runs for a test - Fixes pylint errors Change-Id: I12ebc32ccec14c5ff389464b4de8ae93653c876c 2016-08-17 03:47:16 -04:00			`hanging_tests_set.add(test_case)`
			`all_tests_set.add(test_case)`
HBASE-18341 Generalize regex matchers in findHangingTests.py script to match new consoleText of trunk build. Change-Id: I0a4215827d3d561eef3f583da666c617f690d934 2017-07-08 03:49:31 -04:00			`result2 = re.findall("Tests run:.*?- in org.apache.hadoop.hbase.(.*)", line)`
			`if len(result2) == 1:`
			`test_case = result2[0]`
HBASE-15896 Add timeout tests to flaky list from report-flakies.py - Adds timed-out tests to flaky list. Dumpes two new files for reference, 'timeout' and 'failed' for corresponding list of bad tests. - Set --max-builds for different urls separately. This is needed so that we can turn the knobs for post-commit job and flaky-tests job separately. (Apekshit) Change-Id: I88e1f9a8924eed1b5010106e73edede3aff34b0b Signed-off-by: stack <stack@apache.org> 2016-05-24 23:39:54 -04:00			`if "FAILURE!" in line:`
HBASE-16434 - Add date and count of flaky tests - Sort tests by decreasing order of flakyness - Internal links to each job's results - Correct calculation of total bad runs for a test - Fixes pylint errors Change-Id: I12ebc32ccec14c5ff389464b4de8ae93653c876c 2016-08-17 03:47:16 -04:00			`failed_tests_set.add(test_case)`
			`if test_case not in hanging_tests_set:`
HBASE-19382 Update report-flakies.py script to handle yetus builds. 2017-11-29 20:53:16 -05:00			`print ("ERROR! No test '{}' found in hanging_tests. Might get wrong results "`
			`"for this test. This may also happen if maven is set to retry failing "`
			`"tests.".format(test_case))`
HBASE-15938 submit-patch.py: Don't crash if there are tests with same name. Refactor: Split out flaky dashboard html template to separate file. (Apekshit) Change-Id: Ie5875bdefbf886984a57dfc85661be2ac9592a7b Signed-off-by: stack <stack@apache.org> 2016-06-01 22:12:50 -04:00			`else:`
HBASE-16434 - Add date and count of flaky tests - Sort tests by decreasing order of flakyness - Internal links to each job's results - Correct calculation of total bad runs for a test - Fixes pylint errors Change-Id: I12ebc32ccec14c5ff389464b4de8ae93653c876c 2016-08-17 03:47:16 -04:00			`hanging_tests_set.remove(test_case)`
			`result3 = re.match("^\\s+(\\w*).*\\sTestTimedOut", line)`
HBASE-15917 Flaky tests dashboard. (Apekshit) Change-Id: If0e4299b173d3d76e0bdb12a3de080e5b51d8748 Signed-off-by: stack <stack@apache.org> 2016-05-30 23:02:33 -04:00			`if result3:`
			`test_case = result3.group(1)`
HBASE-16434 - Add date and count of flaky tests - Sort tests by decreasing order of flakyness - Internal links to each job's results - Correct calculation of total bad runs for a test - Fixes pylint errors Change-Id: I12ebc32ccec14c5ff389464b4de8ae93653c876c 2016-08-17 03:47:16 -04:00			`timeout_tests_set.add(test_case)`
HBASE-16038 Ignore JVM crashes or machine shutdown failures in report-flakies. Change-Id: If49acd704e827b289c75f449a6180038b297d647 2016-06-15 18:39:49 -04:00			`for bad_string in BAD_RUN_STRINGS:`
			`if re.match(".*" + bad_string + ".*", line):`
			`print "Bad string found in build:\n > {}".format(line)`
HBASE-15917 Flaky tests dashboard. (Apekshit) Change-Id: If0e4299b173d3d76e0bdb12a3de080e5b51d8748 Signed-off-by: stack <stack@apache.org> 2016-05-30 23:02:33 -04:00			`print "Result > total tests: {:4} failed : {:4} timedout : {:4} hanging : {:4}".format(`
HBASE-16434 - Add date and count of flaky tests - Sort tests by decreasing order of flakyness - Internal links to each job's results - Correct calculation of total bad runs for a test - Fixes pylint errors Change-Id: I12ebc32ccec14c5ff389464b4de8ae93653c876c 2016-08-17 03:47:16 -04:00			`len(all_tests_set), len(failed_tests_set), len(timeout_tests_set), len(hanging_tests_set))`
			`return [all_tests_set, failed_tests_set, timeout_tests_set, hanging_tests_set]`
HBASE-15896 Add timeout tests to flaky list from report-flakies.py - Adds timed-out tests to flaky list. Dumpes two new files for reference, 'timeout' and 'failed' for corresponding list of bad tests. - Set --max-builds for different urls separately. This is needed so that we can turn the knobs for post-commit job and flaky-tests job separately. (Apekshit) Change-Id: I88e1f9a8924eed1b5010106e73edede3aff34b0b Signed-off-by: stack <stack@apache.org> 2016-05-24 23:39:54 -04:00
			`if __name__ == "__main__":`
HBASE-16434 - Add date and count of flaky tests - Sort tests by decreasing order of flakyness - Internal links to each job's results - Correct calculation of total bad runs for a test - Fixes pylint errors Change-Id: I12ebc32ccec14c5ff389464b4de8ae93653c876c 2016-08-17 03:47:16 -04:00			`if len(sys.argv) != 2:`
HBASE-15896 Add timeout tests to flaky list from report-flakies.py - Adds timed-out tests to flaky list. Dumpes two new files for reference, 'timeout' and 'failed' for corresponding list of bad tests. - Set --max-builds for different urls separately. This is needed so that we can turn the knobs for post-commit job and flaky-tests job separately. (Apekshit) Change-Id: I88e1f9a8924eed1b5010106e73edede3aff34b0b Signed-off-by: stack <stack@apache.org> 2016-05-24 23:39:54 -04:00			`print "ERROR : Provide the jenkins job console URL as the only argument."`
			`sys.exit(1)`

			`print "Fetching {}".format(sys.argv[1])`
HBASE-16038 Ignore JVM crashes or machine shutdown failures in report-flakies. Change-Id: If49acd704e827b289c75f449a6180038b297d647 2016-06-15 18:39:49 -04:00			`result = get_bad_tests(sys.argv[1])`
			`if not result:`
			`sys.exit(1)`
			`[all_tests, failed_tests, timedout_tests, hanging_tests] = result`

HBASE-15896 Add timeout tests to flaky list from report-flakies.py - Adds timed-out tests to flaky list. Dumpes two new files for reference, 'timeout' and 'failed' for corresponding list of bad tests. - Set --max-builds for different urls separately. This is needed so that we can turn the knobs for post-commit job and flaky-tests job separately. (Apekshit) Change-Id: I88e1f9a8924eed1b5010106e73edede3aff34b0b Signed-off-by: stack <stack@apache.org> 2016-05-24 23:39:54 -04:00			`print "Found {} hanging tests:".format(len(hanging_tests))`
			`for test in hanging_tests:`
			`print test`
			`print "\n"`
HBASE-15917 Flaky tests dashboard. (Apekshit) Change-Id: If0e4299b173d3d76e0bdb12a3de080e5b51d8748 Signed-off-by: stack <stack@apache.org> 2016-05-30 23:02:33 -04:00			`print "Found {} failed tests of which {} timed out:".format(`
			`len(failed_tests), len(timedout_tests))`
HBASE-15896 Add timeout tests to flaky list from report-flakies.py - Adds timed-out tests to flaky list. Dumpes two new files for reference, 'timeout' and 'failed' for corresponding list of bad tests. - Set --max-builds for different urls separately. This is needed so that we can turn the knobs for post-commit job and flaky-tests job separately. (Apekshit) Change-Id: I88e1f9a8924eed1b5010106e73edede3aff34b0b Signed-off-by: stack <stack@apache.org> 2016-05-24 23:39:54 -04:00			`for test in failed_tests:`
HBASE-15917 Flaky tests dashboard. (Apekshit) Change-Id: If0e4299b173d3d76e0bdb12a3de080e5b51d8748 Signed-off-by: stack <stack@apache.org> 2016-05-30 23:02:33 -04:00			`print "{0} {1}".format(test, ("(Timed Out)" if test in timedout_tests else ""))`

			`print ("\nA test may have had 0 or more atomic test failures before it timed out. So a "`
			`"'Timed Out' test may have other errors too.")`