HBASE-20387 ADDENDUM backport findHangingTests.py changes from master to branches-1.

2018-08-16 23:55:28 -05:00 · 2018-08-16 23:55:28 -05:00 · 8716ac2568
parent 18840e9510
commit 8716ac2568
1 changed files with 98 additions and 65 deletions
--- a/dev-support/flaky-tests/findHangingTests.py
+++ b/dev-support/flaky-tests/findHangingTests.py
@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
 ##
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements.  See the NOTICE file
@ -15,68 +15,101 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 ##
 # script to find hanging test from Jenkins build output
 # usage: ./findHangingTests.py <url of Jenkins build console>
 #
 import urllib2
 import sys
 import string
 # Flat script: fetch the Jenkins console output named on the command line, echo a few
 # banner lines, and record which org.apache.hadoop.hbase tests started and which finished.
 # Exactly one argument (the console URL) is required; anything else is a usage error.
 if len(sys.argv) != 2 :
  print "ERROR : Provide the jenkins job console URL as the only argument."
  exit(1)
 print "Fetching " + sys.argv[1]
 response = urllib2.urlopen(sys.argv[1])
 # NOTE(review): i is assigned here but not read anywhere in the visible script.
 i = 0;
 # tests: name -> False once a "Running ..." line was seen, True once a "Tests run:" line
 # was seen for that name.
 tests = {}
 # failed_tests: names whose "Tests run:" line also contained FAILURE or ERROR.
 failed_tests = {}
 # summary tracks progress through the log: 0 -> 1 on "Running tests.", 1 -> 2 on
 # "[INFO] Reactor Summary:"; module lines are only echoed once it reaches 2.
 summary = 0
 # One-shot flags: each banner line below is echoed only the first time it is seen.
 host = False
 patch = False
 branch = False
 while True:
  n = response.readline()
  # readline() returning an empty string is treated as end of the console output.
  if n == "" :
    break
  if not host and n.find("Building remotely on") >= 0:
    host = True
    print n.strip()    
    continue
  if not patch and n.find("Testing patch for ") >= 0:
    patch = True
    print n.strip()    
    continue
  if not branch and n.find("Testing patch on branch ") >= 0:
    branch = True
    print n.strip()    
    continue
  # A patch that failed to apply ends the script immediately with exit status 1.
  if n.find("PATCH APPLICATION FAILED") >= 0:
    print "PATCH APPLICATION FAILED"
    sys.exit(1) 
  if summary == 0 and n.find("Running tests.") >= 0:
    summary = summary + 1
    continue
  if summary == 1 and n.find("[INFO] Reactor Summary:") >= 0:
    summary = summary + 1
    continue
  # After the reactor summary header, copy the "[INFO] Apache HBase " lines to stdout as is.
  if summary == 2 and n.find("[INFO] Apache HBase ") >= 0:
    sys.stdout.write(n)
    continue
  # Every remaining check only concerns lines that mention the HBase package prefix.
  if n.find("org.apache.hadoop.hbase") < 0:
    continue 
  # The dictionary key is the rest of the line from the first occurrence of the package
  # prefix, with surrounding whitespace stripped.
  test_name = string.strip(n[n.find("org.apache.hadoop.hbase"):len(n)])
  if n.find("Running org.apache.hadoop.hbase") > -1 :
    tests[test_name] = False
  if n.find("Tests run:") > -1 :
    if n.find("FAILURE") > -1 or n.find("ERROR") > -1:
      failed_tests[test_name] = True
    tests[test_name] = True
 response.close()
-print "Printing hanging tests"
+# pylint: disable=invalid-name
-for key, value in tests.iteritems():
+# To disable 'invalid constant name' warnings.
-  if value == False:
+
-    print "Hanging test : " + key
+"""
-print "Printing Failing tests"
+# Script to find hanging test from Jenkins build output
-for key, value in failed_tests.iteritems():
+# usage: ./findHangingTests.py <url of Jenkins build console>
-  print "Failing test : " + key
+"""
 import re
 import sys
 import requests
 # If any of these strings appear in the console output, it's a build one should probably ignore
 # for analyzing failed/hanging tests.
 # get_bad_tests() looks for each entry in every console line and prints the lines that match.
 BAD_RUN_STRINGS = [
    "Slave went offline during the build",  # Machine went down, can't do anything about it.
    "The forked VM terminated without properly saying goodbye",  # JVM crashed.
 ]
 def get_bad_tests(console_url):
    """
    Returns [[all tests], [failed tests], [timeout tests], [hanging tests]] if successfully gets
    the build information.
    If there is error getting console text or if there are blacklisted strings in console text,
    then returns None.
    """
    response = requests.get(console_url)
    if response.status_code != 200:
        print "Error getting consoleText. Response = {} {}".format(
            response.status_code, response.reason)
        return
    # All tests: All testcases which were run.
    # Hanging test: A testcase which started but never finished.
    # Failed test: Testcase which encountered any kind of failure. It can be failing atomic tests,
    #   timed out tests, etc
    # Timeout test: A Testcase which encountered timeout. Naturally, all timeout tests will be
    #   included in failed tests.
    all_tests_set = set()
    hanging_tests_set = set()
    failed_tests_set = set()
    timeout_tests_set = set()
    for line in response.content.splitlines():
        result1 = re.findall("Running org.apache.hadoop.hbase.(.*)", line)
        if len(result1) == 1:
            test_case = result1[0]
            if test_case in all_tests_set:
                print ("ERROR! Multiple tests with same name '{}'. Might get wrong results "
                       "for this test.".format(test_case))
            else:
                hanging_tests_set.add(test_case)
                all_tests_set.add(test_case)
        result2 = re.findall("Tests run:.*?- in org.apache.hadoop.hbase.(.*)", line)
        if len(result2) == 1:
            test_case = result2[0]
            if "FAILURE!" in line:
                failed_tests_set.add(test_case)
            if test_case not in hanging_tests_set:
                print ("ERROR! No test '{}' found in hanging_tests. Might get wrong results "
                       "for this test. This may also happen if maven is set to retry failing "
                       "tests.".format(test_case))
            else:
                hanging_tests_set.remove(test_case)
        result3 = re.match("^\\s+(\\w*).*\\sTestTimedOut", line)
        if result3:
            test_case = result3.group(1)
            timeout_tests_set.add(test_case)
        for bad_string in BAD_RUN_STRINGS:
            if re.match(".*" + bad_string + ".*", line):
                print "Bad string found in build:\n > {}".format(line)
    print "Result > total tests: {:4}   failed : {:4}  timedout : {:4}  hanging : {:4}".format(
        len(all_tests_set), len(failed_tests_set), len(timeout_tests_set), len(hanging_tests_set))
    return [all_tests_set, failed_tests_set, timeout_tests_set, hanging_tests_set]
 if __name__ == "__main__":
     # Command-line entry point: takes the Jenkins console URL as the only argument and
     # prints the hanging tests followed by the failed tests.
     if len(sys.argv) != 2:
         print("ERROR : Provide the jenkins job console URL as the only argument.")
         sys.exit(1)
     print("Fetching {}".format(sys.argv[1]))
     result = get_bad_tests(sys.argv[1])
     # get_bad_tests gives back nothing when the console text could not be fetched.
     if not result:
         sys.exit(1)
     all_tests, failed_tests, timedout_tests, hanging_tests = result
     print("Found {} hanging tests:".format(len(hanging_tests)))
     # Sets have no defined order; sort so that two runs over the same log print the same
     # listing.
     for test in sorted(hanging_tests):
         print(test)
     print("\n")
     print("Found {} failed tests of which {} timed out:".format(
         len(failed_tests), len(timedout_tests)))
     for test in sorted(failed_tests):
         print("{0} {1}".format(test, ("(Timed Out)" if test in timedout_tests else "")))
     print("\nA test may have had 0 or more atomic test failures before it timed out. So a "
           "'Timed Out' test may have other errors too.")