hadoop/dev-support/determine-flaky-tests-hadoo...

#!/usr/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Given a jenkins test job, this script examines all runs of the job done
# within specified period of time (number of days prior to the execution
# time of this script), and reports all failed tests.
#
# The output of this script includes a section for each run that has failed
# tests, with each failed test name listed.
#
# More importantly, at the end, it outputs a summary section that lists all
# failed tests across the examined runs, shows in how many runs each test
# failed, and sorts the failed tests by that number of runs.
#
# This way, when we see failed tests in a PreCommit build, we can quickly tell
# whether a failed test is a new failure or one that has failed before, and
# how often it failed, to get an idea of whether it may just be a flaky test.
#
# Of course, to be 100% sure about the reason for a test failure, a closer
# look at the failed test in the specific run is necessary.
#
import sys
import platform
# Interpreter gate: refuse to run on unsupported versions, then import the
# URL library that matches the running major version.
sysversion = sys.hexversion

# Anything older than 2.6 final is rejected outright.
if sysversion < 0x020600F0:
  sys.exit("Minimum supported python version is 2.6, the current version is "
           "Python" + platform.python_version())

# Exactly 3.0.0 final is rejected as well.
if sysversion == 0x030000F0:
  sys.exit("There is a known bug with Python" + platform.python_version()
           + ", please try a different version")

# onward30 records which urllib flavour was imported; load_url_data reads it.
onward30 = sysversion >= 0x03000000
if onward30:
  import urllib.request
else:
  import urllib2

import datetime
import json as simplejson
import logging
from optparse import OptionParser
import time

# Defaults for the command-line options.
DEFAULT_JENKINS_URL = "https://builds.apache.org"
DEFAULT_JOB_NAME = "Hadoop-Common-trunk"
DEFAULT_NUM_PREVIOUS_DAYS = 14
# -1 is the "not given" value for --top; main() switches to summary mode for
# any other value.
DEFAULT_TOP_NUM_FAILED_TEST = -1

# Length of one day in seconds, used to turn a day count into a time window.
SECONDS_PER_DAY = 86400

# Module-wide state shared by the functions below.

# Number of runs inside the examined time window; set by find_flaky_tests().
numRunsToExamine = 0

# True when per-run details are suppressed and only the final ranking is
# printed; set by main().
summary_mode = False

# Number of fetch errors counted while loading data from Jenkins.
error_count = 0

""" Parse arguments """
def parse_args():
  """Parse the command line and return the resulting options object.

  The returned object carries the attributes jenkins_url, job_name,
  num_prev_days and num_failed_tests, each falling back to its DEFAULT_*
  constant when the option is not given.  Any positional argument is
  reported through OptionParser.error().
  """
  # One (flags, keyword settings) entry per supported option.
  option_specs = (
      (("-J", "--jenkins-url"),
       dict(type="string", dest="jenkins_url", help="Jenkins URL",
            default=DEFAULT_JENKINS_URL)),
      (("-j", "--job-name"),
       dict(type="string", dest="job_name", help="Job name to look at",
            default=DEFAULT_JOB_NAME)),
      (("-n", "--num-days"),
       dict(type="int", dest="num_prev_days",
            help="Number of days to examine",
            default=DEFAULT_NUM_PREVIOUS_DAYS)),
      (("-t", "--top"),
       dict(type="int", dest="num_failed_tests",
            help="Summary Mode, only show top number of failed tests",
            default=DEFAULT_TOP_NUM_FAILED_TEST)),
  )

  cli = OptionParser()
  for flags, settings in option_specs:
    cli.add_option(*flags, **settings)

  parsed, leftovers = cli.parse_args()
  if leftovers:
    cli.error("unexpected arguments: " + repr(leftovers))
  return parsed

""" Load data from specified url """
def load_url_data(url):
  """Fetch url and return its body parsed as JSON.

  Args:
    url: address to open with urllib.request (Python 3) or urllib2
      (Python 2), as selected by the module-level onward30 flag.

  Returns:
    The decoded JSON document.  Parsing uses strict=False so that control
    characters inside JSON strings are accepted.

  Raises:
    Whatever urlopen, read/decode or the JSON parser raise; nothing is
    caught here.
  """
  if onward30:
    ourl = urllib.request.urlopen(url)
    try:
      # get_param() returns None when the Content-Type header declares no
      # charset, and bytes.decode(None) raises TypeError.  Fall back to
      # UTF-8, the default encoding of JSON text.
      codec = ourl.info().get_param('charset') or 'utf-8'
      content = ourl.read().decode(codec)
    finally:
      # Release the connection even when reading or decoding fails.
      ourl.close()
    data = simplejson.loads(content, strict=False)
  else:
    ourl = urllib2.urlopen(url)
    try:
      data = simplejson.load(ourl, strict=False)
    finally:
      ourl.close()
  return data
 
""" List all builds of the target project. """
def list_builds(jenkins_url, job_name):
  """Return the build records of a Jenkins job.

  Args:
    jenkins_url: base URL of the Jenkins server.
    job_name: name of the job whose builds are listed.

  Returns:
    The 'builds' entry of the job's JSON description; the query asks Jenkins
    for the url, result and timestamp fields of every build.

  Raises:
    Any exception raised by load_url_data.  It is counted in error_count
    (and logged unless summary mode is on) before being re-raised.
  """
  global summary_mode
  # Without this declaration the "+=" below makes error_count a local name,
  # so the handler died with UnboundLocalError instead of re-raising the
  # real fetch error.
  global error_count
  url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict(
      jenkins=jenkins_url,
      job_name=job_name)

  try:
    data = load_url_data(url)

  except Exception:
    if not summary_mode:
      logging.error("Could not fetch: %s" % url)
    error_count += 1
    raise
  return data['builds']

""" Find the names of any tests which failed in the given build output URL. """
def find_failing_tests(testReportApiJson, jobConsoleOutput):
  """Collect the names of the tests that failed in one build.

  Args:
    testReportApiJson: URL of the build's test report in JSON form.
    jobConsoleOutput: URL of the build's console output; only used in log
      messages that point the reader to it.

  Returns:
    A set of "<className>.<name>" strings, one per test case whose status
    is REGRESSION or FAILED or whose errorDetails is not None.  The set is
    empty when the report cannot be loaded (that failure is counted in
    error_count) or when it contains no such case.
  """
  global summary_mode
  global error_count
  ret = set()
  try:
    data = load_url_data(testReportApiJson)

  # Only ordinary errors mean "report unavailable".  A bare except would
  # also swallow KeyboardInterrupt/SystemExit and keep the script running.
  except Exception:
    if not summary_mode:
      logging.error("    Could not open testReport, check " +
        jobConsoleOutput + " for why it was reported failed")
    error_count += 1
    return ret

  for suite in data['suites']:
    for cs in suite['cases']:
      status = cs['status']
      # A case without an errorDetails entry is treated like one whose
      # errorDetails is null instead of aborting with KeyError.
      errDetails = cs.get('errorDetails')
      if status in ('REGRESSION', 'FAILED') or errDetails is not None:
        ret.add(cs['className'] + "." + cs['name'])

  if len(ret) == 0 and (not summary_mode):
    logging.info("    No failed tests in testReport, check " +
        jobConsoleOutput + " for why it was reported failed.")
  return ret

""" Iterate runs of specfied job within num_prev_days and collect results """
def find_flaky_tests(jenkins_url, job_name, num_prev_days):
  """Count, per test, the recent failed runs of a job in which it failed.

  Args:
    jenkins_url: base URL of the Jenkins server.
    job_name: name of the job to examine.
    num_prev_days: size of the look-back window in days.

  Returns:
    A dict mapping each failed test name to the number of examined runs in
    which find_failing_tests reported it.

  Side effects:
    Stores the number of runs inside the window in the global
    numRunsToExamine and, unless summary mode is on, logs per-run details.
  """
  global numRunsToExamine
  global summary_mode

  # Fetch every known build of the job first.
  every_build = list_builds(jenkins_url, job_name)

  # Keep the builds that started inside the window.  Build timestamps are
  # divided by 1000 before being compared with time.time() seconds
  # (presumably Jenkins reports milliseconds - confirm).
  cutoff = int(time.time()) - SECONDS_PER_DAY * num_prev_days
  recent_builds = []
  for build in every_build:
    if (int(build['timestamp']) / 1000) > cutoff:
      recent_builds.append(build)

  # Of those, remember the URL and start time of each unsuccessful one.
  failed_runs = []
  for build in recent_builds:
    if build['result'] in ('UNSTABLE', 'FAILURE'):
      failed_runs.append((build['url'], build['timestamp']))

  total_runs = len(recent_builds)
  failed_count = len(failed_runs)
  numRunsToExamine = total_runs
  if not summary_mode:
    if failed_count > 0:
      tail = ", as listed below:\n"
    else:
      tail = "."
    logging.info("    THERE ARE " + str(failed_count) + " builds (out of "
      + str(total_runs) + ") that have failed tests in the past "
      + str(num_prev_days) + " days" + tail)

  failure_counts = dict()
  for build_url, build_millis in failed_runs:
    # NOTE(review): Jenkins console pages are usually served at
    # ".../console" (lower case) - confirm that this URL resolves.
    console_url = build_url + "Console"
    report_url = build_url + "testReport"
    report_api_url = report_url + "/api/json"

    started = datetime.datetime.fromtimestamp(float(build_millis) / 1000.)
    stamp = started.strftime('%Y-%m-%d %H:%M:%S')
    if not summary_mode:
      logging.info("===>%s" % str(report_url) + " (" + stamp + ")")

    for test_name in find_failing_tests(report_api_url, console_url):
      if not summary_mode:
        logging.info("    Failed test: %s" % test_name)
      failure_counts[test_name] = failure_counts.get(test_name, 0) + 1

  return failure_counts

def main():
  """Entry point: examine the job and print the ranking of failed tests.

  Sends log output to stdout, parses the command line, collects the failed
  tests through find_flaky_tests and logs them ordered by the number of runs
  in which each one failed, most frequent first.  Giving --top any value
  other than -1 turns on summary mode: per-run details are suppressed and
  the ranking is limited to that many entries.

  Raises:
    SystemExit: with status 0 when no failed test was found.
  """
  global numRunsToExamine
  global summary_mode
  logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)

  # set up logger to write to stdout: replace the first root handler with a
  # stream handler bound to sys.stdout
  soh = logging.StreamHandler(sys.stdout)
  soh.setLevel(logging.INFO)
  logger = logging.getLogger()
  logger.removeHandler(logger.handlers[0])
  logger.addHandler(soh)

  opts = parse_args()
  logging.info("****Recently FAILED builds in url: " + opts.jenkins_url
      + "/job/" + opts.job_name + "")

  if opts.num_failed_tests != -1:
    summary_mode = True

  all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name,
      opts.num_prev_days)

  if all_failing:
    # Failed tests ordered by how many runs they failed in, most first.
    ranked = sorted(all_failing, key=all_failing.get, reverse=True)
    top_n = opts.num_failed_tests
    if summary_mode and 0 <= top_n < len(ranked):
      logging.info("\nAmong " + str(numRunsToExamine) +
                   " runs examined, top " + str(top_n) +
                   " failed tests <#failedRuns: testName>:")
      # Slicing keeps the listing consistent with the header even for
      # "--top 0", which previously announced "top 0" yet listed every test.
      ranked = ranked[:top_n]
    else:
      logging.info("\nAmong " + str(numRunsToExamine) +
                   " runs examined, all failed tests <#failedRuns: testName>:")

    # print summary section: failed tests sorted by how many times they failed
    for tn in ranked:
      logging.info("    " + str(all_failing[tn])+ ": " + tn)

  # Summary mode hides individual errors, so the count must be reported even
  # when no failed test was collected (for example when every test report
  # failed to load); exiting before this point silently dropped it.
  if summary_mode and error_count > 0:
    logging.info("\n" + str(error_count) + " errors found, you may "
                 + "re-run in non summary mode to see error details.")

  if not all_failing:
    raise SystemExit(0)

if __name__ == "__main__":
  main()
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00			`#!/usr/bin/env python`
			`#`
			`# Licensed to the Apache Software Foundation (ASF) under one`
			`# or more contributor license agreements. See the NOTICE file`
			`# distributed with this work for additional information`
			`# regarding copyright ownership. The ASF licenses this file`
			`# to you under the Apache License, Version 2.0 (the`
			`# "License"); you may not use this file except in compliance`
			`# with the License. You may obtain a copy of the License at`
			`#`
			`# http://www.apache.org/licenses/LICENSE-2.0`
			`#`
			`# Unless required by applicable law or agreed to in writing, software`
			`# distributed under the License is distributed on an "AS IS" BASIS,`
			`# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`# See the License for the specific language governing permissions and`
			`# limitations under the License.`
			`#`
			`# Given a jenkins test job, this script examines all runs of the job done`
			`# within specified period of time (number of days prior to the execution`
			`# time of this script), and reports all failed tests.`
			`#`
			`# The output of this script includes a section for each run that has failed`
			`# tests, with each failed test name listed.`
			`#`
			`# More importantly, at the end, it outputs a summary section to list all failed`
			`# tests within all examined runs, and indicate how many runs a same test`
			`# failed, and sorted all failed tests by how many runs each test failed.`
			`#`
			`# This way, when we see failed tests in PreCommit build, we can quickly tell`
			`# whether a failed test is a new failure, or it failed before and how often it`
			`# failed, so to have idea whether it may just be a flaky test.`
			`#`
			`# Of course, to be 100% sure about the reason of a test failure, closer look`
			`# at the failed test for the specific run is necessary.`
			`#`
			`import sys`
			`import platform`
			`sysversion = sys.hexversion`
			`onward30 = False`
			`if sysversion < 0x020600F0:`
			`sys.exit("Minimum supported python version is 2.6, the current version is " +`
			`"Python" + platform.python_version())`

			`if sysversion == 0x030000F0:`
			`sys.exit("There is a known bug with Python" + platform.python_version() +`
			`", please try a different version");`

			`if sysversion < 0x03000000:`
			`import urllib2`
			`else:`
			`onward30 = True`
			`import urllib.request`

			`import datetime`
			`import json as simplejson`
			`import logging`
			`from optparse import OptionParser`
			`import time`

			`# Configuration`
			`DEFAULT_JENKINS_URL = "https://builds.apache.org"`
			`DEFAULT_JOB_NAME = "Hadoop-Common-trunk"`
			`DEFAULT_NUM_PREVIOUS_DAYS = 14`
HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, 2015-06-17 18:48:29 -04:00			`DEFAULT_TOP_NUM_FAILED_TEST = -1`
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00
			`SECONDS_PER_DAY = 86400`

			`# total number of runs to examine`
			`numRunsToExamine = 0`

HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, 2015-06-17 18:48:29 -04:00			`#summary mode`
			`summary_mode = False`

			`#total number of errors`
			`error_count = 0`

HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00			`""" Parse arguments """`
			`def parse_args():`
			`parser = OptionParser()`
			`parser.add_option("-J", "--jenkins-url", type="string",`
			`dest="jenkins_url", help="Jenkins URL",`
			`default=DEFAULT_JENKINS_URL)`
			`parser.add_option("-j", "--job-name", type="string",`
			`dest="job_name", help="Job name to look at",`
			`default=DEFAULT_JOB_NAME)`
			`parser.add_option("-n", "--num-days", type="int",`
			`dest="num_prev_days", help="Number of days to examine",`
			`default=DEFAULT_NUM_PREVIOUS_DAYS)`
HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, 2015-06-17 18:48:29 -04:00			`parser.add_option("-t", "--top", type="int",`
			`dest="num_failed_tests",`
			`help="Summary Mode, only show top number of failed tests",`
			`default=DEFAULT_TOP_NUM_FAILED_TEST)`
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00
			`(options, args) = parser.parse_args()`
			`if args:`
			`parser.error("unexpected arguments: " + repr(args))`
			`return options`

			`""" Load data from specified url """`
			`def load_url_data(url):`
			`if onward30:`
			`ourl = urllib.request.urlopen(url)`
			`codec = ourl.info().get_param('charset')`
			`content = ourl.read().decode(codec)`
HADOOP-11549. flaky test detection tool failed to handle special control characters in test result. Contributed by Yongjun Zhang. 2015-02-04 17:17:00 -05:00			`data = simplejson.loads(content, strict=False)`
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00			`else:`
			`ourl = urllib2.urlopen(url)`
HADOOP-11549. flaky test detection tool failed to handle special control characters in test result. Contributed by Yongjun Zhang. 2015-02-04 17:17:00 -05:00			`data = simplejson.load(ourl, strict=False)`
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00			`return data`

			`""" List all builds of the target project. """`
			`def list_builds(jenkins_url, job_name):`
HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, 2015-06-17 18:48:29 -04:00			`global summary_mode`
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00			`url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict(`
			`jenkins=jenkins_url,`
			`job_name=job_name)`

			`try:`
			`data = load_url_data(url)`

			`except:`
HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, 2015-06-17 18:48:29 -04:00			`if not summary_mode:`
			`logging.error("Could not fetch: %s" % url)`
			`error_count += 1`
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00			`raise`
			`return data['builds']`

			`""" Find the names of any tests which failed in the given build output URL. """`
			`def find_failing_tests(testReportApiJson, jobConsoleOutput):`
HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, 2015-06-17 18:48:29 -04:00			`global summary_mode`
			`global error_count`
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00			`ret = set()`
			`try:`
			`data = load_url_data(testReportApiJson)`

			`except:`
HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, 2015-06-17 18:48:29 -04:00			`if not summary_mode:`
			`logging.error(" Could not open testReport, check " +`
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00			`jobConsoleOutput + " for why it was reported failed")`
HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, 2015-06-17 18:48:29 -04:00			`error_count += 1`
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00			`return ret`

			`for suite in data['suites']:`
			`for cs in suite['cases']:`
			`status = cs['status']`
			`errDetails = cs['errorDetails']`
			`if (status == 'REGRESSION' or status == 'FAILED' or (errDetails is not None)):`
			`ret.add(cs['className'] + "." + cs['name'])`

HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, 2015-06-17 18:48:29 -04:00			`if len(ret) == 0 and (not summary_mode):`
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00			`logging.info(" No failed tests in testReport, check " +`
			`jobConsoleOutput + " for why it was reported failed.")`
			`return ret`

			`""" Iterate runs of specfied job within num_prev_days and collect results """`
			`def find_flaky_tests(jenkins_url, job_name, num_prev_days):`
			`global numRunsToExamine`
HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, 2015-06-17 18:48:29 -04:00			`global summary_mode`
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00			`all_failing = dict()`
			`# First list all builds`
			`builds = list_builds(jenkins_url, job_name)`

			`# Select only those in the last N days`
			`min_time = int(time.time()) - SECONDS_PER_DAY * num_prev_days`
			`builds = [b for b in builds if (int(b['timestamp']) / 1000) > min_time]`

			`# Filter out only those that failed`
			`failing_build_urls = [(b['url'] , b['timestamp']) for b in builds`
			`if (b['result'] in ('UNSTABLE', 'FAILURE'))]`

			`tnum = len(builds)`
			`num = len(failing_build_urls)`
			`numRunsToExamine = tnum`
HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, 2015-06-17 18:48:29 -04:00			`if not summary_mode:`
			`logging.info(" THERE ARE " + str(num) + " builds (out of " + str(tnum)`
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00			`+ ") that have failed tests in the past " + str(num_prev_days) + " days"`
			`+ ((".", ", as listed below:\n")[num > 0]))`

			`for failed_build_with_time in failing_build_urls:`
			`failed_build = failed_build_with_time[0];`
			`jobConsoleOutput = failed_build + "Console";`
			`testReport = failed_build + "testReport";`
			`testReportApiJson = testReport + "/api/json";`

			`ts = float(failed_build_with_time[1]) / 1000.`
			`st = datetime.datetime.fromtimestamp(ts).strftime('%Y-%m-%d %H:%M:%S')`
HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, 2015-06-17 18:48:29 -04:00			`if not summary_mode:`
			`logging.info("===>%s" % str(testReport) + " (" + st + ")")`
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00			`failing = find_failing_tests(testReportApiJson, jobConsoleOutput)`
			`if failing:`
			`for ftest in failing:`
HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, 2015-06-17 18:48:29 -04:00			`if not summary_mode:`
			`logging.info(" Failed test: %s" % ftest)`
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00			`all_failing[ftest] = all_failing.get(ftest,0)+1`

			`return all_failing`

			`def main():`
			`global numRunsToExamine`
HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, 2015-06-17 18:48:29 -04:00			`global summary_mode`
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00			`logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)`

			`# set up logger to write to stdout`
			`soh = logging.StreamHandler(sys.stdout)`
			`soh.setLevel(logging.INFO)`
			`logger = logging.getLogger()`
			`logger.removeHandler(logger.handlers[0])`
			`logger.addHandler(soh)`

			`opts = parse_args()`
			`logging.info("****Recently FAILED builds in url: " + opts.jenkins_url`
			`+ "/job/" + opts.job_name + "")`

HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, 2015-06-17 18:48:29 -04:00			`if opts.num_failed_tests != -1:`
			`summary_mode = True`

HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00			`all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name,`
			`opts.num_prev_days)`
			`if len(all_failing) == 0:`
			`raise SystemExit(0)`
HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, 2015-06-17 18:48:29 -04:00
			`if summary_mode and opts.num_failed_tests < len(all_failing):`
			`logging.info("\nAmong " + str(numRunsToExamine) +`
			`" runs examined, top " + str(opts.num_failed_tests) +`
			`" failed tests <#failedRuns: testName>:")`
			`else:`
			`logging.info("\nAmong " + str(numRunsToExamine) +`
			`" runs examined, all failed tests <#failedRuns: testName>:")`
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00
			`# print summary section: all failed tests sorted by how many times they failed`
HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, 2015-06-17 18:48:29 -04:00			`line_count = 0`
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00			`for tn in sorted(all_failing, key=all_failing.get, reverse=True):`
			`logging.info(" " + str(all_failing[tn])+ ": " + tn)`
HADOOP-11965. determine-flaky-tests needs a summary mode. Contributed by Yufei Gu, 2015-06-17 18:48:29 -04:00			`if summary_mode:`
			`line_count += 1`
			`if line_count == opts.num_failed_tests:`
			`break`

			`if summary_mode and error_count > 0:`
			`logging.info("\n" + str(error_count) + " errors found, you may "`
			`+ "re-run in non summary mode to see error details.");`
HADOOP-11045. Introducing a tool to detect flaky tests of hadoop jenkins testing job. Contributed by Yongjun Zhang and Todd Lipcon. 2015-02-03 11:26:31 -05:00
			`if __name__ == "__main__":`
			`main()`