HBASE-27450 Update all our python scripts to use python3 (#4851)

Signed-off-by: Guanghao Zhang <zghao@apache.org>
Duo Zhang 2022-10-28 18:41:47 +08:00 committed by GitHub
parent d80053641d
commit cdabfd3ca8
12 changed files with 101 additions and 412 deletions


@@ -20,16 +20,14 @@
 #
 # Specifically, it's used for the flaky test reporting job defined in
 # dev-support/flaky-tests/flaky-reporting.Jenkinsfile
-FROM ubuntu:18.04
+FROM ubuntu:22.04
 COPY . /hbase/dev-support
 RUN DEBIAN_FRONTEND=noninteractive apt-get -qq -y update \
   && DEBIAN_FRONTEND=noninteractive apt-get -qq -y install --no-install-recommends \
-    curl='7.58.0-*' \
-    python2.7='2.7.17-*' \
-    python-pip='9.0.1-*' \
-    python-setuptools='39.0.1-*' \
+    curl='7.81.0-*' \
+    python3-pip='22.0.2+dfsg-*' \
   && apt-get clean \
   && rm -rf /var/lib/apt/lists/*
-RUN pip install -r /hbase/dev-support/python-requirements.txt
+RUN pip3 install -r /hbase/dev-support/python-requirements.txt
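For context, this image is consumed by the flaky-reporting Jenkinsfile shown further down; a minimal local equivalent (paths and flags taken from that Jenkinsfile, the --rm flag added for illustration) would be:

    docker build -t hbase-dev-support dev-support
    docker run --rm -v "$PWD":/hbase --workdir=/hbase hbase-dev-support \
        python3 dev-support/flaky-tests/report-flakies.py --mvn -v -o output

Since only python3-pip is installed now, every script run inside the container must use python3.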


@@ -267,14 +267,14 @@ pipeline {
 if [ -d "${OUTPUT_DIR}/branch-site" ]; then
   echo "Remove ${OUTPUT_DIR}/branch-site for saving space"
   rm -rf "${OUTPUT_DIR}/branch-site"
-  python2 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/branch-site" > "${OUTPUT_DIR}/branch-site.html"
+  python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/branch-site" > "${OUTPUT_DIR}/branch-site.html"
 else
   echo "No branch-site, skipping"
 fi
 if [ -d "${OUTPUT_DIR}/patch-site" ]; then
   echo "Remove ${OUTPUT_DIR}/patch-site for saving space"
   rm -rf "${OUTPUT_DIR}/patch-site"
-  python2 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/patch-site" > "${OUTPUT_DIR}/patch-site.html"
+  python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/patch-site" > "${OUTPUT_DIR}/patch-site.html"
 else
   echo "No patch-site, skipping"
 fi
@@ -384,7 +384,7 @@ pipeline {
 if [ -f "${OUTPUT_DIR}/test_logs.zip" ]; then
   echo "Remove ${OUTPUT_DIR}/test_logs.zip for saving space"
   rm -rf "${OUTPUT_DIR}/test_logs.zip"
-  python2 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${OUTPUT_DIR_RELATIVE}" > "${OUTPUT_DIR}/test_logs.html"
+  python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${OUTPUT_DIR_RELATIVE}" > "${OUTPUT_DIR}/test_logs.html"
 else
   echo "No test_logs.zip, skipping"
 fi
@@ -493,7 +493,7 @@ pipeline {
 if [ -f "${OUTPUT_DIR}/test_logs.zip" ]; then
   echo "Remove ${OUTPUT_DIR}/test_logs.zip for saving space"
   rm -rf "${OUTPUT_DIR}/test_logs.zip"
-  python2 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${OUTPUT_DIR_RELATIVE}" > "${OUTPUT_DIR}/test_logs.html"
+  python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${OUTPUT_DIR_RELATIVE}" > "${OUTPUT_DIR}/test_logs.html"
 else
   echo "No test_logs.zip, skipping"
 fi
@@ -604,7 +604,7 @@ pipeline {
 if [ -f "${OUTPUT_DIR}/test_logs.zip" ]; then
   echo "Remove ${OUTPUT_DIR}/test_logs.zip for saving space"
   rm -rf "${OUTPUT_DIR}/test_logs.zip"
-  python2 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${OUTPUT_DIR_RELATIVE}" > "${OUTPUT_DIR}/test_logs.html"
+  python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${OUTPUT_DIR_RELATIVE}" > "${OUTPUT_DIR}/test_logs.html"
 else
   echo "No test_logs.zip, skipping"
 fi
@@ -773,7 +773,7 @@ pipeline {
 if [ -f "${SRC_TAR}" ]; then
   echo "Remove ${SRC_TAR} for saving space"
   rm -rf "${SRC_TAR}"
-  python2 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/output-srctarball" > "${WORKSPACE}/output-srctarball/hbase-src.html"
+  python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/output-srctarball" > "${WORKSPACE}/output-srctarball/hbase-src.html"
 else
   echo "No hbase-src.tar.gz, skipping"
 fi


@@ -143,14 +143,14 @@ pipeline {
 if [ -d "${PATCHDIR}/branch-site" ]; then
   echo "Remove ${PATCHDIR}/branch-site for saving space"
   rm -rf "${PATCHDIR}/branch-site"
-  python2 ${SOURCEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/branch-site" > "${PATCHDIR}/branch-site.html"
+  python3 ${SOURCEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/branch-site" > "${PATCHDIR}/branch-site.html"
 else
   echo "No branch-site, skipping"
 fi
 if [ -d "${PATCHDIR}/patch-site" ]; then
   echo "Remove ${PATCHDIR}/patch-site for saving space"
   rm -rf "${PATCHDIR}/patch-site"
-  python2 ${SOURCEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/patch-site" > "${PATCHDIR}/patch-site.html"
+  python3 ${SOURCEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/patch-site" > "${PATCHDIR}/patch-site.html"
 else
   echo "No patch-site, skipping"
 fi
@@ -282,7 +282,7 @@ pipeline {
 if [ -f "${PATCHDIR}/test_logs.zip" ]; then
   echo "Remove ${PATCHDIR}/test_logs.zip for saving space"
   rm -rf "${PATCHDIR}/test_logs.zip"
-  python2 ${SOURCEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${WORKDIR_REL}/${PATCH_REL}" > "${PATCHDIR}/test_logs.html"
+  python3 ${SOURCEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${WORKDIR_REL}/${PATCH_REL}" > "${PATCHDIR}/test_logs.html"
 else
   echo "No test_logs.zip, skipping"
 fi
@@ -414,7 +414,7 @@ pipeline {
 if [ -f "${PATCHDIR}/test_logs.zip" ]; then
   echo "Remove ${PATCHDIR}/test_logs.zip for saving space"
   rm -rf "${PATCHDIR}/test_logs.zip"
-  python2 ${SOURCEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${WORKDIR_REL}/${PATCH_REL}" > "${PATCHDIR}/test_logs.html"
+  python3 ${SOURCEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${WORKDIR_REL}/${PATCH_REL}" > "${PATCHDIR}/test_logs.html"
 else
   echo "No test_logs.zip, skipping"
 fi


@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 #
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements. See the NOTICE file
@@ -41,7 +41,9 @@ import re
 import shutil
 import subprocess
 import sys
-import urllib2
+import urllib.request
+import urllib.error
+import urllib.parse
 from collections import namedtuple
 try:
     import argparse
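For reference, a minimal sketch of the py3 idiom these imports enable, using the japi-compliance-checker URL that appears in a later hunk (the error handling here is illustrative, not part of the script):

    import urllib.request
    import urllib.error

    url = "https://github.com/lvc/japi-compliance-checker/archive/2.4.tar.gz"
    try:
        # urllib.request.urlopen replaces the old urllib2.urlopen call.
        with urllib.request.urlopen(url) as resp:
            data = resp.read()  # bytes, suitable for writing with open(path, 'wb')
    except urllib.error.URLError as err:
        print("download failed: %s" % err)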
@@ -55,11 +57,11 @@ REPO_DIR = os.getcwd()
 def check_output(*popenargs, **kwargs):
-    """ Run command with arguments and return its output as a byte string.
-    Backported from Python 2.7 as it's implemented as pure python on stdlib.
-    >>> check_output(['/usr/bin/python', '--version'])
-    Python 2.6.2 """
-    process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
+    """ Run command with arguments and return its output as a byte string. """
+    process = subprocess.Popen(stdout=subprocess.PIPE,
+                               universal_newlines=True,
+                               *popenargs,
+                               **kwargs)
     output, _ = process.communicate()
     retcode = process.poll()
     if retcode:
@@ -69,7 +71,7 @@ def check_output(*popenargs, **kwargs):
         error = subprocess.CalledProcessError(retcode, cmd)
         error.output = output
         raise error
-    return output
+    return output.strip()

 def get_repo_dir():
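A hypothetical call to the rewritten wrapper: with universal_newlines=True the pipe yields str rather than bytes, and the new .strip() drops the trailing newline, so callers get a clean one-line value (the git command below is illustrative):

    # e.g. resolve the current commit hash as plain text
    head_sha = check_output(['git', 'rev-parse', 'HEAD'])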
@@ -161,7 +163,7 @@ def checkout_java_acc(force):
     url = "https://github.com/lvc/japi-compliance-checker/archive/2.4.tar.gz"
     scratch_dir = get_scratch_dir()
     path = os.path.join(scratch_dir, os.path.basename(url))
-    jacc = urllib2.urlopen(url)
+    jacc = urllib.request.urlopen(url)
     with open(path, 'wb') as w:
         w.write(jacc.read())
@@ -196,8 +198,8 @@ def ascii_encode_dict(data):
     """ Iterate through a dictionary of data and convert all unicode to ascii.
     This method was taken from
     stackoverflow.com/questions/9590382/forcing-python-json-module-to-work-with-ascii """
-    ascii_encode = lambda x: x.encode('ascii') if isinstance(x, unicode) else x
-    return dict(map(ascii_encode, pair) for pair in data.items())
+    ascii_encode = lambda x: x.encode('ascii') if isinstance(x, str) else x
+    return dict(list(map(ascii_encode, pair)) for pair in list(data.items()))

 def process_json(path):
@@ -229,8 +231,8 @@ def compare_results(tool_results, known_issues, compare_warnings):
     unexpected_issues = [unexpected_issue(check=check, issue_type=issue_type,
                                           known_count=known_count,
                                           observed_count=tool_results[check][issue_type])
-                         for check, known_issue_counts in known_issues.items()
-                         for issue_type, known_count in known_issue_counts.items()
+                         for check, known_issue_counts in list(known_issues.items())
+                         for issue_type, known_count in list(known_issue_counts.items())
                          if compare_tool_results_count(tool_results, check, issue_type, known_count)]

     if not compare_warnings:
@@ -309,14 +311,14 @@ def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations, skip_annotations):
     logging.info("Annotations are: %s", annotations)
     annotations_path = os.path.join(get_scratch_dir(), "annotations.txt")
     logging.info("Annotations path: %s", annotations_path)
-    with file(annotations_path, "w") as f:
+    with open(annotations_path, "w") as f:
         f.write('\n'.join(annotations))
     args.extend(["-annotations-list", annotations_path])
     if skip_annotations is not None:
         skip_annotations_path = os.path.join(
             get_scratch_dir(), "skip_annotations.txt")
-        with file(skip_annotations_path, "w") as f:
+        with open(skip_annotations_path, "w") as f:
             f.write('\n'.join(skip_annotations))
         args.extend(["-skip-annotations-list", skip_annotations_path])


@@ -1,4 +1,4 @@
-#!/usr/bin/python2
+#!/usr/bin/env python3
 ##
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements. See the NOTICE file
@@ -29,8 +29,8 @@ import xml.etree.ElementTree as etree
 from collections import defaultdict

 if len(sys.argv) != 3 :
-    print "usage: %s checkstyle-result-master.xml checkstyle-result-patch.xml" % sys.argv[0]
-    exit(1)
+    print("usage: %s checkstyle-result-master.xml checkstyle-result-patch.xml" % sys.argv[0])
+    sys.exit(1)

 def path_key(x):
     path = x.attrib['name']
@@ -40,8 +40,8 @@ def error_name(x):
     error_class = x.attrib['source']
     return error_class[error_class.rfind(".") + 1:]

-def print_row(path, error, master_errors, patch_errors):
-    print '%s\t%s\t%s\t%s' % (path,error, master_errors,patch_errors)
+def print_row(path, err, master_errors, patch_errors):
+    print('%s\t%s\t%s\t%s' % (path, err, master_errors, patch_errors))

 master = etree.parse(sys.argv[1])
 patch = etree.parse(sys.argv[2])
@@ -49,32 +49,32 @@ patch = etree.parse(sys.argv[2])
 master_dict = defaultdict(int)
 ret_value = 0

-for child in master.getroot().getchildren():
+for child in list(master.getroot()):
     if child.tag != 'file':
         continue
     file = path_key(child)
-    for error_tag in child.getchildren():
+    for error_tag in list(child):
         error = error_name(error_tag)
         if (file, error) in master_dict:
             master_dict[(file, error)] += 1
         else:
             master_dict[(file, error)] = 1

-for child in patch.getroot().getchildren():
+for child in list(patch.getroot()):
     if child.tag != 'file':
         continue
     temp_dict = defaultdict(int)
-    for error_tag in child.getchildren():
+    for error_tag in list(child):
         error = error_name(error_tag)
         if error in temp_dict:
             temp_dict[error] += 1
         else:
             temp_dict[error] = 1
     file = path_key(child)
-    for error, count in temp_dict.iteritems():
+    for error, count in temp_dict.items():
         if count > master_dict[(file, error)]:
             print_row(file, error, master_dict[(file, error)], count)
             ret_value = 1

 sys.exit(ret_value)
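A hedged invocation example (this script's file name is not shown in the diff, so the path below is assumed). Per the usage string above, it takes the master and patch checkstyle XML reports, prints a TSV row for every file/error pair that got worse, and exits 1 if any did:

    python3 dev-support/checkstyle_report.py checkstyle-result-master.xml checkstyle-result-patch.xml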


@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 ##
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements. See the NOTICE file
@@ -22,7 +22,7 @@ import os
 if len(sys.argv) != 3:
     print("usage: %s <NEW_CHANGES.md> <PREV_CHANGES.md>" % sys.argv[0])
-    exit(1)
+    sys.exit(1)

 pattern = re.compile(r'^## Release .+ - Unreleased .+$')

 with open(sys.argv[1], 'r', errors = 'ignore') as new_r, open(sys.argv[2], 'r', errors = 'ignore') as prev_r, open(sys.argv[2] + '.tmp', 'w') as w:


@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
 ##
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements. See the NOTICE file
@@ -22,7 +22,7 @@ import os
 if len(sys.argv) != 3:
     print("usage: %s <NEW_RELEASENOTES.md> <PREV_RELEASENOTES.md>" % sys.argv[0])
-    exit(1)
+    sys.exit(1)

 pattern = re.compile(r'^# .+ Release Notes$')

 with open(sys.argv[1], 'r', errors = 'ignore') as new_r, open(sys.argv[2], 'r', errors = 'ignore') as prev_r, open(sys.argv[2] + '.tmp', 'w') as w:


@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 ##
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements. See the NOTICE file
@@ -45,8 +45,8 @@ def get_bad_tests(console_url):
     """
     response = requests.get(console_url)
     if response.status_code != 200:
-        print "Error getting consoleText. Response = {} {}".format(
-            response.status_code, response.reason)
+        print("Error getting consoleText. Response = {} {}".format(
+            response.status_code, response.reason))
         return

     # All tests: All testcases which were run.
@@ -59,13 +59,13 @@ def get_bad_tests(console_url):
     hanging_tests_set = set()
     failed_tests_set = set()
     timeout_tests_set = set()
-    for line in response.content.splitlines():
+    for line in response.content.decode("utf-8").splitlines():
         result1 = re.findall("Running org.apache.hadoop.hbase.(.*)", line)
         if len(result1) == 1:
             test_case = result1[0]
             if test_case in all_tests_set:
-                print ("ERROR! Multiple tests with same name '{}'. Might get wrong results "
-                       "for this test.".format(test_case))
+                print(("ERROR! Multiple tests with same name '{}'. Might get wrong results "
+                       "for this test.".format(test_case)))
             else:
                 hanging_tests_set.add(test_case)
                 all_tests_set.add(test_case)
@@ -75,9 +75,9 @@ def get_bad_tests(console_url):
             if "FAILURE!" in line:
                 failed_tests_set.add(test_case)
                 if test_case not in hanging_tests_set:
-                    print ("ERROR! No test '{}' found in hanging_tests. Might get wrong results "
-                           "for this test. This may also happen if maven is set to retry failing "
-                           "tests.".format(test_case))
+                    print(("ERROR! No test '{}' found in hanging_tests. Might get wrong results "
+                           "for this test. This may also happen if maven is set to retry failing "
+                           "tests.".format(test_case)))
                 else:
                     hanging_tests_set.remove(test_case)
         result3 = re.match("^\\s+(\\w*).*\\sTestTimedOut", line)
@@ -86,30 +86,30 @@ def get_bad_tests(console_url):
             timeout_tests_set.add(test_case)
         for bad_string in BAD_RUN_STRINGS:
             if re.match(".*" + bad_string + ".*", line):
-                print "Bad string found in build:\n > {}".format(line)
-    print "Result > total tests: {:4} failed : {:4} timedout : {:4} hanging : {:4}".format(
-        len(all_tests_set), len(failed_tests_set), len(timeout_tests_set), len(hanging_tests_set))
+                print("Bad string found in build:\n > {}".format(line))
+    print("Result > total tests: {:4} failed : {:4} timedout : {:4} hanging : {:4}".format(
+        len(all_tests_set), len(failed_tests_set), len(timeout_tests_set), len(hanging_tests_set)))
     return [all_tests_set, failed_tests_set, timeout_tests_set, hanging_tests_set]

 if __name__ == "__main__":
     if len(sys.argv) != 2:
-        print "ERROR : Provide the jenkins job console URL as the only argument."
+        print("ERROR : Provide the jenkins job console URL as the only argument.")
         sys.exit(1)
-    print "Fetching {}".format(sys.argv[1])
+    print("Fetching {}".format(sys.argv[1]))
     result = get_bad_tests(sys.argv[1])
     if not result:
         sys.exit(1)
     [all_tests, failed_tests, timedout_tests, hanging_tests] = result
-    print "Found {} hanging tests:".format(len(hanging_tests))
+    print("Found {} hanging tests:".format(len(hanging_tests)))
     for test in hanging_tests:
-        print test
-    print "\n"
-    print "Found {} failed tests of which {} timed out:".format(
-        len(failed_tests), len(timedout_tests))
+        print(test)
+    print("\n")
+    print("Found {} failed tests of which {} timed out:".format(
+        len(failed_tests), len(timedout_tests)))
     for test in failed_tests:
-        print "{0} {1}".format(test, ("(Timed Out)" if test in timedout_tests else ""))
+        print("{0} {1}".format(test, ("(Timed Out)" if test in timedout_tests else "")))
     print ("\nA test may have had 0 or more atomic test failures before it timed out. So a "
            "'Timed Out' test may have other errors too.")


@@ -47,7 +47,7 @@ pipeline {
 flaky_args=("${flaky_args[@]}" --urls "${JENKINS_URL}/job/HBase-Flaky-Tests/job/${BRANCH_NAME}" --is-yetus False --max-builds 50)
 docker build -t hbase-dev-support dev-support
 docker run --ulimit nproc=12500 -v "${WORKSPACE}":/hbase -u `id -u`:`id -g` --workdir=/hbase hbase-dev-support \
-  python dev-support/flaky-tests/report-flakies.py --mvn -v -o output "${flaky_args[@]}"
+  ./dev-support/flaky-tests/report-flakies.py --mvn -v -o output "${flaky_args[@]}"
 '''
 sshPublisher(publishers: [
   sshPublisherDesc(configName: 'Nightlies',


@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python3
 ##
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements. See the NOTICE file
@@ -140,7 +140,7 @@ def expand_multi_config_projects(cli_args):
             raise Exception("Failed to get job information from jenkins for url '" + job_url +
                             "'. Jenkins returned HTTP status " + str(request.status_code))
         response = request.json()
-        if response.has_key("activeConfigurations"):
+        if "activeConfigurations" in response:
             for config in response["activeConfigurations"]:
                 final_expanded_urls.append({'url':config["url"], 'max_builds': max_builds,
                                             'excludes': excluded_builds, 'is_yetus': is_yetus})
@@ -167,7 +167,7 @@ for url_max_build in expanded_urls:
     url = url_max_build["url"]
     excludes = url_max_build["excludes"]
     json_response = requests.get(url + "/api/json?tree=id,builds%5Bnumber,url%5D").json()
-    if json_response.has_key("builds"):
+    if "builds" in json_response:
         builds = json_response["builds"]
         logger.info("Analyzing job: %s", url)
     else:
@@ -238,27 +238,27 @@ for url_max_build in expanded_urls:
     # Sort tests in descending order by flakyness.
     sorted_test_to_build_ids = OrderedDict(
-        sorted(test_to_build_ids.iteritems(), key=lambda x: x[1]['flakyness'], reverse=True))
+        sorted(iter(test_to_build_ids.items()), key=lambda x: x[1]['flakyness'], reverse=True))
     url_to_bad_test_results[url] = sorted_test_to_build_ids

     if len(sorted_test_to_build_ids) > 0:
-        print "URL: {}".format(url)
-        print "{:>60} {:10} {:25} {}".format(
-            "Test Name", "Total Runs", "Bad Runs(failed/timeout/hanging)", "Flakyness")
+        print("URL: {}".format(url))
+        print("{:>60} {:10} {:25} {}".format(
+            "Test Name", "Total Runs", "Bad Runs(failed/timeout/hanging)", "Flakyness"))
         for bad_test in sorted_test_to_build_ids:
             test_status = sorted_test_to_build_ids[bad_test]
-            print "{:>60} {:10} {:7} ( {:4} / {:5} / {:5} ) {:2.0f}%".format(
-                bad_test, len(test_status['all']), test_status['bad_count'],
-                len(test_status['failed']), len(test_status['timeout']),
-                len(test_status['hanging']), test_status['flakyness'])
+            print("{:>60} {:10} {:7} ( {:4} / {:5} / {:5} ) {:2.0f}%".format(
+                bad_test, len(test_status['all']), test_status['bad_count'],
+                len(test_status['failed']), len(test_status['timeout']),
+                len(test_status['hanging']), test_status['flakyness']))
     else:
-        print "No flaky tests founds."
+        print("No flaky tests founds.")
         if len(url_to_build_ids[url]) == len(build_ids_without_tests_run):
-            print "None of the analyzed builds have test result."
-    print "Builds analyzed: {}".format(url_to_build_ids[url])
-    print "Builds without any test runs: {}".format(build_ids_without_tests_run)
-    print ""
+            print("None of the analyzed builds have test result.")
+    print("Builds analyzed: {}".format(url_to_build_ids[url]))
+    print("Builds without any test runs: {}".format(build_ids_without_tests_run))
+    print("")

 all_bad_tests = all_hanging_tests.union(all_failed_tests)
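The has_key removals above follow the standard py3 migration: membership tests use the in operator, and items() returns an iterable view that sorted() can consume directly. A minimal sketch with made-up data:

    # dict.has_key() no longer exists in Python 3.
    json_response = {"builds": [{"number": 1}, {"number": 2}]}
    if "builds" in json_response:  # was: json_response.has_key("builds")
        for build in json_response["builds"]:
            print(build["number"])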


@@ -1,4 +1,4 @@
-#!/usr/bin/python2
+#!/usr/bin/env python3
 ##
 # Licensed to the Apache Software Foundation (ASF) under one
 # or more contributor license agreements. See the NOTICE file
@@ -20,8 +20,8 @@ import sys
 from string import Template

 if len(sys.argv) != 2 :
-    print "usage: %s <redirect url>" % sys.argv[0]
-    exit(1)
+    print("usage: %s <redirect url>" % sys.argv[0])
+    sys.exit(1)

 url = sys.argv[1].replace(" ", "%20")
 template = Template("""<html>
@@ -34,4 +34,4 @@ template = Template("""<html>
 </html>""")

 output = template.substitute(url = url)
-print output
+print(output)
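For reference, the nightly Jenkinsfiles above invoke this generator with a target URL and capture its stdout as an HTML redirect page; a stand-alone example (target URL illustrative):

    python3 dev-support/gen_redirect_html.py \
        "https://nightlies.apache.org/hbase/master/output-srctarball" > hbase-src.html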


@@ -1,311 +0,0 @@ (file deleted; its former contents follow)
#!/usr/bin/env python2
##
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Makes a patch for the current branch, creates/updates the review board request and uploads new
# patch to jira. Patch is named as (JIRA).(branch name).(patch number).patch as per Yetus' naming
# rules. If no jira is specified, patch will be named (branch name).patch and jira and review board
# are not updated. Review board id is retrieved from the remote link in the jira.
# Print help: submit-patch.py --h
import argparse
from builtins import input, str
import getpass
import git
import json
import logging
import os
import re
import requests
import subprocess
import sys
parser = argparse.ArgumentParser(
epilog = "To avoid having to enter jira/review board username/password every time, setup an "
"encrypted ~/.apache-cred files as follows:\n"
"1) Create a file with following single "
"line: \n{\"jira_username\" : \"appy\", \"jira_password\":\"123\", "
"\"rb_username\":\"appy\", \"rb_password\" : \"@#$\"}\n"
"2) Encrypt it with openssl.\n"
"openssl enc -aes-256-cbc -in <file> -out ~/.apache-creds\n"
"3) Delete original file.\n"
"Now onwards, you'll need to enter this encryption key only once per run. If you "
"forget the key, simply regenerate ~/.apache-cred file again.",
formatter_class=argparse.RawTextHelpFormatter
)
parser.add_argument("-b", "--branch",
help = "Branch to use for generating diff. If not specified, tracking branch "
"is used. If there is no tracking branch, error will be thrown.")
# Arguments related to Jira.
parser.add_argument("-jid", "--jira-id",
help = "Jira id of the issue. If set, we deduce next patch version from "
"attachments in the jira and also upload the new patch. Script will "
"ask for jira username/password for authentication. If not set, "
"patch is named <branch>.patch.")
# Arguments related to Review Board.
parser.add_argument("-srb", "--skip-review-board",
help = "Don't create/update the review board.",
default = False, action = "store_true")
parser.add_argument("--reviewers",
help = "Comma separated list of users to add as reviewers.")
# Misc arguments
parser.add_argument("--patch-dir", default = "~/patches",
help = "Directory to store patch files. If it doesn't exist, it will be "
"created. Default: ~/patches")
parser.add_argument("--rb-repo", default = "hbase-git",
help = "Review board repository. Default: hbase-git")
args = parser.parse_args()
# Setup logger
logging.basicConfig()
logger = logging.getLogger("submit-patch")
logger.setLevel(logging.INFO)
def log_fatal_and_exit(*arg):
logger.fatal(*arg)
sys.exit(1)
def assert_status_code(response, expected_status_code, description):
if response.status_code != expected_status_code:
log_fatal_and_exit(" Oops, something went wrong when %s. \nResponse: %s %s\nExiting..",
description, response.status_code, response.reason)
# Make repo instance to interact with git repo.
try:
repo = git.Repo(os.getcwd())
git = repo.git
except git.exc.InvalidGitRepositoryError as e:
log_fatal_and_exit(" '%s' is not valid git repo directory.\nRun from base directory of "
"HBase's git repo.", e)
logger.info(" Active branch: %s", repo.active_branch.name)
# Do not proceed if there are uncommitted changes.
if repo.is_dirty():
log_fatal_and_exit(" Git status is dirty. Commit locally first.")
# Returns base branch for creating diff.
def get_base_branch():
# if --branch is set, use it as base branch for computing diff. Also check that it's a valid branch.
if args.branch is not None:
base_branch = args.branch
# Check that given branch exists.
for ref in repo.refs:
if ref.name == base_branch:
return base_branch
log_fatal_and_exit(" Branch '%s' does not exist in refs.", base_branch)
else:
# if --branch is not set, use tracking branch as base branch for computing diff.
# If there is no tracking branch, log error and quit.
tracking_branch = repo.active_branch.tracking_branch()
if tracking_branch is None:
log_fatal_and_exit(" Active branch doesn't have a tracking_branch. Please specify base "
" branch for computing diff using --branch flag.")
logger.info(" Using tracking branch as base branch")
return tracking_branch.name
# Returns patch name having format (JIRA).(branch name).(patch number).patch. If no jira is
# specified, patch is name (branch name).patch.
def get_patch_name(branch):
if args.jira_id is None:
return branch + ".patch"
patch_name_prefix = args.jira_id.upper() + "." + branch
return get_patch_name_with_version(patch_name_prefix)
# Fetches list of attachments from the jira, deduces next version for the patch and returns final
# patch name.
def get_patch_name_with_version(patch_name_prefix):
# JIRA's rest api is broken wrt to attachments. https://jira.atlassian.com/browse/JRA-27637.
# Using crude way to get list of attachments.
url = "https://issues.apache.org/jira/browse/" + args.jira_id
logger.info("Getting list of attachments for jira %s from %s", args.jira_id, url)
html = requests.get(url)
if html.status_code == 404:
log_fatal_and_exit(" Invalid jira id : %s", args.jira_id)
if html.status_code != 200:
log_fatal_and_exit(" Cannot fetch jira information. Status code %s", html.status_code)
# Iterate over patch names starting from version 1 and return when name is not already used.
content = str(html.content, 'utf-8')
for i in range(1, 1000):
name = patch_name_prefix + "." + ('{0:03d}'.format(i)) + ".patch"
if name not in content:
return name
# Validates that patch directory exists, if not, creates it.
def validate_patch_dir(patch_dir):
# Create patch_dir if it doesn't exist.
if not os.path.exists(patch_dir):
logger.warn(" Patch directory doesn't exist. Creating it.")
os.mkdir(patch_dir)
else:
# If patch_dir exists, make sure it's a directory.
if not os.path.isdir(patch_dir):
log_fatal_and_exit(" '%s' exists but is not a directory. Specify another directory.",
patch_dir)
# Make sure current branch is ahead of base_branch by exactly 1 commit. Quits if
# - base_branch has commits not in current branch
# - current branch is same as base branch
# - current branch is ahead of base_branch by more than 1 commits
def check_diff_between_branches(base_branch):
only_in_base_branch = list(repo.iter_commits("HEAD.." + base_branch))
only_in_active_branch = list(repo.iter_commits(base_branch + "..HEAD"))
if len(only_in_base_branch) != 0:
log_fatal_and_exit(" '%s' is ahead of current branch by %s commits. Rebase "
"and try again.", base_branch, len(only_in_base_branch))
if len(only_in_active_branch) == 0:
log_fatal_and_exit(" Current branch is same as '%s'. Exiting...", base_branch)
if len(only_in_active_branch) > 1:
log_fatal_and_exit(" Current branch is ahead of '%s' by %s commits. Squash into single "
"commit and try again.", base_branch, len(only_in_active_branch))
# If ~/.apache-creds is present, load credentials from it otherwise prompt user.
def get_credentials():
creds = dict()
creds_filepath = os.path.expanduser("~/.apache-creds")
if os.path.exists(creds_filepath):
try:
logger.info(" Reading ~/.apache-creds for Jira and ReviewBoard credentials")
content = subprocess.check_output("openssl enc -aes-256-cbc -d -in " + creds_filepath,
shell=True)
except subprocess.CalledProcessError as e:
log_fatal_and_exit(" Couldn't decrypt ~/.apache-creds file. Exiting..")
creds = json.loads(content)
else:
creds['jira_username'] = input("Jira username:")
creds['jira_password'] = getpass.getpass("Jira password:")
if not args.skip_review_board:
creds['rb_username'] = input("Review Board username:")
creds['rb_password'] = getpass.getpass("Review Board password:")
return creds
def attach_patch_to_jira(issue_url, patch_filepath, patch_filename, creds):
# Upload patch to jira using REST API.
headers = {'X-Atlassian-Token': 'no-check'}
files = {'file': (patch_filename, open(patch_filepath, 'rb'), 'text/plain')}
jira_auth = requests.auth.HTTPBasicAuth(creds['jira_username'], creds['jira_password'])
attachment_url = issue_url + "/attachments"
r = requests.post(attachment_url, headers = headers, files = files, auth = jira_auth)
assert_status_code(r, 200, "uploading patch to jira")
def get_jira_summary(issue_url):
r = requests.get(issue_url + "?fields=summary")
assert_status_code(r, 200, "fetching jira summary")
return json.loads(r.content)["fields"]["summary"]
def get_review_board_id_if_present(issue_url, rb_link_title):
r = requests.get(issue_url + "/remotelink")
assert_status_code(r, 200, "fetching remote links")
links = json.loads(r.content)
for link in links:
if link["object"]["title"] == rb_link_title:
res = re.search("reviews.apache.org/r/([0-9]+)", link["object"]["url"])
return res.group(1)
return None
base_branch = get_base_branch()
# Remove remote repo name from branch name if present. This assumes that we don't use '/' in
# actual branch names.
base_branch_without_remote = base_branch.split('/')[-1]
logger.info(" Base branch: %s", base_branch)
check_diff_between_branches(base_branch)
patch_dir = os.path.abspath(os.path.expanduser(args.patch_dir))
logger.info(" Patch directory: %s", patch_dir)
validate_patch_dir(patch_dir)
patch_filename = get_patch_name(base_branch_without_remote)
logger.info(" Patch name: %s", patch_filename)
patch_filepath = os.path.join(patch_dir, patch_filename)
diff = git.format_patch(base_branch, stdout = True)
with open(patch_filepath, "wb") as f:
f.write(diff.encode('utf8'))
if args.jira_id is not None:
creds = get_credentials()
issue_url = "https://issues.apache.org/jira/rest/api/2/issue/" + args.jira_id
attach_patch_to_jira(issue_url, patch_filepath, patch_filename, creds)
if not args.skip_review_board:
rb_auth = requests.auth.HTTPBasicAuth(creds['rb_username'], creds['rb_password'])
rb_link_title = "Review Board (" + base_branch_without_remote + ")"
rb_id = get_review_board_id_if_present(issue_url, rb_link_title)
# If no review board link found, create new review request and add its link to jira.
if rb_id is None:
reviews_url = "https://reviews.apache.org/api/review-requests/"
data = {"repository" : "hbase-git"}
r = requests.post(reviews_url, data = data, auth = rb_auth)
assert_status_code(r, 201, "creating new review request")
review_request = json.loads(r.content)["review_request"]
absolute_url = review_request["absolute_url"]
logger.info(" Created new review request: %s", absolute_url)
# Use jira summary as review's summary too.
summary = get_jira_summary(issue_url)
# Use commit message as description.
description = repo.head.commit.message
update_draft_data = {"bugs_closed" : [args.jira_id.upper()], "target_groups" : "hbase",
"target_people" : args.reviewers, "summary" : summary,
"description" : description }
draft_url = review_request["links"]["draft"]["href"]
r = requests.put(draft_url, data = update_draft_data, auth = rb_auth)
assert_status_code(r, 200, "updating review draft")
draft_request = json.loads(r.content)["draft"]
diff_url = draft_request["links"]["draft_diffs"]["href"]
files = {'path' : (patch_filename, open(patch_filepath, 'rb'))}
r = requests.post(diff_url, files = files, auth = rb_auth)
assert_status_code(r, 201, "uploading diff to review draft")
r = requests.put(draft_url, data = {"public" : True}, auth = rb_auth)
assert_status_code(r, 200, "publishing review request")
# Add link to review board in the jira.
remote_link = json.dumps({'object': {'url': absolute_url, 'title': rb_link_title}})
jira_auth = requests.auth.HTTPBasicAuth(creds['jira_username'], creds['jira_password'])
r = requests.post(issue_url + "/remotelink", data = remote_link, auth = jira_auth,
headers={'Content-Type':'application/json'})
else:
logger.info(" Updating existing review board: https://reviews.apache.org/r/%s", rb_id)
draft_url = "https://reviews.apache.org/api/review-requests/" + rb_id + "/draft/"
diff_url = draft_url + "diffs/"
files = {'path' : (patch_filename, open(patch_filepath, 'rb'))}
r = requests.post(diff_url, files = files, auth = rb_auth)
assert_status_code(r, 201, "uploading diff to review draft")
r = requests.put(draft_url, data = {"public" : True}, auth = rb_auth)
assert_status_code(r, 200, "publishing review request")