HBASE-27450 Update all our python scripts to use python3 (#4851)

Signed-off-by: Guanghao Zhang <zghao@apache.org>
(cherry picked from commit cdabfd3ca8)
This commit is contained in:
Duo Zhang 2022-10-28 18:41:47 +08:00
parent 419ce5ad62
commit f34b002373
10 changed files with 112 additions and 415 deletions

View File

@ -20,16 +20,14 @@
#
# Specifically, it's used for the flaky test reporting job defined in
# dev-support/flaky-tests/flaky-reporting.Jenkinsfile
FROM ubuntu:18.04
FROM ubuntu:22.04
COPY . /hbase/dev-support
RUN DEBIAN_FRONTEND=noninteractive apt-get -qq -y update \
&& DEBIAN_FRONTEND=noninteractive apt-get -qq -y install --no-install-recommends \
curl='7.58.0-*' \
python2.7='2.7.17-*' \
python-pip='9.0.1-*' \
python-setuptools='39.0.1-*' \
curl='7.81.0-*' \
python3-pip='22.0.2+dfsg-*' \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
RUN pip install -r /hbase/dev-support/python-requirements.txt
RUN pip3 install -r /hbase/dev-support/python-requirements.txt

View File

@ -267,14 +267,14 @@ pipeline {
if [ -d "${OUTPUT_DIR}/branch-site" ]; then
echo "Remove ${OUTPUT_DIR}/branch-site for saving space"
rm -rf "${OUTPUT_DIR}/branch-site"
python2 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/branch-site" > "${OUTPUT_DIR}/branch-site.html"
python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/branch-site" > "${OUTPUT_DIR}/branch-site.html"
else
echo "No branch-site, skipping"
fi
if [ -d "${OUTPUT_DIR}/patch-site" ]; then
echo "Remove ${OUTPUT_DIR}/patch-site for saving space"
rm -rf "${OUTPUT_DIR}/patch-site"
python2 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/patch-site" > "${OUTPUT_DIR}/patch-site.html"
python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/patch-site" > "${OUTPUT_DIR}/patch-site.html"
else
echo "No patch-site, skipping"
fi
@ -384,7 +384,7 @@ pipeline {
if [ -f "${OUTPUT_DIR}/test_logs.zip" ]; then
echo "Remove ${OUTPUT_DIR}/test_logs.zip for saving space"
rm -rf "${OUTPUT_DIR}/test_logs.zip"
python2 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${OUTPUT_DIR_RELATIVE}" > "${OUTPUT_DIR}/test_logs.html"
python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${OUTPUT_DIR_RELATIVE}" > "${OUTPUT_DIR}/test_logs.html"
else
echo "No test_logs.zip, skipping"
fi
@ -493,7 +493,7 @@ pipeline {
if [ -f "${OUTPUT_DIR}/test_logs.zip" ]; then
echo "Remove ${OUTPUT_DIR}/test_logs.zip for saving space"
rm -rf "${OUTPUT_DIR}/test_logs.zip"
python2 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${OUTPUT_DIR_RELATIVE}" > "${OUTPUT_DIR}/test_logs.html"
python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${OUTPUT_DIR_RELATIVE}" > "${OUTPUT_DIR}/test_logs.html"
else
echo "No test_logs.zip, skipping"
fi
@ -604,7 +604,7 @@ pipeline {
if [ -f "${OUTPUT_DIR}/test_logs.zip" ]; then
echo "Remove ${OUTPUT_DIR}/test_logs.zip for saving space"
rm -rf "${OUTPUT_DIR}/test_logs.zip"
python2 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${OUTPUT_DIR_RELATIVE}" > "${OUTPUT_DIR}/test_logs.html"
python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${OUTPUT_DIR_RELATIVE}" > "${OUTPUT_DIR}/test_logs.html"
else
echo "No test_logs.zip, skipping"
fi
@ -773,7 +773,7 @@ pipeline {
if [ -f "${SRC_TAR}" ]; then
echo "Remove ${SRC_TAR} for saving space"
rm -rf "${SRC_TAR}"
python2 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/output-srctarball" > "${WORKSPACE}/output-srctarball/hbase-src.html"
python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/output-srctarball" > "${WORKSPACE}/output-srctarball/hbase-src.html"
else
echo "No hbase-src.tar.gz, skipping"
fi

View File

@ -143,14 +143,14 @@ pipeline {
if [ -d "${PATCHDIR}/branch-site" ]; then
echo "Remove ${PATCHDIR}/branch-site for saving space"
rm -rf "${PATCHDIR}/branch-site"
python2 ${SOURCEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/branch-site" > "${PATCHDIR}/branch-site.html"
python3 ${SOURCEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/branch-site" > "${PATCHDIR}/branch-site.html"
else
echo "No branch-site, skipping"
fi
if [ -d "${PATCHDIR}/patch-site" ]; then
echo "Remove ${PATCHDIR}/patch-site for saving space"
rm -rf "${PATCHDIR}/patch-site"
python2 ${SOURCEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/patch-site" > "${PATCHDIR}/patch-site.html"
python3 ${SOURCEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/patch-site" > "${PATCHDIR}/patch-site.html"
else
echo "No patch-site, skipping"
fi
@ -282,7 +282,7 @@ pipeline {
if [ -f "${PATCHDIR}/test_logs.zip" ]; then
echo "Remove ${PATCHDIR}/test_logs.zip for saving space"
rm -rf "${PATCHDIR}/test_logs.zip"
python2 ${SOURCEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${WORKDIR_REL}/${PATCH_REL}" > "${PATCHDIR}/test_logs.html"
python3 ${SOURCEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${WORKDIR_REL}/${PATCH_REL}" > "${PATCHDIR}/test_logs.html"
else
echo "No test_logs.zip, skipping"
fi
@ -415,7 +415,7 @@ pipeline {
if [ -f "${PATCHDIR}/test_logs.zip" ]; then
echo "Remove ${PATCHDIR}/test_logs.zip for saving space"
rm -rf "${PATCHDIR}/test_logs.zip"
python2 ${SOURCEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${WORKDIR_REL}/${PATCH_REL}" > "${PATCHDIR}/test_logs.html"
python3 ${SOURCEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${WORKDIR_REL}/${PATCH_REL}" > "${PATCHDIR}/test_logs.html"
else
echo "No test_logs.zip, skipping"
fi

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
@ -32,7 +32,7 @@
# --annotation org.apache.yetus.audience.InterfaceAudience.LimitedPrivate \
# --include-file "hbase-*" \
# --known_problems_path ~/known_problems.json \
# rel/1.0.0 branch-1.2
# rel/1.3.0 branch-1.4
import json
import logging
@ -41,7 +41,9 @@ import re
import shutil
import subprocess
import sys
import urllib2
import urllib.request
import urllib.error
import urllib.parse
from collections import namedtuple
try:
import argparse
@ -55,11 +57,11 @@ REPO_DIR = os.getcwd()
def check_output(*popenargs, **kwargs):
""" Run command with arguments and return its output as a byte string.
Backported from Python 2.7 as it's implemented as pure python on stdlib.
>>> check_output(['/usr/bin/python', '--version'])
Python 2.6.2 """
process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
""" Run command with arguments and return its output as a byte string. """
process = subprocess.Popen(stdout=subprocess.PIPE,
universal_newlines=True,
*popenargs,
**kwargs)
output, _ = process.communicate()
retcode = process.poll()
if retcode:
@ -69,7 +71,7 @@ def check_output(*popenargs, **kwargs):
error = subprocess.CalledProcessError(retcode, cmd)
error.output = output
raise error
return output
return output.strip()
def get_repo_dir():
@ -161,7 +163,7 @@ def checkout_java_acc(force):
url = "https://github.com/lvc/japi-compliance-checker/archive/2.4.tar.gz"
scratch_dir = get_scratch_dir()
path = os.path.join(scratch_dir, os.path.basename(url))
jacc = urllib2.urlopen(url)
jacc = urllib.request.urlopen(url)
with open(path, 'wb') as w:
w.write(jacc.read())
@ -196,8 +198,8 @@ def ascii_encode_dict(data):
""" Iterate through a dictionary of data and convert all unicode to ascii.
This method was taken from
stackoverflow.com/questions/9590382/forcing-python-json-module-to-work-with-ascii """
ascii_encode = lambda x: x.encode('ascii') if isinstance(x, unicode) else x
return dict(map(ascii_encode, pair) for pair in data.items())
ascii_encode = lambda x: x.encode('ascii') if isinstance(x, str) else x
return dict(list(map(ascii_encode, pair)) for pair in list(data.items()))
def process_json(path):
@ -229,9 +231,9 @@ def compare_results(tool_results, known_issues, compare_warnings):
unexpected_issues = [unexpected_issue(check=check, issue_type=issue_type,
known_count=known_count,
observed_count=tool_results[check][issue_type])
for check, known_issue_counts in known_issues.items()
for issue_type, known_count in known_issue_counts.items()
if tool_results[check][issue_type] > known_count]
for check, known_issue_counts in list(known_issues.items())
for issue_type, known_count in list(known_issue_counts.items())
if compare_tool_results_count(tool_results, check, issue_type, known_count)]
if not compare_warnings:
unexpected_issues = [tup for tup in unexpected_issues
@ -243,6 +245,14 @@ def compare_results(tool_results, known_issues, compare_warnings):
return bool(unexpected_issues)
def compare_tool_results_count(tool_results, check, issue_type, known_count):
    """ Tell whether a check reported more issues than are already known.

    Looks up tool_results[check][issue_type] and returns True when that
    observed count is strictly greater than known_count, otherwise False.
    (Kept as its own function rather than an inline comparison so that
    logging can be added here when debugging.)
    """
    observed_count = tool_results[check][issue_type]
    # logging.info("known_count=%s, check key=%s, tool_results=%s, issue_type=%s",
    #              str(known_count), str(check), str(tool_results), str(issue_type))
    return observed_count > known_count
def process_java_acc_output(output):
""" Process the output string to find the problems and warnings in both the
@ -301,14 +311,14 @@ def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations, skip_annot
logging.info("Annotations are: %s", annotations)
annotations_path = os.path.join(get_scratch_dir(), "annotations.txt")
logging.info("Annotations path: %s", annotations_path)
with file(annotations_path, "w") as f:
with open(annotations_path, "w") as f:
f.write('\n'.join(annotations))
args.extend(["-annotations-list", annotations_path])
if skip_annotations is not None:
skip_annotations_path = os.path.join(
get_scratch_dir(), "skip_annotations.txt")
with file(skip_annotations_path, "w") as f:
with open(skip_annotations_path, "w") as f:
f.write('\n'.join(skip_annotations))
args.extend(["-skip-annotations-list", skip_annotations_path])
@ -327,14 +337,14 @@ def get_known_problems(json_path, src_rev, dst_rev):
keys in the format source_branch/destination_branch and the values
dictionaries with binary and source problems and warnings
Example:
{'branch-1.0.0': {
'rel/1.0.0': {'binary': {'problems': 123, 'warnings': 16},
{'branch-1.3': {
'rel/1.3.0': {'binary': {'problems': 123, 'warnings': 16},
'source': {'problems': 167, 'warnings': 1}},
'branch-1.2.0': {'binary': {'problems': 0, 'warnings': 0},
'branch-1.4': {'binary': {'problems': 0, 'warnings': 0},
'source': {'problems': 0, 'warnings': 0}}
},
'branch-1.2.0': {
'rel/1.2.1': {'binary': {'problems': 13, 'warnings': 1},
'branch-1.4': {
'rel/1.4.1': {'binary': {'problems': 13, 'warnings': 1},
'source': {'problems': 23, 'warnings': 0}}
}
} """

View File

@ -1,4 +1,4 @@
#!/usr/bin/python2
#!/usr/bin/env python3
##
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
@ -29,8 +29,8 @@ import xml.etree.ElementTree as etree
from collections import defaultdict
if len(sys.argv) != 3 :
print "usage: %s checkstyle-result-master.xml checkstyle-result-patch.xml" % sys.argv[0]
exit(1)
print("usage: %s checkstyle-result-master.xml checkstyle-result-patch.xml" % sys.argv[0])
sys.exit(1)
def path_key(x):
path = x.attrib['name']
@ -40,8 +40,8 @@ def error_name(x):
error_class = x.attrib['source']
return error_class[error_class.rfind(".") + 1:]
def print_row(path, error, master_errors, patch_errors):
print '%s\t%s\t%s\t%s' % (path,error, master_errors,patch_errors)
def print_row(path, err, master_errors, patch_errors):
print('%s\t%s\t%s\t%s' % (path, err, master_errors, patch_errors))
master = etree.parse(sys.argv[1])
patch = etree.parse(sys.argv[2])
@ -49,22 +49,22 @@ patch = etree.parse(sys.argv[2])
master_dict = defaultdict(int)
ret_value = 0
for child in master.getroot().getchildren():
for child in list(master.getroot()):
if child.tag != 'file':
continue
file = path_key(child)
for error_tag in child.getchildren():
for error_tag in list(child):
error = error_name(error_tag)
if (file, error) in master_dict:
master_dict[(file, error)] += 1
else:
master_dict[(file, error)] = 1
for child in patch.getroot().getchildren():
for child in list(patch.getroot()):
if child.tag != 'file':
continue
temp_dict = defaultdict(int)
for error_tag in child.getchildren():
for error_tag in list(child):
error = error_name(error_tag)
if error in temp_dict:
temp_dict[error] += 1
@ -72,7 +72,7 @@ for child in patch.getroot().getchildren():
temp_dict[error] = 1
file = path_key(child)
for error, count in temp_dict.iteritems():
for error, count in temp_dict.items():
if count > master_dict[(file, error)]:
print_row(file, error, master_dict[(file, error)], count)
ret_value = 1

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
##
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
@ -45,8 +45,8 @@ def get_bad_tests(console_url):
"""
response = requests.get(console_url)
if response.status_code != 200:
print "Error getting consoleText. Response = {} {}".format(
response.status_code, response.reason)
print("Error getting consoleText. Response = {} {}".format(
response.status_code, response.reason))
return
# All tests: All testcases which were run.
@ -59,13 +59,13 @@ def get_bad_tests(console_url):
hanging_tests_set = set()
failed_tests_set = set()
timeout_tests_set = set()
for line in response.content.splitlines():
for line in response.content.decode("utf-8").splitlines():
result1 = re.findall("Running org.apache.hadoop.hbase.(.*)", line)
if len(result1) == 1:
test_case = result1[0]
if test_case in all_tests_set:
print ("ERROR! Multiple tests with same name '{}'. Might get wrong results "
"for this test.".format(test_case))
print(("ERROR! Multiple tests with same name '{}'. Might get wrong results "
"for this test.".format(test_case)))
else:
hanging_tests_set.add(test_case)
all_tests_set.add(test_case)
@ -75,9 +75,9 @@ def get_bad_tests(console_url):
if "FAILURE!" in line:
failed_tests_set.add(test_case)
if test_case not in hanging_tests_set:
print ("ERROR! No test '{}' found in hanging_tests. Might get wrong results "
print(("ERROR! No test '{}' found in hanging_tests. Might get wrong results "
"for this test. This may also happen if maven is set to retry failing "
"tests.".format(test_case))
"tests.".format(test_case)))
else:
hanging_tests_set.remove(test_case)
result3 = re.match("^\\s+(\\w*).*\\sTestTimedOut", line)
@ -86,30 +86,30 @@ def get_bad_tests(console_url):
timeout_tests_set.add(test_case)
for bad_string in BAD_RUN_STRINGS:
if re.match(".*" + bad_string + ".*", line):
print "Bad string found in build:\n > {}".format(line)
print "Result > total tests: {:4} failed : {:4} timedout : {:4} hanging : {:4}".format(
len(all_tests_set), len(failed_tests_set), len(timeout_tests_set), len(hanging_tests_set))
print("Bad string found in build:\n > {}".format(line))
print("Result > total tests: {:4} failed : {:4} timedout : {:4} hanging : {:4}".format(
len(all_tests_set), len(failed_tests_set), len(timeout_tests_set), len(hanging_tests_set)))
return [all_tests_set, failed_tests_set, timeout_tests_set, hanging_tests_set]
if __name__ == "__main__":
if len(sys.argv) != 2:
print "ERROR : Provide the jenkins job console URL as the only argument."
print("ERROR : Provide the jenkins job console URL as the only argument.")
sys.exit(1)
print "Fetching {}".format(sys.argv[1])
print("Fetching {}".format(sys.argv[1]))
result = get_bad_tests(sys.argv[1])
if not result:
sys.exit(1)
[all_tests, failed_tests, timedout_tests, hanging_tests] = result
print "Found {} hanging tests:".format(len(hanging_tests))
print("Found {} hanging tests:".format(len(hanging_tests)))
for test in hanging_tests:
print test
print "\n"
print "Found {} failed tests of which {} timed out:".format(
len(failed_tests), len(timedout_tests))
print(test)
print("\n")
print("Found {} failed tests of which {} timed out:".format(
len(failed_tests), len(timedout_tests)))
for test in failed_tests:
print "{0} {1}".format(test, ("(Timed Out)" if test in timedout_tests else ""))
print("{0} {1}".format(test, ("(Timed Out)" if test in timedout_tests else "")))
print ("\nA test may have had 0 or more atomic test failures before it timed out. So a "
"'Timed Out' test may have other errors too.")

View File

@ -47,7 +47,7 @@ pipeline {
flaky_args=("${flaky_args[@]}" --urls "${JENKINS_URL}/job/HBase-Flaky-Tests/job/${BRANCH_NAME}" --is-yetus False --max-builds 50)
docker build -t hbase-dev-support dev-support
docker run --ulimit nproc=12500 -v "${WORKSPACE}":/hbase -u `id -u`:`id -g` --workdir=/hbase hbase-dev-support \
python dev-support/flaky-tests/report-flakies.py --mvn -v -o output "${flaky_args[@]}"
./dev-support/flaky-tests/report-flakies.py --mvn -v -o output "${flaky_args[@]}"
'''
sshPublisher(publishers: [
sshPublisherDesc(configName: 'Nightlies',

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/env python3
##
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
@ -140,7 +140,7 @@ def expand_multi_config_projects(cli_args):
raise Exception("Failed to get job information from jenkins for url '" + job_url +
"'. Jenkins returned HTTP status " + str(request.status_code))
response = request.json()
if response.has_key("activeConfigurations"):
if "activeConfigurations" in response:
for config in response["activeConfigurations"]:
final_expanded_urls.append({'url':config["url"], 'max_builds': max_builds,
'excludes': excluded_builds, 'is_yetus': is_yetus})
@ -167,7 +167,7 @@ for url_max_build in expanded_urls:
url = url_max_build["url"]
excludes = url_max_build["excludes"]
json_response = requests.get(url + "/api/json?tree=id,builds%5Bnumber,url%5D").json()
if json_response.has_key("builds"):
if "builds" in json_response:
builds = json_response["builds"]
logger.info("Analyzing job: %s", url)
else:
@ -238,27 +238,27 @@ for url_max_build in expanded_urls:
# Sort tests in descending order by flakyness.
sorted_test_to_build_ids = OrderedDict(
sorted(test_to_build_ids.iteritems(), key=lambda x: x[1]['flakyness'], reverse=True))
sorted(iter(test_to_build_ids.items()), key=lambda x: x[1]['flakyness'], reverse=True))
url_to_bad_test_results[url] = sorted_test_to_build_ids
if len(sorted_test_to_build_ids) > 0:
print "URL: {}".format(url)
print "{:>60} {:10} {:25} {}".format(
"Test Name", "Total Runs", "Bad Runs(failed/timeout/hanging)", "Flakyness")
print("URL: {}".format(url))
print("{:>60} {:10} {:25} {}".format(
"Test Name", "Total Runs", "Bad Runs(failed/timeout/hanging)", "Flakyness"))
for bad_test in sorted_test_to_build_ids:
test_status = sorted_test_to_build_ids[bad_test]
print "{:>60} {:10} {:7} ( {:4} / {:5} / {:5} ) {:2.0f}%".format(
print("{:>60} {:10} {:7} ( {:4} / {:5} / {:5} ) {:2.0f}%".format(
bad_test, len(test_status['all']), test_status['bad_count'],
len(test_status['failed']), len(test_status['timeout']),
len(test_status['hanging']), test_status['flakyness'])
len(test_status['hanging']), test_status['flakyness']))
else:
print "No flaky tests founds."
print("No flaky tests founds.")
if len(url_to_build_ids[url]) == len(build_ids_without_tests_run):
print "None of the analyzed builds have test result."
print("None of the analyzed builds have test result.")
print "Builds analyzed: {}".format(url_to_build_ids[url])
print "Builds without any test runs: {}".format(build_ids_without_tests_run)
print ""
print("Builds analyzed: {}".format(url_to_build_ids[url]))
print("Builds without any test runs: {}".format(build_ids_without_tests_run))
print("")
all_bad_tests = all_hanging_tests.union(all_failed_tests)

View File

@ -1,4 +1,4 @@
#!/usr/bin/python2
#!/usr/bin/env python3
##
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
@ -20,8 +20,8 @@ import sys
from string import Template
if len(sys.argv) != 2 :
print "usage: %s <redirect url>" % sys.argv[0]
exit(1)
print("usage: %s <redirect url>" % sys.argv[0])
sys.exit(1)
url = sys.argv[1].replace(" ", "%20")
template = Template("""<html>
@ -34,4 +34,4 @@ template = Template("""<html>
</html>""")
output = template.substitute(url = url)
print output
print(output)

View File

@ -1,311 +0,0 @@
#!/usr/bin/env python2
##
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Makes a patch for the current branch, creates/updates the review board request and uploads new
# patch to jira. Patch is named as (JIRA).(branch name).(patch number).patch as per Yetus' naming
# rules. If no jira is specified, patch will be named (branch name).patch and jira and review board
# are not updated. Review board id is retrieved from the remote link in the jira.
# Print help: submit-patch.py --h
import argparse
from builtins import input, str
import getpass
import git
import json
import logging
import os
import re
import requests
import subprocess
import sys
parser = argparse.ArgumentParser(
epilog = "To avoid having to enter jira/review board username/password every time, setup an "
"encrypted ~/.apache-cred files as follows:\n"
"1) Create a file with following single "
"line: \n{\"jira_username\" : \"appy\", \"jira_password\":\"123\", "
"\"rb_username\":\"appy\", \"rb_password\" : \"@#$\"}\n"
"2) Encrypt it with openssl.\n"
"openssl enc -aes-256-cbc -in <file> -out ~/.apache-creds\n"
"3) Delete original file.\n"
"Now onwards, you'll need to enter this encryption key only once per run. If you "
"forget the key, simply regenerate ~/.apache-cred file again.",
formatter_class=argparse.RawTextHelpFormatter
)
parser.add_argument("-b", "--branch",
help = "Branch to use for generating diff. If not specified, tracking branch "
"is used. If there is no tracking branch, error will be thrown.")
# Arguments related to Jira.
parser.add_argument("-jid", "--jira-id",
help = "Jira id of the issue. If set, we deduce next patch version from "
"attachments in the jira and also upload the new patch. Script will "
"ask for jira username/password for authentication. If not set, "
"patch is named <branch>.patch.")
# Arguments related to Review Board.
parser.add_argument("-srb", "--skip-review-board",
help = "Don't create/update the review board.",
default = False, action = "store_true")
parser.add_argument("--reviewers",
help = "Comma separated list of users to add as reviewers.")
# Misc arguments
parser.add_argument("--patch-dir", default = "~/patches",
help = "Directory to store patch files. If it doesn't exist, it will be "
"created. Default: ~/patches")
parser.add_argument("--rb-repo", default = "hbase-git",
help = "Review board repository. Default: hbase-git")
args = parser.parse_args()
# Setup logger
logging.basicConfig()
logger = logging.getLogger("submit-patch")
logger.setLevel(logging.INFO)
def log_fatal_and_exit(*arg):
    # Forwards its arguments unchanged to logger.fatal (so callers pass a logging-style
    # format string followed by its values), then terminates the script with exit status 1.
    logger.fatal(*arg)
    raise SystemExit(1)
def assert_status_code(response, expected_status_code, description):
    # Returns quietly when the response carries the expected HTTP status; otherwise logs the
    # actual status code and reason together with 'description' and exits the script.
    if response.status_code == expected_status_code:
        return
    log_fatal_and_exit(" Oops, something went wrong when %s. \nResponse: %s %s\nExiting..",
                       description, response.status_code, response.reason)
# Make repo instance to interact with git repo.
# NOTE: on success, the assignment `git = repo.git` below rebinds the module-level name `git`
# from the imported module to `repo.git`. Code after this point that says `git.<something>`
# is therefore using `repo.git`, not the module. The `except` clause still resolves `git.exc`
# against the module, because the rebinding has not happened yet if git.Repo() raised.
try:
repo = git.Repo(os.getcwd())
git = repo.git
except git.exc.InvalidGitRepositoryError as e:
log_fatal_and_exit(" '%s' is not valid git repo directory.\nRun from base directory of "
"HBase's git repo.", e)
logger.info(" Active branch: %s", repo.active_branch.name)
# Do not proceed if there are uncommitted changes.
if repo.is_dirty():
log_fatal_and_exit(" Git status is dirty. Commit locally first.")
# Returns the name of the branch the diff is computed against: the --branch argument when it
# is given and matches the name of one of repo.refs, otherwise the active branch's tracking
# branch. Exits the script when neither can be resolved.
def get_base_branch():
    if args.branch is None:
        # No --branch on the command line: fall back to the tracking branch, and quit if the
        # active branch has none.
        tracking_branch = repo.active_branch.tracking_branch()
        if tracking_branch is None:
            log_fatal_and_exit(" Active branch doesn't have a tracking_branch. Please specify base "
                               " branch for computing diff using --branch flag.")
        logger.info(" Using tracking branch as base branch")
        return tracking_branch.name

    # --branch was given: accept it only if some ref carries exactly that name.
    base_branch = args.branch
    if any(ref.name == base_branch for ref in repo.refs):
        return base_branch
    log_fatal_and_exit(" Branch '%s' does not exist in refs.", base_branch)
# Builds the patch file name. With a jira id the name is
# (JIRA).(branch name).(patch number).patch, where the number is chosen by
# get_patch_name_with_version(); without one it is simply (branch name).patch.
def get_patch_name(branch):
    if args.jira_id is not None:
        versioned_prefix = ".".join([args.jira_id.upper(), branch])
        return get_patch_name_with_version(versioned_prefix)
    return branch + ".patch"
# Fetches the jira issue page, deduces the next unused version for the patch and returns the
# final patch name (<patch_name_prefix>.NNN.patch). Exits the script if the issue cannot be
# fetched or if every version from 001 to 999 already appears on the page.
def get_patch_name_with_version(patch_name_prefix):
    # JIRA's rest api is broken wrt to attachments. https://jira.atlassian.com/browse/JRA-27637.
    # Using crude way to get list of attachments.
    url = "https://issues.apache.org/jira/browse/" + args.jira_id
    logger.info("Getting list of attachments for jira %s from %s", args.jira_id, url)
    html = requests.get(url)
    if html.status_code == 404:
        log_fatal_and_exit(" Invalid jira id : %s", args.jira_id)
    if html.status_code != 200:
        log_fatal_and_exit(" Cannot fetch jira information. Status code %s", html.status_code)
    # Iterate over patch names starting from version 1 and return when name is not already used.
    content = str(html.content, 'utf-8')
    for i in range(1, 1000):
        name = patch_name_prefix + "." + ('{0:03d}'.format(i)) + ".patch"
        if name not in content:
            return name
    # Previously the function fell off the end here and returned None, which only surfaced
    # later as an obscure failure when the name was joined onto the patch directory.
    log_fatal_and_exit(" All patch names from %s.001.patch to %s.999.patch are already used "
                       "in jira %s.", patch_name_prefix, patch_name_prefix, args.jira_id)
# Validates that patch directory exists, if not, creates it (including any missing parent
# directories). Exits the script if the path exists but is not a directory.
def validate_patch_dir(patch_dir):
    # Create patch_dir if it doesn't exist.
    if not os.path.exists(patch_dir):
        # logger.warning is the supported spelling; logger.warn is a deprecated alias.
        logger.warning(" Patch directory doesn't exist. Creating it.")
        # makedirs rather than mkdir so that a nested --patch-dir such as ~/work/patches is
        # created even when its parent does not exist yet.
        os.makedirs(patch_dir)
    else:
        # If patch_dir exists, make sure it's a directory.
        if not os.path.isdir(patch_dir):
            log_fatal_and_exit(" '%s' exists but is not a directory. Specify another directory.",
                               patch_dir)
# Requires the current branch to be exactly one commit ahead of base_branch. The script exits
# when
# - base_branch contains commits the current branch lacks (rebase needed),
# - the current branch has no commits of its own on top of base_branch, or
# - the current branch carries more than one commit on top of base_branch (squash needed).
def check_diff_between_branches(base_branch):
    missing_from_head = list(repo.iter_commits("HEAD.." + base_branch))
    extra_on_head = list(repo.iter_commits(base_branch + "..HEAD"))
    behind_count = len(missing_from_head)
    ahead_count = len(extra_on_head)
    if behind_count != 0:
        log_fatal_and_exit(" '%s' is ahead of current branch by %s commits. Rebase "
                           "and try again.", base_branch, behind_count)
    if ahead_count == 0:
        log_fatal_and_exit(" Current branch is same as '%s'. Exiting...", base_branch)
    if ahead_count > 1:
        log_fatal_and_exit(" Current branch is ahead of '%s' by %s commits. Squash into single "
                           "commit and try again.", base_branch, ahead_count)
# If ~/.apache-creds is present, decrypt it with openssl and load the credentials from its
# JSON content, otherwise prompt the user. Returns a dict of credentials; in the prompting
# path the review board entries are only asked for when review board is not skipped.
def get_credentials():
    creds = dict()
    creds_filepath = os.path.expanduser("~/.apache-creds")
    if os.path.exists(creds_filepath):
        try:
            logger.info(" Reading ~/.apache-creds for Jira and ReviewBoard credentials")
            # Pass an argument list instead of a shell string: the path is handed to openssl
            # verbatim, so a home directory containing spaces or shell metacharacters can no
            # longer break (or alter) the command.
            content = subprocess.check_output(
                ["openssl", "enc", "-aes-256-cbc", "-d", "-in", creds_filepath])
        except (subprocess.CalledProcessError, OSError):
            # OSError covers openssl not being installed, which the shell-based form reported
            # as a non-zero exit status.
            log_fatal_and_exit(" Couldn't decrypt ~/.apache-creds file. Exiting..")
        creds = json.loads(content)
    else:
        creds['jira_username'] = input("Jira username:")
        creds['jira_password'] = getpass.getpass("Jira password:")
        if not args.skip_review_board:
            creds['rb_username'] = input("Review Board username:")
            creds['rb_password'] = getpass.getpass("Review Board password:")
    return creds
# Uploads the patch file as an attachment of the jira issue at issue_url, authenticating with
# the jira username/password from creds. Exits the script unless jira answers with HTTP 200.
def attach_patch_to_jira(issue_url, patch_filepath, patch_filename, creds):
    # Upload patch to jira using REST API.
    headers = {'X-Atlassian-Token': 'no-check'}
    jira_auth = requests.auth.HTTPBasicAuth(creds['jira_username'], creds['jira_password'])
    attachment_url = issue_url + "/attachments"
    # Open the patch inside a with-block so the handle is closed once the upload returns;
    # it used to be left open for the rest of the run.
    with open(patch_filepath, 'rb') as patch_file:
        files = {'file': (patch_filename, patch_file, 'text/plain')}
        r = requests.post(attachment_url, headers = headers, files = files, auth = jira_auth)
    assert_status_code(r, 200, "uploading patch to jira")
# Requests only the "summary" field of the issue at issue_url and returns its value. Exits
# the script unless the request answers with HTTP 200.
def get_jira_summary(issue_url):
    summary_response = requests.get(issue_url + "?fields=summary")
    assert_status_code(summary_response, 200, "fetching jira summary")
    issue = json.loads(summary_response.content)
    return issue["fields"]["summary"]
# Looks through the remote links of the jira issue for one titled rb_link_title and returns
# the numeric review id (as a string) parsed from its reviews.apache.org URL. Returns None
# when no such link exists. Exits the script unless the request answers with HTTP 200.
def get_review_board_id_if_present(issue_url, rb_link_title):
    r = requests.get(issue_url + "/remotelink")
    assert_status_code(r, 200, "fetching remote links")
    links = json.loads(r.content)
    for link in links:
        if link["object"]["title"] != rb_link_title:
            continue
        res = re.search("reviews.apache.org/r/([0-9]+)", link["object"]["url"])
        # A link with the right title but a URL of another shape used to raise
        # AttributeError on res.group(); skip it and keep looking instead.
        if res is not None:
            return res.group(1)
    return None
# Main flow: work out the base branch, write the patch file, then (only when a jira id was
# given) attach it to the jira and create or update the review board request.
base_branch = get_base_branch()
# Remove remote repo name from branch name if present. This assumes that we don't use '/' in
# actual branch names.
base_branch_without_remote = base_branch.split('/')[-1]
logger.info(" Base branch: %s", base_branch)

check_diff_between_branches(base_branch)

patch_dir = os.path.abspath(os.path.expanduser(args.patch_dir))
logger.info(" Patch directory: %s", patch_dir)
validate_patch_dir(patch_dir)

patch_filename = get_patch_name(base_branch_without_remote)
logger.info(" Patch name: %s", patch_filename)
patch_filepath = os.path.join(patch_dir, patch_filename)

# Here `git` is repo.git (rebound during repo setup), not the imported module.
diff = git.format_patch(base_branch, stdout = True)
# The diff is encoded to bytes before writing, so the file has to be opened in binary mode;
# a text-mode handle rejects bytes on python 3.
with open(patch_filepath, "wb") as f:
    f.write(diff.encode('utf8'))

if args.jira_id is not None:
    creds = get_credentials()
    issue_url = "https://issues.apache.org/jira/rest/api/2/issue/" + args.jira_id

    attach_patch_to_jira(issue_url, patch_filepath, patch_filename, creds)

    if not args.skip_review_board:
        rb_auth = requests.auth.HTTPBasicAuth(creds['rb_username'], creds['rb_password'])

        rb_link_title = "Review Board (" + base_branch_without_remote + ")"
        rb_id = get_review_board_id_if_present(issue_url, rb_link_title)

        # If no review board link found, create new review request and add its link to jira.
        if rb_id is None:
            reviews_url = "https://reviews.apache.org/api/review-requests/"
            # Honour --rb-repo (default "hbase-git") instead of a hard-coded repository name.
            data = {"repository" : args.rb_repo}
            r = requests.post(reviews_url, data = data, auth = rb_auth)
            assert_status_code(r, 201, "creating new review request")
            review_request = json.loads(r.content)["review_request"]
            absolute_url = review_request["absolute_url"]
            logger.info(" Created new review request: %s", absolute_url)

            # Use jira summary as review's summary too.
            summary = get_jira_summary(issue_url)
            # Use commit message as description.
            description = repo.head.commit.message
            update_draft_data = {"bugs_closed" : [args.jira_id.upper()], "target_groups" : "hbase",
                                 "target_people" : args.reviewers, "summary" : summary,
                                 "description" : description }
            draft_url = review_request["links"]["draft"]["href"]
            r = requests.put(draft_url, data = update_draft_data, auth = rb_auth)
            assert_status_code(r, 200, "updating review draft")

            draft_request = json.loads(r.content)["draft"]
            diff_url = draft_request["links"]["draft_diffs"]["href"]
            # Close the patch file as soon as the upload has completed.
            with open(patch_filepath, 'rb') as patch_file:
                files = {'path' : (patch_filename, patch_file)}
                r = requests.post(diff_url, files = files, auth = rb_auth)
            assert_status_code(r, 201, "uploading diff to review draft")

            r = requests.put(draft_url, data = {"public" : True}, auth = rb_auth)
            assert_status_code(r, 200, "publishing review request")

            # Add link to review board in the jira.
            remote_link = json.dumps({'object': {'url': absolute_url, 'title': rb_link_title}})
            jira_auth = requests.auth.HTTPBasicAuth(creds['jira_username'], creds['jira_password'])
            r = requests.post(issue_url + "/remotelink", data = remote_link, auth = jira_auth,
                              headers={'Content-Type':'application/json'})
        else:
            logger.info(" Updating existing review board: https://reviews.apache.org/r/%s", rb_id)
            draft_url = "https://reviews.apache.org/api/review-requests/" + rb_id + "/draft/"
            diff_url = draft_url + "diffs/"
            # Close the patch file as soon as the upload has completed.
            with open(patch_filepath, 'rb') as patch_file:
                files = {'path' : (patch_filename, patch_file)}
                r = requests.post(diff_url, files = files, auth = rb_auth)
            assert_status_code(r, 201, "uploading diff to review draft")

            r = requests.put(draft_url, data = {"public" : True}, auth = rb_auth)
            assert_status_code(r, 200, "publishing review request")