HADOOP-16748. Migrate to Python 3 and upgrade Yetus to 0.13.0 (branch-3.2) (#2717)

- Upgrade Yetus to 0.13.0 to support Python 3 for the release scripts.
- Removed determine-flaky-tests-hadoop.py.
- Temporarily disabled shelldocs check due to YETUS-1099.

Reviewed-by: Inigo Goiri <inigoiri@apache.org>
Reviewed-by: Mingliang Liu <liuml07@apache.org>
(cherry picked from commit b9b49ed956)

 Conflicts:
	dev-support/Jenkinsfile
	dev-support/docker/Dockerfile

(cherry picked from commit ff8b79f890)
This commit is contained in:
Akira Ajisaka 2021-02-24 11:05:58 +09:00 committed by GitHub
parent 165eaafaba
commit 08652eeee7
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 35 additions and 299 deletions

View File

@ -35,7 +35,7 @@ pipeline {
DOCKERFILE = "${SOURCEDIR}/dev-support/docker/Dockerfile"
YETUS='yetus'
// Branch or tag name. Yetus release tags are 'rel/X.Y.Z'
YETUS_VERSION='rel/0.12.0'
YETUS_VERSION='rel/0.13.0'
}
parameters {
@ -60,7 +60,7 @@ pipeline {
stage ('precommit-run') {
steps {
withCredentials(
[usernamePassword(credentialsId: 'apache-hadoop-at-github.com',
[usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd',
passwordVariable: 'GITHUB_TOKEN',
usernameVariable: 'GITHUB_USER'),
usernamePassword(credentialsId: 'hadoopqa-at-asf-jira',
@ -133,9 +133,6 @@ pipeline {
# plugins to enable
YETUS_ARGS+=("--plugins=all")
# use Hadoop's bundled shelldocs
YETUS_ARGS+=("--shelldocs=${WORKSPACE}/${SOURCEDIR}/dev-support/bin/shelldocs")
# don't let these tests cause -1s because we aren't really paying that
# much attention to them
YETUS_ARGS+=("--tests-filter=checkstyle")
@ -152,8 +149,8 @@ pipeline {
# help keep the ASF boxes clean
YETUS_ARGS+=("--sentinel")
# use emoji vote so it is easier to find the broken line
YETUS_ARGS+=("--github-use-emoji-vote")
# custom javadoc goals
YETUS_ARGS+=("--mvn-javadoc-goals=process-sources,javadoc:javadoc-no-fork")
"${TESTPATCHBIN}" "${YETUS_ARGS[@]}"
'''
@ -166,6 +163,19 @@ pipeline {
post {
always {
script {
// Publish status if it was missed (YETUS-1059)
withCredentials(
[usernamePassword(credentialsId: '683f5dcf-5552-4b28-9fb1-6a6b77cf53dd',
passwordVariable: 'GITHUB_TOKEN',
usernameVariable: 'GITHUB_USER')]) {
sh '''#!/usr/bin/env bash
YETUS_ARGS+=("--github-token=${GITHUB_TOKEN}")
YETUS_ARGS+=("--patch-dir=${WORKSPACE}/${PATCHDIR}")
TESTPATCHBIN="${WORKSPACE}/${YETUS}/precommit/src/main/shell/github-status-recovery.sh"
/usr/bin/env bash "${TESTPATCHBIN}" "${YETUS_ARGS[@]}" ${EXTRA_ARGS} || true
'''
}
// Yetus output
archiveArtifacts "${env.PATCHDIR}/**"
// Publish the HTML report so that it can be looked at

View File

@ -1,4 +1,4 @@
#!/usr/bin/env python
#!/usr/bin/env python3
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
@ -30,33 +30,16 @@ import re
import shutil
import subprocess
import sys
import urllib2
try:
import argparse
except ImportError:
sys.stderr.write("Please install argparse, e.g. via `pip install argparse`.")
sys.exit(2)
import urllib.request
import argparse
# Various relative paths
REPO_DIR = os.getcwd()
def check_output(*popenargs, **kwargs):
r"""Run command with arguments and return its output as a byte string.
Backported from Python 2.7 as it's implemented as pure python on stdlib.
>>> check_output(['/usr/bin/python', '--version'])
Python 2.6.2
"""
process = subprocess.Popen(stdout=subprocess.PIPE, *popenargs, **kwargs)
output, _ = process.communicate()
retcode = process.poll()
if retcode:
cmd = kwargs.get("args")
if cmd is None:
cmd = popenargs[0]
error = subprocess.CalledProcessError(retcode, cmd)
error.output = output
raise error
return output
""" Run command with arguments and return its output as a string. """
return subprocess.check_output(*popenargs, **kwargs, encoding='utf-8')
def get_repo_dir():
""" Return the path to the top of the repo. """
@ -139,7 +122,7 @@ def checkout_java_acc(force):
url = "https://github.com/lvc/japi-compliance-checker/archive/1.8.tar.gz"
scratch_dir = get_scratch_dir()
path = os.path.join(scratch_dir, os.path.basename(url))
jacc = urllib2.urlopen(url)
jacc = urllib.request.urlopen(url)
with open(path, 'wb') as w:
w.write(jacc.read())
@ -194,7 +177,7 @@ def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations):
annotations_path = os.path.join(get_scratch_dir(), "annotations.txt")
with file(annotations_path, "w") as f:
for ann in annotations:
print >>f, ann
print(ann, file=f)
args += ["-annotations-list", annotations_path]
subprocess.check_call(args)
@ -264,8 +247,8 @@ def main():
parser.add_argument("--skip-build",
action="store_true",
help="Skip building the projects.")
parser.add_argument("src_rev", nargs=1, help="Source revision.")
parser.add_argument("dst_rev", nargs="?", default="HEAD",
parser.add_argument("src_rev", nargs=1, type=str, help="Source revision.")
parser.add_argument("dst_rev", nargs="?", type=str, default="HEAD",
help="Destination revision. " +
"If not specified, will use HEAD.")

View File

@ -77,7 +77,7 @@ WANTED="$1"
shift
ARGV=("$@")
HADOOP_YETUS_VERSION=${HADOOP_YETUS_VERSION:-0.10.0}
HADOOP_YETUS_VERSION=${HADOOP_YETUS_VERSION:-0.13.0}
BIN=$(yetus_abs "${BASH_SOURCE-$0}")
BINDIR=$(dirname "${BIN}")

View File

@ -1,245 +0,0 @@
#!/usr/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Given a jenkins test job, this script examines all runs of the job done
# within specified period of time (number of days prior to the execution
# time of this script), and reports all failed tests.
#
# The output of this script includes a section for each run that has failed
# tests, with each failed test name listed.
#
# More importantly, at the end, it outputs a summary section that lists all
# failed tests across the examined runs, shows in how many runs each test
# failed, and sorts the tests by that count.
#
# This way, when we see failed tests in a PreCommit build, we can quickly tell
# whether a failed test is a new failure or has failed before, and how often,
# which gives an idea of whether it may just be a flaky test.
#
# Of course, to be 100% sure about the reason of a test failure, closer look
# at the failed test for the specific run is necessary.
#
import sys
import platform
# Gate on the running interpreter: refuse anything older than 2.6 and the
# 3.0.0 final release, then import the urllib flavour that matches the major
# version. `onward30` records whether the Python 3 code path is in use.
sysversion = sys.hexversion

if sysversion < 0x020600F0:
    sys.exit("Minimum supported python version is 2.6, the current version is "
             "Python" + platform.python_version())

if sysversion == 0x030000F0:
    sys.exit("There is a known bug with Python" + platform.python_version()
             + ", please try a different version")

onward30 = sysversion >= 0x03000000
if onward30:
    import urllib.request
else:
    import urllib2
import datetime
import json as simplejson
import logging
from optparse import OptionParser
import time
# Configuration
# Defaults for the command-line options registered in parse_args().
DEFAULT_JENKINS_URL = "https://builds.apache.org"
DEFAULT_JOB_NAME = "Hadoop-Common-trunk"
DEFAULT_NUM_PREVIOUS_DAYS = 14
# -1 means "no limit": main() only switches to summary mode when the
# --top value differs from -1.
DEFAULT_TOP_NUM_FAILED_TEST = -1
# Used to turn the --num-days window into seconds.
SECONDS_PER_DAY = 86400
# total number of runs to examine
# (set by find_flaky_tests() to the number of builds inside the time window)
numRunsToExamine = 0
#summary mode
# (when True, per-build and per-error log lines are suppressed)
summary_mode = False
#total number of errors
# (counts URL fetches that failed; reported by main() in summary mode)
error_count = 0
def parse_args():
    """Parse the command line and return the populated options object.

    Only the four options below are accepted; any positional argument ends
    the program through ``parser.error``.
    """
    option_table = (
        (("-J", "--jenkins-url"),
         dict(type="string", dest="jenkins_url", help="Jenkins URL",
              default=DEFAULT_JENKINS_URL)),
        (("-j", "--job-name"),
         dict(type="string", dest="job_name", help="Job name to look at",
              default=DEFAULT_JOB_NAME)),
        (("-n", "--num-days"),
         dict(type="int", dest="num_prev_days",
              help="Number of days to examine",
              default=DEFAULT_NUM_PREVIOUS_DAYS)),
        (("-t", "--top"),
         dict(type="int", dest="num_failed_tests",
              help="Summary Mode, only show top number of failed tests",
              default=DEFAULT_TOP_NUM_FAILED_TEST)),
    )

    parser = OptionParser()
    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    options, leftovers = parser.parse_args()
    if leftovers:
        parser.error("unexpected arguments: " + repr(leftovers))
    return options
def load_url_data(url):
    """Fetch ``url`` and return its body parsed as JSON.

    Args:
        url: Address of a JSON document (here, a Jenkins ``api/json`` URL).

    Returns:
        The decoded JSON value.

    Raises:
        Whatever the URL opener or the JSON parser raises; callers handle it.
    """
    if onward30:
        ourl = urllib.request.urlopen(url)
        try:
            # get_param() returns None when the response carries no charset;
            # bytes.decode(None) would raise TypeError, so fall back to
            # UTF-8, the default encoding for JSON.
            codec = ourl.info().get_param('charset') or 'utf-8'
            content = ourl.read().decode(codec)
        finally:
            # Release the connection even if reading or decoding fails.
            ourl.close()
        data = simplejson.loads(content, strict=False)
    else:
        ourl = urllib2.urlopen(url)
        try:
            data = simplejson.load(ourl, strict=False)
        finally:
            ourl.close()
    return data
def list_builds(jenkins_url, job_name):
    """List all builds of the target project.

    Args:
        jenkins_url: Base URL of the Jenkins server.
        job_name: Name of the Jenkins job to query.

    Returns:
        The ``builds`` entry of the job's JSON description; each item
        carries the ``url``, ``result`` and ``timestamp`` fields requested
        in the query.

    Raises:
        Re-raises whatever ``load_url_data`` raised, after counting it in
        ``error_count``.
    """
    global summary_mode
    # Without this declaration the ``+=`` below makes error_count a local
    # name and raises UnboundLocalError, hiding the real fetch failure.
    global error_count
    url = "%(jenkins)s/job/%(job_name)s/api/json?tree=builds[url,result,timestamp]" % dict(
        jenkins=jenkins_url,
        job_name=job_name)

    try:
        data = load_url_data(url)
    except Exception:
        if not summary_mode:
            logging.error("Could not fetch: %s" % url)
        error_count += 1
        raise
    return data['builds']
def find_failing_tests(testReportApiJson, jobConsoleOutput):
    """Find the names of any tests which failed in the given build.

    Args:
        testReportApiJson: URL of the build's testReport JSON document.
        jobConsoleOutput: URL of the build's console output; only used in
            log messages that point the reader at it.

    Returns:
        A set of ``"<className>.<name>"`` strings, one per test case whose
        status is ``REGRESSION`` or ``FAILED`` or whose ``errorDetails`` is
        not None. The set is empty when the report could not be loaded.
    """
    global summary_mode
    global error_count
    ret = set()
    try:
        data = load_url_data(testReportApiJson)
    except Exception:
        # A bare ``except:`` here also trapped KeyboardInterrupt and
        # SystemExit, so Ctrl-C was logged as a missing testReport and the
        # run carried on. Only real errors are treated as "no report".
        if not summary_mode:
            logging.error(" Could not open testReport, check " +
                          jobConsoleOutput + " for why it was reported failed")
        error_count += 1
        return ret

    for suite in data['suites']:
        for cs in suite['cases']:
            status = cs['status']
            errDetails = cs['errorDetails']
            if status in ('REGRESSION', 'FAILED') or errDetails is not None:
                ret.add(cs['className'] + "." + cs['name'])

    if not ret and not summary_mode:
        logging.info(" No failed tests in testReport, check " +
                     jobConsoleOutput + " for why it was reported failed.")
    return ret
def find_flaky_tests(jenkins_url, job_name, num_prev_days):
    """Iterate runs of the specified job within num_prev_days and collect results.

    Returns a dict mapping each failed test name to the number of examined
    builds in which it failed. As a side effect, stores the number of builds
    inside the time window in the module-level ``numRunsToExamine``.
    """
    global numRunsToExamine
    global summary_mode
    failure_counts = dict()

    # First list all builds, then keep only those in the last N days.
    # Build timestamps are divided by 1000 before comparing with time.time().
    builds = list_builds(jenkins_url, job_name)
    cutoff = int(time.time()) - SECONDS_PER_DAY * num_prev_days
    recent_builds = [b for b in builds if (int(b['timestamp']) / 1000) > cutoff]

    # Of those, keep only the ones that failed.
    failed_builds = [(b['url'], b['timestamp']) for b in recent_builds
                     if b['result'] in ('UNSTABLE', 'FAILURE')]

    total_runs = len(recent_builds)
    failed_runs = len(failed_builds)
    numRunsToExamine = total_runs

    if not summary_mode:
        tail = ", as listed below:\n" if failed_runs > 0 else "."
        logging.info(" THERE ARE " + str(failed_runs) + " builds (out of "
                     + str(total_runs)
                     + ") that have failed tests in the past "
                     + str(num_prev_days) + " days" + tail)

    for build_url, build_millis in failed_builds:
        jobConsoleOutput = build_url + "Console"
        testReport = build_url + "testReport"
        testReportApiJson = testReport + "/api/json"

        started = datetime.datetime.fromtimestamp(float(build_millis) / 1000.)
        st = started.strftime('%Y-%m-%d %H:%M:%S')
        if not summary_mode:
            logging.info("===>%s" % str(testReport) + " (" + st + ")")

        # An empty result simply contributes nothing to the tally.
        for ftest in find_failing_tests(testReportApiJson, jobConsoleOutput):
            if not summary_mode:
                logging.info(" Failed test: %s" % ftest)
            failure_counts[ftest] = failure_counts.get(ftest, 0) + 1

    return failure_counts
def main():
    """Script entry point: parse options, scan the job, print the summary.

    Exits with status 0 right after the scan when no failed test was found.
    """
    global numRunsToExamine
    global summary_mode
    logging.basicConfig(format='%(levelname)s:%(message)s', level=logging.INFO)

    # set up logger to write to stdout: swap the root logger's first handler
    # for a stream handler bound to sys.stdout
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(logging.INFO)
    root_logger = logging.getLogger()
    root_logger.removeHandler(root_logger.handlers[0])
    root_logger.addHandler(stdout_handler)

    opts = parse_args()
    logging.info("****Recently FAILED builds in url: " + opts.jenkins_url
                 + "/job/" + opts.job_name + "")

    # Any --top value other than -1 switches on summary mode.
    if opts.num_failed_tests != -1:
        summary_mode = True

    all_failing = find_flaky_tests(opts.jenkins_url, opts.job_name,
                                   opts.num_prev_days)
    if len(all_failing) == 0:
        sys.exit(0)

    truncated = summary_mode and opts.num_failed_tests < len(all_failing)
    if truncated:
        headline = ("\nAmong " + str(numRunsToExamine) +
                    " runs examined, top " + str(opts.num_failed_tests) +
                    " failed tests <#failedRuns: testName>:")
    else:
        headline = ("\nAmong " + str(numRunsToExamine) +
                    " runs examined, all failed tests <#failedRuns: testName>:")
    logging.info(headline)

    # print summary section: all failed tests sorted by how many times they failed
    ranking = sorted(all_failing, key=all_failing.get, reverse=True)
    for position, test_name in enumerate(ranking, 1):
        logging.info(" " + str(all_failing[test_name]) + ": " + test_name)
        if summary_mode and position == opts.num_failed_tests:
            break

    if summary_mode and error_count > 0:
        logging.info("\n" + str(error_count) + " errors found, you may "
                     + "re-run in non summary mode to see error details.")


if __name__ == "__main__":
    main()

View File

@ -71,12 +71,11 @@ RUN apt-get -q update \
make \
pinentry-curses \
pkg-config \
python \
python2.7 \
python-pip \
python-pkg-resources \
python-setuptools \
python-wheel \
python3 \
python3-pip \
python3-pkg-resources \
python3-setuptools \
python3-wheel \
rsync \
shellcheck \
software-properties-common \
@ -136,19 +135,8 @@ RUN apt-get -q update \
&& rm -rf /var/lib/apt/lists/*
ENV FINDBUGS_HOME /usr
####
# Install pylint at fixed version (2.0.0 removed python2 support)
# https://github.com/PyCQA/pylint/issues/2294
####
RUN pip2 install \
configparser==4.0.2 \
pylint==1.9.2 \
isort==4.3.21
####
# Install dateutil.parser
####
RUN pip2 install python-dateutil==2.7.3
# Install pylint and python-dateutil
RUN pip3 install pylint==2.6.0 python-dateutil==2.8.1
###
# Install node.js 8.17.0 for web UI framework (4.2.6 ships with Xenial)