#!/usr/bin/env python
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Script which checks Java API compatibility between two revisions of the
# Java client.
#
# Originally sourced from Apache Kudu, which was based on the
# compatibility checker from the Apache HBase project, but ported to
# Python for better readability.
#
# Added to Hadoop by HADOOP-13583 ("Incorporate checkcompatibility script
# which runs Java API Compliance Checker").
#
# Running compatibility checks with checkcompatibility.py
# --------------------------------------------------------
# Invoke `./dev-support/bin/checkcompatibility.py` to run Java API Compliance
# Checker to compare the public Java APIs of two git objects. This can be
# used by release managers to compare the compatibility of a previous and
# current release.
#
# As an example, this invocation will check the compatibility of interfaces
# annotated as Public or LimitedPrivate:
#
#   ./dev-support/bin/checkcompatibility.py \
#       --annotation org.apache.hadoop.classification.InterfaceAudience.Public \
#       --annotation org.apache.hadoop.classification.InterfaceAudience.LimitedPrivate \
#       --include "hadoop.*" branch-2.7.2 trunk

import contextlib
import fnmatch
import logging
import os
import re
import shutil
import subprocess
import sys

try:
    from urllib.request import urlopen  # Python 3
except ImportError:
    from urllib2 import urlopen  # Python 2

try:
    import argparse
except ImportError:
    sys.stderr.write("Please install argparse, e.g. via `pip install argparse`.")
    sys.exit(2)

# Various relative paths
REPO_DIR = os.getcwd()

# Release of the Java API Compliance Checker that gets downloaded. The
# version appears both in the download URL and in the name of the directory
# the tarball unpacks to, so it is defined exactly once.
JAVA_ACC_VERSION = "1.8"
JAVA_ACC_URL = ("https://github.com/lvc/japi-compliance-checker/archive/" +
                JAVA_ACC_VERSION + ".tar.gz")

# JARs whose path (relative to the searched tree) contains one of these
# markers are never API-checked.
_EXCLUDED_JAR_MARKERS = ("-tests", "-sources", "-with-dependencies")


def check_output(*popenargs, **kwargs):
    """Run a command and return what it wrote to stdout.

    Same calling convention as subprocess.check_output (this is a backport
    for Python 2.6). Unless the caller passes 'universal_newlines'
    explicitly, the output is returned as text (str) on both Python 2 and
    Python 3, which is what every caller in this script expects.

    Raises subprocess.CalledProcessError, with the captured output attached
    as '.output', if the command exits with a non-zero status.
    """
    kwargs.setdefault("universal_newlines", True)
    process = subprocess.Popen(*popenargs, stdout=subprocess.PIPE, **kwargs)
    output, _ = process.communicate()
    retcode = process.poll()
    if retcode:
        cmd = kwargs.get("args")
        if cmd is None:
            cmd = popenargs[0]
        error = subprocess.CalledProcessError(retcode, cmd)
        error.output = output
        raise error
    return output


def get_repo_dir():
    """ Return the path to the top of the repo. """
    # This script lives in <repo>/dev-support/bin, i.e. two levels down.
    dirname, _ = os.path.split(os.path.abspath(__file__))
    return os.path.join(dirname, "../..")


def get_scratch_dir():
    """ Return the path to the scratch dir that we build within.

    The directory is created if it does not exist yet. """
    scratch_dir = os.path.join(get_repo_dir(), "target", "compat-check")
    if not os.path.exists(scratch_dir):
        os.makedirs(scratch_dir)
    return scratch_dir


def get_java_acc_dir():
    """ Return the path where we check out the Java API Compliance Checker. """
    return os.path.join(get_repo_dir(), "target", "java-acc")


def clean_scratch_dir(scratch_dir):
    """ Clean up and re-create the scratch directory. """
    if os.path.exists(scratch_dir):
        logging.info("Removing scratch dir %s...", scratch_dir)
        shutil.rmtree(scratch_dir)
    logging.info("Creating empty scratch dir %s...", scratch_dir)
    os.makedirs(scratch_dir)


def checkout_java_tree(rev, path):
    """ Check out the Java source tree for the given revision into
    the given path.

    'path' must not exist yet. Raises subprocess.CalledProcessError if
    either 'git archive' or 'tar' fails. """
    logging.info("Checking out %s in %s", rev, path)
    os.makedirs(path)
    repo_dir = get_repo_dir()
    # Equivalent to `git archive --format=tar REV | tar -C PATH -xf -` with
    # pipefail, but without a shell, so neither value needs quoting.
    archive_cmd = ["git", "archive", "--format=tar", rev]
    extract_cmd = ["tar", "-C", path, "-xf", "-"]
    archive = subprocess.Popen(archive_cmd, stdout=subprocess.PIPE,
                               cwd=repo_dir)
    try:
        extract = subprocess.Popen(extract_cmd, stdin=archive.stdout,
                                   cwd=repo_dir)
    except OSError:
        archive.stdout.close()
        archive.wait()
        raise
    # Drop our copy of the pipe so git receives SIGPIPE if tar exits early.
    archive.stdout.close()
    extract_rc = extract.wait()
    archive_rc = archive.wait()
    if archive_rc:
        raise subprocess.CalledProcessError(archive_rc, archive_cmd)
    if extract_rc:
        raise subprocess.CalledProcessError(extract_rc, extract_cmd)


def get_git_hash(revname):
    """ Convert 'revname' to its SHA-1 hash. """
    return check_output(["git", "rev-parse", revname],
                        cwd=get_repo_dir()).strip()


def get_repo_name():
    """Get the name of the repo based on the git remote.

    Uses the first remote that 'git remote -v' lists. If no remote is
    configured (or no name can be derived from its URL), falls back to the
    name of the repository directory."""
    remotes = check_output(["git", "remote", "-v"], cwd=get_repo_dir())
    # Example output:
    # origin	https://github.com/apache/hadoop.git (fetch)
    # origin	https://github.com/apache/hadoop.git (push)
    for line in remotes.splitlines():
        fields = line.split()
        if len(fields) < 2:
            continue
        remote = fields[1].rstrip("/").split("/")[-1]
        if remote.endswith(".git"):
            remote = remote[:-4]
        if remote:
            return remote
    return os.path.basename(os.path.abspath(get_repo_dir()))


def build_tree(java_path):
    """ Run the Java build within 'java_path'. """
    logging.info("Building in %s...", java_path)
    subprocess.check_call(["mvn", "-DskipTests", "-Dmaven.javadoc.skip=true",
                           "package"],
                          cwd=java_path)


def checkout_java_acc(force):
    """
    Check out the Java API Compliance Checker. If 'force' is true, will
    re-download even if the directory exists.
    """
    acc_dir = get_java_acc_dir()
    if os.path.exists(acc_dir):
        logging.info("Java ACC is already downloaded.")
        if not force:
            return
        logging.info("Forcing re-download.")
        shutil.rmtree(acc_dir)

    logging.info("Downloading Java ACC...")

    scratch_dir = get_scratch_dir()
    tarball = os.path.join(scratch_dir, os.path.basename(JAVA_ACC_URL))
    # Stream the download to disk and make sure the connection is closed.
    with contextlib.closing(urlopen(JAVA_ACC_URL)) as response:
        with open(tarball, 'wb') as w:
            shutil.copyfileobj(response, w)

    subprocess.check_call(["tar", "xzf", tarball],
                          cwd=scratch_dir)

    shutil.move(os.path.join(scratch_dir,
                             "japi-compliance-checker-" + JAVA_ACC_VERSION),
                acc_dir)


def find_jars(path):
    """ Return a list of jars within 'path' to be checked for compatibility.

    Test, source and with-dependencies JARs are skipped. The markers are
    matched against the path relative to 'path', so the location of the
    checkout itself cannot exclude JARs by accident. The result is sorted;
    a missing 'path' yields an empty list. """
    jars = []
    for root, _, filenames in os.walk(path):
        for name in fnmatch.filter(filenames, "*.jar"):
            jar = os.path.join(root, name)
            relative = os.path.relpath(jar, path)
            if not any(marker in relative
                       for marker in _EXCLUDED_JAR_MARKERS):
                jars.append(jar)
    return sorted(jars)


def write_xml_file(path, version, jars):
    """Write the XML manifest file for JACC.

    The descriptor consists of a <version> element holding 'version' and an
    <archives> element listing one JAR path per line."""
    with open(path, "wt") as f:
        f.write("<version>" + version + "</version>\n")
        f.write("<archives>")
        for j in jars:
            f.write(j + "\n")
        f.write("</archives>")


def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations):
    """ Run the compliance checker to compare 'src' and 'dst'.

    'annotations' is either None or a list of fully-qualified Java
    annotation names that is handed to JACC via -annotations-list.
    The HTML report is written to report.html in the scratch dir. """
    logging.info("Will check compatibility between original jars:\n\t%s\n" +
                 "and new jars:\n\t%s",
                 "\n\t".join(src_jars),
                 "\n\t".join(dst_jars))

    java_acc_path = os.path.join(get_java_acc_dir(),
                                 "japi-compliance-checker.pl")

    scratch_dir = get_scratch_dir()
    src_xml_path = os.path.join(scratch_dir, "src.xml")
    dst_xml_path = os.path.join(scratch_dir, "dst.xml")
    write_xml_file(src_xml_path, src_name, src_jars)
    write_xml_file(dst_xml_path, dst_name, dst_jars)

    out_path = os.path.join(scratch_dir, "report.html")

    args = ["perl", java_acc_path,
            "-l", get_repo_name(),
            "-d1", src_xml_path,
            "-d2", dst_xml_path,
            "-report-path", out_path]

    if annotations is not None:
        annotations_path = os.path.join(scratch_dir, "annotations.txt")
        with open(annotations_path, "w") as f:
            for ann in annotations:
                f.write(ann + "\n")
        args += ["-annotations-list", annotations_path]

    subprocess.check_call(args)


def filter_jars(jars, include_filters, exclude_filters):
    """Filter the list of JARs based on include and exclude filters.

    Both filter arguments are lists of compiled regexes that are matched
    (anchored at the start) against each JAR's basename. A JAR is kept if
    it matches at least one include filter and no exclude filter. The
    input order is preserved."""
    kept = []
    for j in jars:
        basename = os.path.basename(j)
        included = any(f.match(basename) for f in include_filters)
        if included and not any(f.match(basename) for f in exclude_filters):
            kept.append(j)
        else:
            logging.debug("Ignoring JAR %s", j)
    return kept


def _build_arg_parser():
    """Create the command-line parser for this script."""
    parser = argparse.ArgumentParser(
        description="Run Java API Compliance Checker.")
    parser.add_argument("-f", "--force-download",
                        action="store_true",
                        help="Download dependencies (i.e. Java JAVA_ACC) " +
                        "even if they are already present")
    parser.add_argument("-i", "--include-file",
                        action="append",
                        dest="include_files",
                        help="Regex filter for JAR files to be included. " +
                        "Applied before the exclude filters. " +
                        "Can be specified multiple times.")
    parser.add_argument("-e", "--exclude-file",
                        action="append",
                        dest="exclude_files",
                        help="Regex filter for JAR files to be excluded. " +
                        "Applied after the include filters. " +
                        "Can be specified multiple times.")
    parser.add_argument("-a", "--annotation",
                        action="append",
                        dest="annotations",
                        help="Fully-qualified Java annotation. " +
                        "Java ACC will only check compatibility of " +
                        "annotated classes. Can be specified multiple times.")
    parser.add_argument("--skip-clean",
                        action="store_true",
                        help="Skip cleaning the scratch directory.")
    parser.add_argument("--skip-build",
                        action="store_true",
                        help="Skip building the projects.")
    parser.add_argument("src_rev", nargs=1, help="Source revision.")
    parser.add_argument("dst_rev", nargs="?", default="HEAD",
                        help="Destination revision. " +
                        "If not specified, will use HEAD.")
    return parser


def _compile_filters(patterns, kind, default):
    """Compile the user-supplied JAR filename regexes of the given kind.

    Returns 'default' when no pattern of this kind was given."""
    if patterns is None:
        return default
    compiled = []
    for pattern in patterns:
        logging.info("Applying JAR filename %s filter: %s", kind, pattern)
        compiled.append(re.compile(pattern))
    return compiled


def main(argv=None):
    """Main function.

    'argv' is the list of command-line arguments without the program name;
    it defaults to sys.argv[1:]. Prints the help text and exits with
    status 1 when invoked without arguments or when no JARs are left after
    filtering."""
    if argv is None:
        argv = sys.argv[1:]

    logging.basicConfig(level=logging.INFO)
    parser = _build_arg_parser()

    if not argv:
        parser.print_help()
        sys.exit(1)

    args = parser.parse_args(argv)

    src_rev, dst_rev = args.src_rev[0], args.dst_rev

    logging.info("Source revision: %s", src_rev)
    logging.info("Destination revision: %s", dst_rev)

    # Construct the JAR regex patterns for filtering. Without an explicit
    # include filter every JAR is a candidate.
    include_filters = _compile_filters(args.include_files, "include",
                                       [re.compile(".*")])
    exclude_filters = _compile_filters(args.exclude_files, "exclude", [])

    # Construct the annotation list
    annotations = args.annotations
    if annotations is not None:
        logging.info("Filtering classes using %d annotation(s):",
                     len(annotations))
        for a in annotations:
            logging.info("\t%s", a)

    # Download deps.
    checkout_java_acc(args.force_download)

    # Set up the build.
    scratch_dir = get_scratch_dir()
    src_dir = os.path.join(scratch_dir, "src")
    dst_dir = os.path.join(scratch_dir, "dst")

    if args.skip_clean:
        logging.info("Skipping cleaning the scratch directory")
    else:
        clean_scratch_dir(scratch_dir)
        # Check out the src and dst source trees.
        checkout_java_tree(get_git_hash(src_rev), src_dir)
        checkout_java_tree(get_git_hash(dst_rev), dst_dir)

    # Run the build in each.
    if args.skip_build:
        logging.info("Skipping the build")
    else:
        build_tree(src_dir)
        build_tree(dst_dir)

    # Find the JARs.
    src_jars = find_jars(src_dir)
    dst_jars = find_jars(dst_dir)

    # Filter the JARs.
    src_jars = filter_jars(src_jars, include_filters, exclude_filters)
    dst_jars = filter_jars(dst_jars, include_filters, exclude_filters)

    if not src_jars or not dst_jars:
        logging.error("No JARs found! Are your filters too strong?")
        sys.exit(1)

    run_java_acc(src_rev, src_jars,
                 dst_rev, dst_jars, annotations)


if __name__ == "__main__":
    main()