#!/usr/bin/env python3
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

# Script which checks Java API compatibility between two revisions of the
# Java client.
#
# Originally sourced from Apache Kudu, which was based on the
# compatibility checker from the Apache HBase project, but ported to
# Python for better readability.

# The script can be invoked as follows:
#   $ ./checkcompatibility.py ${SOURCE_GIT_REVISION} ${GIT_BRANCH_OR_TAG}
# or with some options:
#   $ ./dev-support/checkcompatibility.py \
#      --annotation org.apache.yetus.audience.InterfaceAudience.Public \
#      --annotation org.apache.yetus.audience.InterfaceAudience.LimitedPrivate \
#      --include-file "hbase-*" \
#      --known_problems_path ~/known_problems.json \
#      rel/1.3.0 branch-1.4

import json
import logging
import os
import re
import shutil
import subprocess
import sys
import urllib.request
import urllib.error
import urllib.parse
from collections import namedtuple

try:
    import argparse
except ImportError:
    logging.error(
        "Please install argparse, e.g. via `pip install argparse`.")
    sys.exit(2)

# Various relative paths
REPO_DIR = os.getcwd()


def check_output(*popenargs, **kwargs):
    """ Run a command and return its standard output as stripped text.

    The arguments are forwarded to subprocess.Popen; stdout is always
    captured in text mode, so callers must not pass their own 'stdout'.

    Raises subprocess.CalledProcessError (with the captured, unstripped
    output attached as '.output') when the command exits non-zero.
    """
    with subprocess.Popen(*popenargs, stdout=subprocess.PIPE,
                          universal_newlines=True, **kwargs) as process:
        output, _ = process.communicate()
    retcode = process.returncode
    if retcode:
        cmd = kwargs.get("args")
        if cmd is None:
            cmd = popenargs[0]
        raise subprocess.CalledProcessError(retcode, cmd, output=output)
    return output.strip()


def get_repo_dir():
    """ Return the path to the top of the repo.

    This is computed as the parent of the directory containing this script.
    """
    script_dir = os.path.dirname(os.path.abspath(__file__))
    dirname = os.path.dirname(script_dir)
    logging.debug("Repo dir is %s", dirname)
    return dirname


def get_scratch_dir():
    """ Return the path to the scratch dir that we build within.

    The directory (<repo>/target/compat-check) is created if missing.
    """
    scratch_dir = os.path.join(get_repo_dir(), "target", "compat-check")
    # exist_ok avoids the check-then-create race of an explicit exists() test.
    os.makedirs(scratch_dir, exist_ok=True)
    return scratch_dir


def get_java_acc_dir():
    """ Return the path where we check out the Java API Compliance Checker. """
    return os.path.join(get_repo_dir(), "target", "java-acc")


def clean_scratch_dir(scratch_dir):
    """ Clean up and re-create the scratch directory. """
    if os.path.exists(scratch_dir):
        logging.info("Removing scratch dir %s ", scratch_dir)
        shutil.rmtree(scratch_dir)
    logging.info("Creating empty scratch dir %s ", scratch_dir)
    os.makedirs(scratch_dir)


def checkout_java_tree(rev, path):
    """ Check out the Java source tree for the given revision into
    the given path.

    'rev' is handed to `git archive` (run from the repo dir) and the
    resulting tar stream is unpacked into 'path'.
    Raises subprocess.CalledProcessError if the pipeline fails.
    """
    logging.info("Checking out %s in %s", rev, path)
    # The directory may already exist when the scratch dir was not cleaned
    # (--skip-clean); tolerate that instead of failing.
    os.makedirs(path, exist_ok=True)
    # Extract java source. pipefail makes a failing `git archive` fail the
    # whole pipeline rather than being masked by tar's exit status.
    subprocess.check_call(["bash", '-o', 'pipefail', "-c",
                           ("git archive --format=tar %s | "
                            "tar -C '%s' -xf -") % (rev, path)],
                          cwd=get_repo_dir())


def get_git_hash(revname):
    """ Convert 'revname' to its SHA-1 hash.

    If 'revname' cannot be resolved as given, 'origin/<revname>' is tried
    before giving up. Raises subprocess.CalledProcessError if neither
    resolves.
    """
    try:
        return check_output(["git", "rev-parse", revname],
                            cwd=get_repo_dir()).strip()
    except subprocess.CalledProcessError:
        revname = "origin/" + revname
        return check_output(["git", "rev-parse", revname],
                            cwd=get_repo_dir()).strip()


def get_repo_name(remote_name="origin"):
    """ Get the name of the repo based on the git remote.

    The name is the last path component of the remote URL with any
    trailing '.git' removed.
    """
    remote = check_output(["git", "config", "--get",
                           "remote.{0}.url".format(remote_name)],
                          cwd=get_repo_dir()).strip()
    remote = remote.split("/")[-1]
    return remote[:-4] if remote.endswith(".git") else remote


def build_tree(java_path, verbose):
    """ Run the Java build within 'java_path'.

    Maven output is silenced with --quiet unless 'verbose' is true.
    Raises subprocess.CalledProcessError if sed or mvn fails.
    """
    logging.info("Building in %s ", java_path)
    # special hack for comparing with rel/2.0.0, see HBASE-26063 for more details
    subprocess.check_call(["sed", "-i", "2148s/3.0.0/3.0.4/g", "pom.xml"],
                          cwd=java_path)
    mvn_cmd = ["mvn", "--batch-mode", "-DskipTests",
               "-Dmaven.javadoc.skip=true", "package"]
    if not verbose:
        # Keep the "package" goal as the last element of the command line.
        mvn_cmd.insert(-1, "--quiet")
    subprocess.check_call(mvn_cmd, cwd=java_path)


def checkout_java_acc(force):
    """ Check out the Java API Compliance Checker. If 'force' is true, will
    re-download even if the directory exists.
    """
    acc_dir = get_java_acc_dir()
    if os.path.exists(acc_dir):
        logging.info("Java ACC is already downloaded.")
        if not force:
            return
        logging.info("Forcing re-download.")
        shutil.rmtree(acc_dir)

    logging.info("Downloading Java ACC...")

    url = "https://github.com/lvc/japi-compliance-checker/archive/2.4.tar.gz"
    scratch_dir = get_scratch_dir()
    path = os.path.join(scratch_dir, os.path.basename(url))
    # Stream the archive to disk and make sure the HTTP response is closed.
    with urllib.request.urlopen(url) as response, open(path, 'wb') as archive:
        shutil.copyfileobj(response, archive)

    subprocess.check_call(["tar", "xzf", path],
                          cwd=scratch_dir)

    shutil.move(os.path.join(scratch_dir, "japi-compliance-checker-2.4"),
                acc_dir)


def find_jars(path):
    """ Return a list of jars within 'path' to be checked for compatibility.

    Jars whose path contains "-tests", "-sources" or "-with-dependencies"
    are left out. The result is de-duplicated and sorted so that runs are
    reproducible.
    """
    all_jars = set(check_output(["find", path, "-type", "f",
                                 "-name", "*.jar"]).splitlines())

    return sorted(j for j in all_jars if (
        "-tests" not in j and
        "-sources" not in j and
        "-with-dependencies" not in j))


def write_xml_file(path, version, jars):
    """ Write the XML manifest file for JACC.

    The descriptor consists of a <version> element holding 'version' and an
    <archives> element listing one jar path per line.
    """
    with open(path, "wt") as f:
        f.write("<version>%s</version>\n" % version)
        f.write("<archives>")
        for j in jars:
            f.write("%s\n" % j)
        f.write("</archives>")


def ascii_encode_dict(data):
    """ Check that all strings in a dictionary are ASCII and return a copy.

    Used as a json object_hook. Keys and values stay 'str' (encoding them
    to bytes, as the Python 2 version did, would make the loaded
    dictionaries impossible to index with ordinary string keys on
    Python 3). Non-string items are passed through unchanged.

    Raises UnicodeEncodeError if a key or value contains non-ASCII text.
    """
    def require_ascii(value):
        if isinstance(value, str):
            # Only validates; the encoded bytes are deliberately discarded.
            value.encode('ascii')
        return value

    return {require_ascii(key): require_ascii(value)
            for key, value in data.items()}


def process_json(path):
    """ Process the known problems json file. The program raises an uncaught
    exception if it can't find the file or if the json is invalid.
    """
    path = os.path.abspath(os.path.expanduser(path))
    try:
        with open(path, encoding="utf-8") as f:
            return json.load(f, object_hook=ascii_encode_dict)
    except ValueError as e:
        logging.error("File: %s\nInvalid JSON:\n%s", str(path), str(e))
        raise
    except IOError:
        logging.error("Provided json file path does not exist %s", str(path))
        raise


def compare_results(tool_results, known_issues, compare_warnings):
    """ Compare the number of problems found with the allowed number. If
    compare_warnings is true then also compare the number of warnings found.

    tool_results = results from the JACC tool - a dictionary
    known_issues = dictionary of expected issue count
    compare_warnings = boolean - if true also compare warnings as well as problems

    Returns True if any observed count exceeds its known count (each such
    case is logged as an error), False otherwise.
    """
    logging.info("Results: %s", str(tool_results))

    unexpected_issue = namedtuple('unexpected_issue', ['check', 'issue_type',
                                                       'known_count', 'observed_count'])
    unexpected_issues = [
        unexpected_issue(check=check, issue_type=issue_type,
                         known_count=known_count,
                         observed_count=tool_results[check][issue_type])
        for check, known_issue_counts in known_issues.items()
        for issue_type, known_count in known_issue_counts.items()
        if compare_tool_results_count(tool_results, check, issue_type, known_count)]

    if not compare_warnings:
        unexpected_issues = [tup for tup in unexpected_issues
                             if tup.issue_type != 'warnings']

    for issue in unexpected_issues:
        logging.error('Found %s during %s check (known issues: %d, observed issues: %d)',
                      issue.issue_type, issue.check, issue.known_count, issue.observed_count)

    return bool(unexpected_issues)


def compare_tool_results_count(tool_results, check, issue_type, known_count):
    """ Return True if the observed count exceeds the known count.

    (This function exists just so can add in logging; previous was inlined
    one-liner but this made it hard debugging)
    """
    # logging.info("known_count=%s, check key=%s, tool_results=%s, issue_type=%s",
    #              str(known_count), str(check), str(tool_results), str(issue_type))
    return tool_results[check][issue_type] > known_count


def process_java_acc_output(output):
    """ Process the output string to find the problems and warnings in both the
    binary and source compatibility. This is done in a way that is admittedly
    brittle; we are open to better implementations.

    We expect a line containing the relevant information to look something like:
    "total binary compatibility problems: 123, warnings: 16"

    Returns a dictionary keyed by the first six characters after "total "
    (e.g. "binary", "source") whose values map "problems" / "warnings" to
    integer counts.
    """
    return_value = {}
    for line in output.split("\n"):
        # Line has relevant info
        if line.lower().startswith("total"):
            values = {}
            # Remove "total" keyword
            line = line[6:]
            # Separate the two valuable parts
            for segment in line.split(","):
                part = segment.split(":")
                # Extract key and value; the key is the last eight
                # characters, i.e. "problems" or "warnings".
                values[part[0][-8:]] = int(part[1])
            return_value[line[:6]] = values
    return return_value


def log_java_acc_version():
    """ Log the version reported by the Java API Compliance Checker. """
    java_acc_path = os.path.join(
        get_java_acc_dir(), "japi-compliance-checker.pl")

    args = ["perl", java_acc_path, "-dumpversion"]
    logging.info("Java ACC version: %s", check_output(args))


def run_java_acc(src_name, src_jars, dst_name, dst_jars, annotations, skip_annotations, name):
    """ Run the compliance checker to compare 'src' and 'dst'.

    Descriptor files, optional annotation lists and the HTML report are
    written into the scratch dir. Returns the counts parsed from the tool's
    output by process_java_acc_output.
    """
    logging.info("Will check compatibility between original jars:\n\t%s\n"
                 "and new jars:\n\t%s",
                 "\n\t".join(src_jars),
                 "\n\t".join(dst_jars))

    java_acc_path = os.path.join(
        get_java_acc_dir(), "japi-compliance-checker.pl")

    src_xml_path = os.path.join(get_scratch_dir(), "src.xml")
    dst_xml_path = os.path.join(get_scratch_dir(), "dst.xml")
    write_xml_file(src_xml_path, src_name, src_jars)
    write_xml_file(dst_xml_path, dst_name, dst_jars)

    out_path = os.path.join(get_scratch_dir(), "report.html")

    args = ["perl", java_acc_path,
            "-l", name,
            "-d1", src_xml_path,
            "-d2", dst_xml_path,
            "-report-path", out_path]
    if annotations is not None:
        logging.info("Annotations are: %s", annotations)
        annotations_path = os.path.join(get_scratch_dir(), "annotations.txt")
        logging.info("Annotations path: %s", annotations_path)
        with open(annotations_path, "w") as f:
            f.write('\n'.join(annotations))
        args.extend(["-annotations-list", annotations_path])

    if skip_annotations is not None:
        skip_annotations_path = os.path.join(
            get_scratch_dir(), "skip_annotations.txt")
        with open(skip_annotations_path, "w") as f:
            f.write('\n'.join(skip_annotations))
        args.extend(["-skip-annotations-list", skip_annotations_path])

    try:
        output = check_output(args)
    except subprocess.CalledProcessError as e:
        # The program returns a nonzero error code if issues are found. We
        # almost always expect some issues and want to process the results.
        output = e.output

    return process_java_acc_output(output)


def get_known_problems(json_path, src_rev, dst_rev):
    """ Return the allowed problem/warning counts for src_rev -> dst_rev.

    A leading "origin/" is removed from both revisions before lookup. When
    'json_path' is None, defaults of zero problems and zero warnings are
    returned. Otherwise the json file is loaded and the entry at
    [src_rev][dst_rev] is returned; a KeyError is logged and re-raised when
    that entry is missing.

    The json file should be in the following format: a dictionary with the
    keys in the format source_branch/destination_branch and the values
    dictionaries with binary and source problems and warnings
    Example:
    {"branch-1.3": {
      "rel/1.3.0": {"binary": {"problems": 123, "warnings": 16},
                    "source": {"problems": 167, "warnings": 1}},
      "branch-1.4": {"binary": {"problems": 0, "warnings": 0},
                     "source": {"problems": 0, "warnings": 0}}
      },
     "branch-1.4": {
      "rel/1.4.1": {"binary": {"problems": 13, "warnings": 1},
                    "source": {"problems": 23, "warnings": 0}}
      }
    }
    """
    # These are the default values for allowed problems and warnings
    known_problems = {"binary": {"problems": 0, "warnings": 0},
                      "source": {"problems": 0, "warnings": 0}}
    prefix = "origin/"
    if src_rev.startswith(prefix):
        src_rev = src_rev[len(prefix):]
    if dst_rev.startswith(prefix):
        dst_rev = dst_rev[len(prefix):]
    if json_path is not None:
        known_problems = process_json(json_path)
        try:
            return known_problems[src_rev][dst_rev]
        except KeyError:
            logging.error("Known Problems values for %s %s are not in "
                          "provided json file. If you are trying to run "
                          "the test with the default values, don't "
                          "provide the --known_problems_path argument",
                          src_rev, dst_rev)
            raise
    return known_problems


def filter_jars(jars, include_filters, exclude_filters):
    """ Filter the list of JARs based on include and exclude filters.

    A JAR is kept when its basename matches at least one include filter and
    no exclude filter. Filters are compiled regexes applied with match();
    input order is preserved.
    """
    kept = []
    for j in jars:
        basename = os.path.basename(j)
        included = any(f.match(basename) for f in include_filters)
        if included and not any(f.match(basename) for f in exclude_filters):
            kept.append(j)
        else:
            logging.debug("Ignoring JAR %s", j)
    return kept


def _build_arg_parser():
    """ Build the command line parser for this script. """
    parser = argparse.ArgumentParser(
        description="Run Java API Compliance Checker.")
    parser.add_argument("-f", "--force-download",
                        action="store_true",
                        help="Download dependencies (i.e. Java JAVA_ACC) "
                        "even if they are already present")
    parser.add_argument("-i", "--include-file",
                        action="append",
                        dest="include_files",
                        help="Regex filter for JAR files to be included. "
                        "Applied before the exclude filters. "
                        "Can be specified multiple times.")
    parser.add_argument("-e", "--exclude-file",
                        action="append",
                        dest="exclude_files",
                        help="Regex filter for JAR files to be excluded. "
                        "Applied after the include filters. "
                        "Can be specified multiple times.")
    parser.add_argument("-a", "--annotation",
                        action="append",
                        dest="annotations",
                        help="Fully-qualified Java annotation. "
                        "Java ACC will only check compatibility of "
                        "annotated classes. Can be specified multiple times.")
    parser.add_argument("--skip-annotation",
                        action="append",
                        dest="skip_annotations",
                        help="Fully-qualified Java annotation. "
                        "Java ACC will not check compatibility of "
                        "these annotated classes. Can be specified multiple "
                        "times.")
    parser.add_argument("-p", "--known_problems_path",
                        default=None, dest="known_problems_path",
                        help="Path to file with json 'known_problems "
                        "dictionary.' Path can be relative or absolute. An "
                        "examples file can be seen in the pydocs for the "
                        "get_known_problems method.")
    parser.add_argument("--skip-clean",
                        action="store_true",
                        help="Skip cleaning the scratch directory.")
    parser.add_argument("--compare-warnings", dest="compare_warnings",
                        action="store_true", default=False,
                        help="Compare warnings as well as problems.")
    parser.add_argument("--skip-build",
                        action="store_true",
                        help="Skip building the projects.")
    parser.add_argument("--verbose",
                        action="store_true",
                        help="more output")
    parser.add_argument("-r", "--remote", default="origin", dest="remote_name",
                        help="Name of remote to use. e.g. its repo name will be used as the name "
                        "we pass to Java ACC for the library.")
    parser.add_argument("src_rev", nargs=1, help="Source revision.")
    parser.add_argument("dst_rev", nargs="?", default="HEAD",
                        help="Destination revision. "
                        "If not specified, will use HEAD.")
    return parser


def _compile_filters(patterns, kind):
    """ Compile JAR filename regexes, logging each one as an 'kind' filter. """
    compiled = []
    for pattern in patterns:
        logging.info("Applying JAR filename %s filter: %s", kind, pattern)
        compiled.append(re.compile(pattern))
    return compiled


def main():
    """ Main function.

    Parses the command line, checks out and (optionally) builds both
    revisions, runs Java ACC on the resulting JARs and exits non-zero when
    more issues are observed than are known.
    """
    logging.basicConfig(level=logging.INFO)
    args = _build_arg_parser().parse_args()

    src_rev, dst_rev = args.src_rev[0], args.dst_rev

    logging.info("Source revision: %s", src_rev)
    logging.info("Destination revision: %s", dst_rev)

    # Configure the expected numbers
    known_problems = get_known_problems(
        args.known_problems_path, src_rev, dst_rev)

    # Construct the JAR regex patterns for filtering.
    if args.include_files is not None:
        include_filters = _compile_filters(args.include_files, "include")
    else:
        # Without explicit include filters every JAR is a candidate.
        include_filters = [re.compile(".*")]

    exclude_filters = []
    if args.exclude_files is not None:
        exclude_filters = _compile_filters(args.exclude_files, "exclude")

    # Construct the annotation list
    if args.annotations is not None:
        logging.info("Filtering classes using %d annotation(s):",
                     len(args.annotations))
        for a in args.annotations:
            logging.info("\t%s", a)

    skip_annotations = args.skip_annotations
    if skip_annotations is not None:
        logging.info("Skipping classes with %d annotation(s):",
                     len(skip_annotations))
        for a in skip_annotations:
            logging.info("\t%s", a)

    # Download deps.
    checkout_java_acc(args.force_download)
    log_java_acc_version()

    # Set up the build.
    scratch_dir = get_scratch_dir()
    src_dir = os.path.join(scratch_dir, "src")
    dst_dir = os.path.join(scratch_dir, "dst")

    if args.skip_clean:
        logging.info("Skipping cleaning the scratch directory")
    else:
        clean_scratch_dir(scratch_dir)
        # Check out the src and dst source trees.
        checkout_java_tree(get_git_hash(src_rev), src_dir)
        checkout_java_tree(get_git_hash(dst_rev), dst_dir)

    # Run the build in each.
    if args.skip_build:
        logging.info("Skipping the build")
    else:
        build_tree(src_dir, args.verbose)
        build_tree(dst_dir, args.verbose)

    # Find the JARs.
    src_jars = find_jars(src_dir)
    dst_jars = find_jars(dst_dir)

    # Filter the JARs.
    src_jars = filter_jars(src_jars, include_filters, exclude_filters)
    dst_jars = filter_jars(dst_jars, include_filters, exclude_filters)

    if not src_jars or not dst_jars:
        logging.error("No JARs found! Are your filters too strong?")
        sys.exit(1)

    output = run_java_acc(src_rev, src_jars, dst_rev,
                          dst_jars, args.annotations, skip_annotations,
                          get_repo_name(args.remote_name))
    sys.exit(compare_results(output, known_problems, args.compare_warnings))


if __name__ == "__main__":
    main()