diff --git a/distribution/asf-release-process-guide.md b/distribution/asf-release-process-guide.md index 08ea5409c53..5604686af48 100644 --- a/distribution/asf-release-process-guide.md +++ b/distribution/asf-release-process-guide.md @@ -258,7 +258,7 @@ It is also the release managers responsibility for correctly assigning all PRs m | [get-milestone-contributors](bin/get-milestone-contributors.py) | lists github users who contributed to a milestone | | [get-milestone-prs](bin/get-milestone-prs.py) | lists PRs between tags or commits and the milestone associated with them. | | [tag-missing-milestones](bin/tag-missing-milestones.py) | Find pull requests which the milestone is missing and tag them properly. | -| [find-missing-backports](bin/find-missing-backports.py) | Find PRs which have been back-ported to one release branch but not another. Useful if a bug fix release based on the previous release is required during a release cycle. | +| [find-missing-backports](bin/find-missing-backports.py) | Find PRs which have been back-ported to one release branch but not another. Useful if a bug fix release based on the previous release is required during a release cycle. Make sure to fetch remote commits before running this command. | | [make-linkable-release-notes](bin/make-linkable-release-notes.py) | given input of a version, input markdown file path, and output markdown file path, will rewrite markdown headers of the input file to have embedded links in the release notes style. | diff --git a/distribution/bin/find-missing-backports.py b/distribution/bin/find-missing-backports.py index c34677ed79e..d7dcc143a9a 100755 --- a/distribution/bin/find-missing-backports.py +++ b/distribution/bin/find-missing-backports.py @@ -22,37 +22,34 @@ import subprocess import sys +pr_number_pattern = r'\(#(\d+)\)' +backport_pattern = r'\[Backport[^\]]*\]' + def extract_pr_title_from_commit_message(commit_msg): # Extract commit message except the pr number + commit_msg = re.sub(backport_pattern, '', commit_msg) pr_num_pos = commit_msg.find("(#") if pr_num_pos < 0: pr_num_pos = len(commit_msg) - backport_pos = commit_msg.find("[Backport]") - if backport_pos < 0: - backport_pos = 0 - else: - backport_pos = backport_pos + len("[Backport]") - return commit_msg[backport_pos:pr_num_pos].strip() + return commit_msg[:pr_num_pos].strip() +def extract_pr_numbers_from_commit_message(commit_msg): + extracted_numbers = re.findall(pr_number_pattern, commit_msg) + return extracted_numbers -def extract_pr_title(pr_json): - commit_url = pr_json['commits_url'] - resp = requests.get(commit_url, auth=(github_username, os.environ["GIT_TOKEN"])) - title_candidates = [extract_pr_title_from_commit_message(pr_json['title'])] - if len(resp.json()) == 1: - title_candidates.append(extract_pr_title_from_commit_message(resp.json()[0]['commit']['message'])) - return title_candidates - - -def find_missing_backports(pr_jsons, release_pr_subjects): +def find_missing_backports(pr_jsons, release_pr_subjects, release_pr_numbers): for pr in pr_jsons: - if pr['milestone'] is not None: - if pr['milestone']['number'] == milestone_number: - for pr_title_candidate in extract_pr_title(pr): - if pr_title_candidate in release_pr_subjects: - return - print("Missing backport found for PR {}, url: {}".format(pr['number'], pr['html_url'])) - + backport_found = False + for label in pr['labels']: + if label['name'] == 'Backport': + backport_found = True + pr_title_candidate = extract_pr_title_from_commit_message(pr['title']) + if pr_title_candidate in release_pr_subjects: + backport_found = True + if str(pr['number']) in release_pr_numbers: + backport_found = True + if backport_found == False: + print("Missing backport found for PR {}, url: {}".format(pr['number'], pr['html_url'])) def find_next_url(links): for link in links: @@ -95,15 +92,33 @@ command = "git log --pretty=tformat:%s {}..{}".format(previous_branch_first_comm all_release_commits = subprocess.check_output(command, shell=True).decode('UTF-8') release_pr_subjects = set() +release_pr_numbers = set() for commit_msg in all_release_commits.splitlines(): title = extract_pr_title_from_commit_message(commit_msg) + pr_numbers = extract_pr_numbers_from_commit_message(commit_msg) release_pr_subjects.add(title) + release_pr_numbers.update(pr_numbers) +print("Number of release PR subjects: {}".format(len(release_pr_subjects))) # Get all closed PRs and filter out with milestone -next_url = "https://api.github.com/repos/apache/druid/pulls?state=closed" - -while next_url is not None: - resp = requests.get(next_url, auth=(github_username, os.environ["GIT_TOKEN"])) - find_missing_backports(resp.json(), release_pr_subjects) - links = resp.headers['Link'].split(',') - next_url = find_next_url(links) \ No newline at end of file +milestone_url = "https://api.github.com/repos/apache/druid/milestones/{}".format(milestone_number) +resp = requests.get(milestone_url, auth=(github_username, os.environ["GIT_TOKEN"])).json() +milestone_title = resp['title'] +pr_items = [] +page = 0 +while True: + page = page + 1 + pr_url = "https://api.github.com/search/issues?per_page=50&page={}&q=milestone:{}+type:pr+is:merged+is:closed+repo:apache/druid".format(page,milestone_title) + pr_resp = requests.get(pr_url, auth=(github_username, os.environ["GIT_TOKEN"])).json() + if pr_resp['incomplete_results']: + sys.stderr.write('This script cannot handle incomplete results') + sys.exit(1) + pr_items.extend(pr_resp['items']) + if len(pr_resp['items']) < 50: + print("Total PRs for current milestone: {}".format(len(pr_items))) + print("Total expected count: {}".format(pr_resp['total_count'])) + if pr_resp['total_count'] != len(pr_items): + sys.stderr.write('Expected PR count does not match with number of PRs fetched') + sys.exit(1) + break +find_missing_backports(pr_items, release_pr_subjects, release_pr_numbers)