Improve the backport missing script (#14723)

This commit is contained in:
Abhishek Agarwal 2023-08-04 15:21:55 +05:30 committed by GitHub
parent 0d73480c8f
commit 6ced208391
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 46 additions and 31 deletions

View File

@ -258,7 +258,7 @@ It is also the release managers responsibility for correctly assigning all PRs m
| [get-milestone-contributors](bin/get-milestone-contributors.py) | lists github users who contributed to a milestone |
| [get-milestone-prs](bin/get-milestone-prs.py) | lists PRs between tags or commits and the milestone associated with them. |
| [tag-missing-milestones](bin/tag-missing-milestones.py) | Find pull requests that are missing a milestone and tag them properly. |
| [find-missing-backports](bin/find-missing-backports.py) | Find PRs which have been back-ported to one release branch but not another. Useful if a bug fix release based on the previous release is required during a release cycle. |
| [find-missing-backports](bin/find-missing-backports.py) | Find PRs which have been back-ported to one release branch but not another. Useful if a bug fix release based on the previous release is required during a release cycle. Make sure to fetch remote commits before running this command. |
| [make-linkable-release-notes](bin/make-linkable-release-notes.py) | given input of a version, input markdown file path, and output markdown file path, will rewrite markdown headers of the input file to have embedded links in the release notes style. |

View File

@ -22,37 +22,34 @@ import subprocess
import sys
# Matches a PR reference of the form "(#1234)"; the single group captures the digits.
pr_number_pattern = r"\(#(\d+)\)"
# Matches a bracketed backport marker: "[Backport" followed by anything up to the next "]".
backport_pattern = r"\[Backport[^\]]*\]"
def extract_pr_title_from_commit_message(commit_msg):
    """Return the title portion of a commit subject or PR title.

    Every "[Backport...]" marker matched by ``backport_pattern`` is removed,
    then everything from the first "(#" onward (the PR-number suffix) is
    dropped and surrounding whitespace is stripped.

    :param commit_msg: commit subject line or PR title
    :return: the cleaned title; the whole cleaned string when no "(#" is present
    """
    # Removing the marker up front makes a separate "[Backport]" offset
    # computation unnecessary: after the substitution it can never be found.
    without_marker = re.sub(backport_pattern, '', commit_msg)
    # partition() returns the entire string as the head when "(#" is absent.
    title, _, _ = without_marker.partition("(#")
    return title.strip()
def extract_pr_numbers_from_commit_message(commit_msg):
    """Return all PR numbers referenced in ``commit_msg`` as a list of digit strings.

    A reference is any occurrence matched by ``pr_number_pattern``; numbers are
    returned in order of appearance, and the list is empty when there are none.
    """
    return [match.group(1) for match in re.finditer(pr_number_pattern, commit_msg)]
def extract_pr_title(pr_json):
    """Return the candidate titles under which a PR may appear in the git log.

    The first candidate is always derived from the PR's own title. When the
    PR's commit listing (fetched from ``pr_json['commits_url']``) contains
    exactly one commit, that commit's message is added as a second candidate.

    :param pr_json: PR object with at least 'commits_url' and 'title' keys
    :return: list of one or two cleaned title strings
    """
    resp = requests.get(pr_json['commits_url'], auth=(github_username, os.environ["GIT_TOKEN"]))
    # Decode the body once instead of re-parsing it for every access.
    commits = resp.json()
    title_candidates = [extract_pr_title_from_commit_message(pr_json['title'])]
    # Anything other than a list (e.g. an error object) carries no commit to inspect.
    if isinstance(commits, list) and len(commits) == 1:
        title_candidates.append(extract_pr_title_from_commit_message(commits[0]['commit']['message']))
    return title_candidates
def find_missing_backports(pr_jsons, release_pr_subjects, release_pr_numbers=()):
    """Print every PR of the target milestone that has no counterpart in the release.

    A PR counts as present in the release when any of the following holds:
    it carries a label named 'Backport', its cleaned title is in
    ``release_pr_subjects``, or its number (as a string) is in
    ``release_pr_numbers``. All PRs are examined; finding a match for one PR
    does not stop the scan.

    :param pr_jsons: iterable of PR objects with 'labels', 'title', 'number'
        and 'html_url' keys, and optionally 'milestone'
    :param release_pr_subjects: collection of cleaned commit titles on the release branch
    :param release_pr_numbers: collection of PR numbers (strings) referenced by
        release-branch commits; optional so two-argument callers keep working
    """
    for pr in pr_jsons:
        milestone = pr.get('milestone')
        # Only PRs assigned to the target milestone are of interest. The numbers
        # are compared as strings so the check works whether the module-level
        # milestone_number holds an int or the raw command-line text.
        if milestone is None or str(milestone['number']) != str(milestone_number):
            continue

        is_backport_pr = any(label['name'] == 'Backport' for label in pr['labels'])
        pr_title_candidate = extract_pr_title_from_commit_message(pr['title'])
        backport_found = (
            is_backport_pr
            or pr_title_candidate in release_pr_subjects
            or str(pr['number']) in release_pr_numbers
        )
        if not backport_found:
            print("Missing backport found for PR {}, url: {}".format(pr['number'], pr['html_url']))
def find_next_url(links):
for link in links:
@ -95,15 +92,33 @@ command = "git log --pretty=tformat:%s {}..{}".format(previous_branch_first_comm
# `command` is the `git log --pretty=tformat:%s` invocation built just above,
# so every output line is the subject of one commit.
all_release_commits = subprocess.check_output(command, shell=True).decode('UTF-8')

# Index the release commits both by cleaned title and by referenced PR number.
release_pr_subjects = set()
release_pr_numbers = set()
for commit_msg in all_release_commits.splitlines():
    release_pr_subjects.add(extract_pr_title_from_commit_message(commit_msg))
    release_pr_numbers.update(extract_pr_numbers_from_commit_message(commit_msg))

print("Number of release PR subjects: {}".format(len(release_pr_subjects)))
# Get all closed PRs and filter out with milestone
next_url = "https://api.github.com/repos/apache/druid/pulls?state=closed"
while next_url is not None:
    resp = requests.get(next_url, auth=(github_username, os.environ["GIT_TOKEN"]))
    find_missing_backports(resp.json(), release_pr_subjects, release_pr_numbers)
    # A response without a Link header has no further pages; indexing the
    # header directly would raise KeyError instead of ending the loop.
    link_header = resp.headers.get('Link')
    next_url = find_next_url(link_header.split(',')) if link_header else None
# Credentials and page size shared by every GitHub request below.
github_auth = (github_username, os.environ["GIT_TOKEN"])
search_page_size = 50

# Resolve the milestone number to its title, which the search query filters on.
milestone_url = "https://api.github.com/repos/apache/druid/milestones/{}".format(milestone_number)
milestone_resp = requests.get(milestone_url, auth=github_auth)
# Fail with the HTTP error itself rather than a KeyError on the error payload.
milestone_resp.raise_for_status()
resp = milestone_resp.json()
milestone_title = resp['title']

# Page through all merged, closed PRs of the milestone.
pr_items = []
page = 0
while True:
    page = page + 1
    pr_url = "https://api.github.com/search/issues?per_page={}&page={}&q=milestone:{}+type:pr+is:merged+is:closed+repo:apache/druid".format(search_page_size, page, milestone_title)
    search_resp = requests.get(pr_url, auth=github_auth)
    search_resp.raise_for_status()
    pr_resp = search_resp.json()
    if pr_resp['incomplete_results']:
        sys.stderr.write('This script cannot handle incomplete results')
        sys.exit(1)
    pr_items.extend(pr_resp['items'])
    # A short (or empty) page is the last one.
    if len(pr_resp['items']) < search_page_size:
        print("Total PRs for current milestone: {}".format(len(pr_items)))
        print("Total expected count: {}".format(pr_resp['total_count']))
        # Guard against silently analysing a partial PR list.
        if pr_resp['total_count'] != len(pr_items):
            sys.stderr.write('Expected PR count does not match with number of PRs fetched')
            sys.exit(1)
        break

find_missing_backports(pr_items, release_pr_subjects, release_pr_numbers)