HBASE-24845 Git/Jira Release Audit: limit branches when building audit db (#2238)

Populating the audit database with release tag information from git is
time-consuming. Until that's sorted out, give the user a flag for
limiting which branches they want to be reviewed.

Signed-off-by: Andrew Purtell <apurtell@apache.org>
This commit is contained in:
Nick Dimiduk 2020-10-29 09:21:18 -07:00 committed by GitHub
parent bb4a9d335f
commit 259fe1984a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 17 additions and 3 deletions

View File

@ -62,6 +62,7 @@ usage: git_jira_release_audit.py [-h] [--populate-from-git POPULATE_FROM_GIT]
[--release-line-regexp RELEASE_LINE_REGEXP] [--release-line-regexp RELEASE_LINE_REGEXP]
[--parse-release-tags PARSE_RELEASE_TAGS] [--parse-release-tags PARSE_RELEASE_TAGS]
[--fallback-actions-path FALLBACK_ACTIONS_PATH] [--fallback-actions-path FALLBACK_ACTIONS_PATH]
[--branch-filter-regexp BRANCH_FILTER_REGEXP]
[--jira-url JIRA_URL] --branch-1-fix-version [--jira-url JIRA_URL] --branch-1-fix-version
BRANCH_1_FIX_VERSION --branch-2-fix-version BRANCH_1_FIX_VERSION --branch-2-fix-version
BRANCH_2_FIX_VERSION BRANCH_2_FIX_VERSION
@ -119,6 +120,9 @@ Interactions with the Git repo:
--fallback-actions-path FALLBACK_ACTIONS_PATH --fallback-actions-path FALLBACK_ACTIONS_PATH
Path to a file containing _DB.Actions applicable to Path to a file containing _DB.Actions applicable to
specific git shas. (default: fallback_actions.csv) specific git shas. (default: fallback_actions.csv)
--branch-filter-regexp BRANCH_FILTER_REGEXP
Limit repo parsing to branch names that match this
filter expression. (default: .*)
--branch-1-fix-version BRANCH_1_FIX_VERSION --branch-1-fix-version BRANCH_1_FIX_VERSION
The Jira fixVersion used to indicate an issue is The Jira fixVersion used to indicate an issue is
committed to the specified release line branch committed to the specified release line branch
@ -175,8 +179,9 @@ fetch from Jira 100%|███████████████████
Optionally, the database can be built to include release tags, by specifying Optionally, the database can be built to include release tags, by specifying
`--parse-release-tags=true`. This is more time-consuming, but is necessary for `--parse-release-tags=true`. This is more time-consuming, but is necessary for
auditing discrepancies between git and Jira. Running the same command but auditing discrepancies between git and Jira. Optionally, limit the branches
including this flag looks like this: under consideration by specifying a regex filter with `--branch-filter-regexp`.
Running the same command but including this flag looks like this:
```shell script ```shell script
origin/branch-1 100%|███████████████████████████████████████| 4084/4084 [08:58<00:00, 7.59 commit/s] origin/branch-1 100%|███████████████████████████████████████| 4084/4084 [08:58<00:00, 7.59 commit/s]

View File

@ -199,13 +199,14 @@ class _RepoReader:
_identify_amend_jira_id_pattern = re.compile(r'^amend (.+)', re.IGNORECASE) _identify_amend_jira_id_pattern = re.compile(r'^amend (.+)', re.IGNORECASE)
def __init__(self, db, fallback_actions_path, remote_name, development_branch, def __init__(self, db, fallback_actions_path, remote_name, development_branch,
release_line_regexp, parse_release_tags, **_kwargs): release_line_regexp, branch_filter_regexp, parse_release_tags, **_kwargs):
self._db = db self._db = db
self._repo = _RepoReader._open_repo() self._repo = _RepoReader._open_repo()
self._fallback_actions = _RepoReader._load_fallback_actions(fallback_actions_path) self._fallback_actions = _RepoReader._load_fallback_actions(fallback_actions_path)
self._remote_name = remote_name self._remote_name = remote_name
self._development_branch = development_branch self._development_branch = development_branch
self._release_line_regexp = release_line_regexp self._release_line_regexp = release_line_regexp
self._branch_filter_regexp = branch_filter_regexp
self._parse_release_tags = parse_release_tags self._parse_release_tags = parse_release_tags
@property @property
@ -364,6 +365,10 @@ class _RepoReader:
release_branch (str): The name of the ref whose history is to be parsed. release_branch (str): The name of the ref whose history is to be parsed.
""" """
global MANAGER global MANAGER
branch_filter_pattern = re.compile('%s/%s' % (self._remote_name, self._branch_filter_regexp))
if not branch_filter_pattern.match(release_branch):
return
commits = list(self._repo.iter_commits( commits = list(self._repo.iter_commits(
"%s...%s" % (origin_commit.hexsha, release_branch), reverse=True)) "%s...%s" % (origin_commit.hexsha, release_branch), reverse=True))
LOG.info("%s has %d commits since its origin at %s.", release_branch, len(commits), LOG.info("%s has %d commits since its origin at %s.", release_branch, len(commits),
@ -638,6 +643,10 @@ class Auditor:
'--fallback-actions-path', '--fallback-actions-path',
help='Path to a file containing _DB.Actions applicable to specific git shas.', help='Path to a file containing _DB.Actions applicable to specific git shas.',
default='fallback_actions.csv') default='fallback_actions.csv')
git_repo_group.add_argument(
'--branch-filter-regexp',
help='Limit repo parsing to branch names that match this filter expression.',
default=r'.*')
jira_group = parser.add_argument_group('Interactions with Jira') jira_group = parser.add_argument_group('Interactions with Jira')
jira_group.add_argument( jira_group.add_argument(
'--jira-url', '--jira-url',