HBASE-23763 Add 'new on release line report' for git/jira audit tool (#1104)

Codify building the summary of what's new on a release line
branch (i.e., `branch-2`), but not yet released on earlier release
branches of that line.

Builds a cvs report that looks like https://home.apache.org/~ndimiduk/new_for_branch-2.csv
This commit is contained in:
Nick Dimiduk 2020-01-30 09:31:01 -08:00 committed by GitHub
parent 8b00f9f0b1
commit 85b0c8e6bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 200 additions and 62 deletions

View File

@ -49,44 +49,73 @@ The tool provides basic help docs.
```shell script
$ ./venv/bin/python ./git_jira_release_audit.py --help
usage: git_jira_release_audit.py [-h] [--db-path DB_PATH]
usage: git_jira_release_audit.py [-h] [--populate-from-git POPULATE_FROM_GIT]
[--populate-from-jira POPULATE_FROM_JIRA]
[--db-path DB_PATH]
[--initialize-db INITIALIZE_DB]
[--report-new-for-release-line REPORT_NEW_FOR_RELEASE_LINE]
[--git-repo-path GIT_REPO_PATH]
[--remote-name REMOTE_NAME]
[--development-branch DEVELOPMENT_BRANCH]
[--development-branch-fix-version DEVELOPMENT_BRANCH_FIX_VERSION]
[--release-line-regexp RELEASE_LINE_REGEXP]
[--parse-release-tags PARSE_RELEASE_TAGS]
[--fallback-actions-path FALLBACK_ACTIONS_PATH]
[--jira-url JIRA_URL] --branch-1-fix-version
BRANCH_1_FIX_VERSION --branch-2-fix-version
BRANCH_2_FIX_VERSION
[--jira-url JIRA_URL]
optional arguments:
-h, --help show this help message and exit
Building the audit database:
--populate-from-git POPULATE_FROM_GIT
When true, populate the audit database from the Git
repository.
--populate-from-jira POPULATE_FROM_JIRA
When true, populate the audit database from Jira.
--db-path DB_PATH Path to the database file, or leave unspecified for a
transient db.
--initialize-db INITIALIZE_DB
When true, initialize the database tables. This is
destructive to the contents of an existing database.
Generating reports:
--report-new-for-release-line REPORT_NEW_FOR_RELEASE_LINE
Builds a report of the Jira issues that are new on the
target release line, not present on any of the
associated release branches. (i.e., on branch-2 but
not branch-{2.0,2.1,...})
Interactions with the Git repo:
--git-repo-path GIT_REPO_PATH
Path to the git repo, or leave unspecified to infer
from the current file's path.
--remote-name REMOTE_NAME
The name of the git remote to use when identifying
branches.
branches. Default: 'origin'
--development-branch DEVELOPMENT_BRANCH
The name of the branch from which all release lines
originate.
originate. Default: 'master'
--development-branch-fix-version DEVELOPMENT_BRANCH_FIX_VERSION
The Jira fixVersion used to indicate an issue is
committed to the development branch.
committed to the development branch. Default: '3.0.0'
--release-line-regexp RELEASE_LINE_REGEXP
A regexp used to identify release lines.
--parse-release-tags PARSE_RELEASE_TAGS
When true, look for release tags and annotate commits
according to their release version. An Expensive
calculation, disabled by default.
--fallback-actions-path FALLBACK_ACTIONS_PATH
Path to a file containing _DB.Actions applicable to specific git shas.
--jira-url JIRA_URL A URL locating the target JIRA instance.
Path to a file containing _DB.Actions applicable to
specific git shas.
--branch-1-fix-version BRANCH_1_FIX_VERSION
The Jira fixVersion used to indicate an issue is
committed to the specified release line branch
--branch-2-fix-version BRANCH_2_FIX_VERSION
The Jira fixVersion used to indicate an issue is
committed to the specified release line branch
Interactions with Jira:
--jira-url JIRA_URL A URL locating the target JIRA instance.
```
Example Run:

View File

@ -183,6 +183,7 @@ c89cfd3406823cf05fa83464c5ddee16bf0d473f,ADD,HBASE-17248
c89cfd3406823cf05fa83464c5ddee16bf0d473f,ADD,HBASE-17248
c97905a962b88a0c68ca8a51c2e507daec81ca6d,SKIP,
c9f506a2973e0acbd0d2df7b9353c9291f6c94a8,SKIP,
cbb2c7e00d0c0b3f641250d981b9c87286d31058,ADD,HBASE-23069
cbb86942eda4b65ddfc5ec436c78a04e5dd21631,SKIP,
cbdc9fcb8a705f4e5ee28a917a335c6f1ef5df42,SKIP,
ccee3d8dd59dfb181d577b5df483632722db01b1,SKIP,

Can't render this file because it contains an unexpected character in line 7 and column 3.

View File

@ -52,10 +52,13 @@ class _DB:
REVERT = 'REVERT'
SKIP = 'SKIP'
def __init__(self, db_path, **_kwargs):
def __init__(self, db_path, initialize_db, **_kwargs):
self._conn = sqlite3.connect(db_path)
if initialize_db:
for table in 'git_commits', 'jira_versions':
self._conn.execute("DROP TABLE IF EXISTS %s" % table)
self._conn.execute("""
CREATE TABLE IF NOT EXISTS "git_commits"(
jira_id TEXT NOT NULL,
@ -93,11 +96,11 @@ class _DB:
git_sha (str): The commit's SHA.
"""
if action == _DB.Action.ADD:
self._conn.execute(
self.conn.execute(
"INSERT INTO git_commits(jira_id, branch, git_sha) VALUES (upper(?),?,?)",
(jira_id, branch, git_sha))
elif action == _DB.Action.REVERT:
self._conn.execute("""
self.conn.execute("""
DELETE FROM git_commits WHERE
jira_id=upper(?)
AND branch=?
@ -105,7 +108,7 @@ class _DB:
def flush_commits(self):
"""Commit any pending changes to the database."""
self._conn.commit()
self.conn.commit()
def apply_git_tag(self, branch, git_sha, git_tag):
"""Annotate a commit in the commits database as being a part of the specified release.
@ -115,7 +118,7 @@ class _DB:
git_sha (str): The commit's SHA.
git_tag (str): The first release tag following the commit.
"""
self._conn.execute("UPDATE git_commits SET git_tag = ? WHERE branch = ? AND git_sha = ?",
self.conn.execute("UPDATE git_commits SET git_tag = ? WHERE branch = ? AND git_sha = ?",
(git_tag, branch, git_sha))
def apply_fix_version(self, jira_id, fix_version):
@ -126,12 +129,12 @@ class _DB:
jira_id (str): The applicable Issue ID from JIRA.
fix_version (str): The annotated `fixVersion` as seen in JIRA.
"""
self._conn.execute("INSERT INTO jira_versions(jira_id, fix_version) VALUES (upper(?),?)",
self.conn.execute("INSERT INTO jira_versions(jira_id, fix_version) VALUES (upper(?),?)",
(jira_id, fix_version))
def unique_jira_ids_from_git(self):
"""Query the commits database for the population of Jira Issue IDs."""
results = self._conn.execute("SELECT distinct jira_id FROM git_commits").fetchall()
results = self.conn.execute("SELECT distinct jira_id FROM git_commits").fetchall()
return [x[0] for x in results]
def backup(self, target):
@ -184,13 +187,14 @@ class _RepoReader:
_identify_amend_jira_id_pattern = re.compile(r'^amend (.+)', re.IGNORECASE)
def __init__(self, db, fallback_actions_path, remote_name, development_branch,
release_line_regexp, **_kwargs):
release_line_regexp, parse_release_tags, **_kwargs):
self._db = db
self._repo = _RepoReader._open_repo()
self._fallback_actions = _RepoReader._load_fallback_actions(fallback_actions_path)
self._remote_name = remote_name
self._development_branch = development_branch
self._release_line_regexp = release_line_regexp
self._parse_release_tags = parse_release_tags
@property
def repo(self):
@ -363,6 +367,7 @@ class _RepoReader:
if cnt % 50 == 0:
self._db.flush_commits()
commits_since_release.append(commit.hexsha)
if self._parse_release_tags:
tag = self._extract_release_tag(commit)
if tag:
self._set_release_tag(release_branch, tag, commits_since_release)
@ -394,15 +399,6 @@ class _JiraReader:
self.client = jira.JIRA(jira_url)
self.throttle_time_in_sec = 1
def _fetch_fix_versions(self, jira_id):
val = self.client.issue(jira_id, fields='fixVersions')
return [version.name for version in val.fields.fixVersions]
def _fetch_fix_versions_throttled(self, jira_id):
val = self._fetch_fix_versions(jira_id)
time.sleep(self.throttle_time_in_sec)
return val
def populate_db(self):
"""Query Jira for issue IDs found in the commits database, writing them to the jira
database."""
@ -427,8 +423,38 @@ class _JiraReader:
if cnt % 50:
self._db.flush_commits()
counter.update(incr=len(chunk))
time.sleep(5)
self._db.flush_commits()
def fetch_issues(self, jira_ids):
"""Retrieve the specified jira Ids."""
global MANAGER
logging.info("retrieving %s jira_ids from the issue tracker", len(jira_ids))
counter = MANAGER.counter(total=len(jira_ids), desc='fetch from Jira', unit='issue')
chunk_size = 50
chunks = [jira_ids[i:i + chunk_size] for i in range(0, len(jira_ids), chunk_size)]
ret = list()
for chunk in chunks:
query = "key IN (" + ",".join([("'" + jira_id + "'") for jira_id in chunk]) + ")"\
+ " ORDER BY issuetype ASC, priority DESC, key ASC"
results = self.client.search_issues(
jql_str=query, maxResults=chunk_size,
fields='summary,issuetype,priority,resolution,components')
for result in results:
val = dict()
val['key'] = result.key
val['summary'] = result.fields.summary.strip()
val['priority'] = result.fields.priority.name.strip()
val['issue_type'] = result.fields.issuetype.name.strip() \
if result.fields.issuetype else None
val['resolution'] = result.fields.resolution.name.strip() \
if result.fields.resolution else None
val['components'] = [x.name.strip() for x in result.fields.components if x] \
if result.fields.components else []
ret.append(val)
counter.update(incr=len(chunk))
return ret
class Auditor:
"""This class builds databases from git and Jira, making it possible to audit the two for
@ -445,6 +471,11 @@ class Auditor:
self._repo_reader = repo_reader
self._jira_reader = jira_reader
self._db = db
self._release_line_fix_versions = dict()
for k, v in _kwargs.items():
if k.endswith('_fix_version'):
release_line = k[:-len('_fix_version')]
self._release_line_fix_versions[release_line] = v
def populate_db_from_git(self):
"""Process the git repository, populating the commits database."""
@ -462,56 +493,126 @@ class Auditor:
database."""
self._jira_reader.populate_db()
@staticmethod
def _write_report(filename, issues):
with open(filename, 'w') as file:
fieldnames = ['key', 'issue_type', 'priority', 'summary', 'resolution', 'components']
writer = csv.DictWriter(file, fieldnames=fieldnames)
writer.writeheader()
for issue in issues:
writer.writerow(issue)
logging.info('generated report at %s', filename)
def report_new_for_release_line(self, release_line):
"""Builds a report of the Jira issues that are new on the target release line, not present
on any of the associated release branches. (i.e., on branch-2 but not
branch-{2.0,2.1,...})"""
matches = [x for x in self._repo_reader.release_line_refs
if x.name == release_line or x.name.endswith('/%s' % release_line)]
release_line_ref = next(iter(matches), None)
if not release_line_ref:
logging.error('release line %s not found. available options are %s.',
release_line, [x.name for x in self._repo_reader.release_line_refs])
return
cursor = self._db.conn.execute("""
SELECT distinct jira_id FROM git_commits
WHERE branch = ?
EXCEPT SELECT distinct jira_id FROM git_commits
WHERE branch LIKE ?
""", (release_line_ref.name, '%s.%%' % release_line_ref.name))
jira_ids = [x[0] for x in cursor.fetchall()]
issues = self._jira_reader.fetch_issues(jira_ids)
filename = 'new_for_%s.csv' % release_line.replace('/', '-')
Auditor._write_report(filename, issues)
@staticmethod
def _str_to_bool(val):
if not val:
return False
return val.lower() in ['true', 't', 'yes', 'y']
@staticmethod
def _build_first_pass_parser():
parser = argparse.ArgumentParser(add_help=False)
parser.add_argument(
building_group = parser.add_argument_group(title='Building the audit database')
building_group.add_argument(
'--populate-from-git',
help='When true, populate the audit database from the Git repository.',
type=Auditor._str_to_bool,
default=True)
building_group.add_argument(
'--populate-from-jira',
help='When true, populate the audit database from Jira.',
type=Auditor._str_to_bool,
default=True)
building_group.add_argument(
'--db-path',
help='Path to the database file, or leave unspecified for a transient db.',
default=':memory:')
parser.add_argument(
building_group.add_argument(
'--initialize-db',
help='When true, initialize the database tables. This is destructive to the contents'
+ ' of an existing database.',
type=Auditor._str_to_bool,
default=False)
report_group = parser.add_argument_group('Generating reports')
report_group.add_argument(
'--report-new-for-release-line',
help=Auditor.report_new_for_release_line.__doc__,
type=str,
default=None)
git_repo_group = parser.add_argument_group('Interactions with the Git repo')
git_repo_group.add_argument(
'--git-repo-path',
help='Path to the git repo, or leave unspecified to infer from the current'
+ ' file\'s path.',
default=__file__)
parser.add_argument(
git_repo_group.add_argument(
'--remote-name',
help='The name of the git remote to use when identifying branches.',
help='The name of the git remote to use when identifying branches.'
+ ' Default: \'origin\'',
default='origin')
parser.add_argument(
git_repo_group.add_argument(
'--development-branch',
help='The name of the branch from which all release lines originate.',
help='The name of the branch from which all release lines originate.'
+ ' Default: \'master\'',
default='master')
parser.add_argument(
git_repo_group.add_argument(
'--development-branch-fix-version',
help='The Jira fixVersion used to indicate an issue is committed to the development'
+ 'branch.',
+ ' branch. Default: \'3.0.0\'',
default='3.0.0')
parser.add_argument(
git_repo_group.add_argument(
'--release-line-regexp',
help='A regexp used to identify release lines.',
default=r'branch-\d+$')
parser.add_argument(
git_repo_group.add_argument(
'--parse-release-tags',
help='When true, look for release tags and annotate commits according to their release'
+ ' version. An Expensive calculation, disabled by default.',
type=Auditor._str_to_bool,
default=False)
git_repo_group.add_argument(
'--fallback-actions-path',
help='Path to a file containing _DB.Actions applicable to specific git shas.',
default='fallback_actions.csv')
parser.add_argument(
jira_group = parser.add_argument_group('Interactions with Jira')
jira_group.add_argument(
'--jira-url',
help='A URL locating the target JIRA instance.',
default='https://issues.apache.org/jira')
return parser
return parser, git_repo_group
@staticmethod
def _build_second_pass_parser(repo_reader, parent_parser):
parser = argparse.ArgumentParser(parents=[parent_parser])
def _build_second_pass_parser(repo_reader, parent_parser, git_repo_group):
for release_line in repo_reader.release_line_refs:
name = release_line.name
parser.add_argument(
git_repo_group.add_argument(
'--%s-fix-version' % name[len(repo_reader.remote_name) + 1:],
help='The Jira fixVersion used to indicate an issue is committed to the specified '
+ 'release line branch',
required=True)
return parser
return argparse.ArgumentParser(parents=[parent_parser])
MANAGER = None
@ -520,19 +621,26 @@ MANAGER = None
def main():
global MANAGER
first_pass_parser = Auditor._build_first_pass_parser()
known_args, extras = first_pass_parser.parse_known_args()
known_args = vars(known_args)
with _DB(**known_args) as db:
first_pass_parser, git_repo_group = Auditor._build_first_pass_parser()
first_pass_args, extras = first_pass_parser.parse_known_args()
first_pass_args_dict = vars(first_pass_args)
with _DB(**first_pass_args_dict) as db:
logging.basicConfig(level=logging.INFO)
repo_reader = _RepoReader(db, **known_args)
jira_reader = _JiraReader(db, **known_args)
second_pass_parser = Auditor._build_second_pass_parser(repo_reader, first_pass_parser)
args = second_pass_parser.parse_args(extras)
auditor = Auditor(repo_reader, jira_reader, db, **vars(args))
repo_reader = _RepoReader(db, **first_pass_args_dict)
jira_reader = _JiraReader(db, **first_pass_args_dict)
second_pass_parser = Auditor._build_second_pass_parser(
repo_reader, first_pass_parser, git_repo_group)
second_pass_args = second_pass_parser.parse_args(extras, first_pass_args)
second_pass_args_dict = vars(second_pass_args)
auditor = Auditor(repo_reader, jira_reader, db, **second_pass_args_dict)
with enlighten.get_manager() as MANAGER:
if second_pass_args.populate_from_git:
auditor.populate_db_from_git()
if second_pass_args.populate_from_jira:
auditor.populate_db_from_jira()
if second_pass_args.report_new_for_release_line:
release_line = second_pass_args.report_new_for_release_line
auditor.report_new_for_release_line(release_line)
if __name__ == '__main__':