HBASE-23763 Add 'new on release line report' for git/jira audit tool (#1104)

Codify building the summary of what's new on a release line
branch (i.e., `branch-2`), but not yet released on earlier release
branches of that line.

Builds a CSV report that looks like https://home.apache.org/~ndimiduk/new_for_branch-2.csv
This commit is contained in:
Nick Dimiduk 2020-01-30 09:31:01 -08:00 committed by GitHub
parent 8b00f9f0b1
commit 85b0c8e6bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 200 additions and 62 deletions

View File

@ -49,44 +49,73 @@ The tool provides basic help docs.
```shell script ```shell script
$ ./venv/bin/python ./git_jira_release_audit.py --help $ ./venv/bin/python ./git_jira_release_audit.py --help
usage: git_jira_release_audit.py [-h] [--db-path DB_PATH] usage: git_jira_release_audit.py [-h] [--populate-from-git POPULATE_FROM_GIT]
[--populate-from-jira POPULATE_FROM_JIRA]
[--db-path DB_PATH]
[--initialize-db INITIALIZE_DB]
[--report-new-for-release-line REPORT_NEW_FOR_RELEASE_LINE]
[--git-repo-path GIT_REPO_PATH] [--git-repo-path GIT_REPO_PATH]
[--remote-name REMOTE_NAME] [--remote-name REMOTE_NAME]
[--development-branch DEVELOPMENT_BRANCH] [--development-branch DEVELOPMENT_BRANCH]
[--development-branch-fix-version DEVELOPMENT_BRANCH_FIX_VERSION] [--development-branch-fix-version DEVELOPMENT_BRANCH_FIX_VERSION]
[--release-line-regexp RELEASE_LINE_REGEXP] [--release-line-regexp RELEASE_LINE_REGEXP]
[--parse-release-tags PARSE_RELEASE_TAGS]
[--fallback-actions-path FALLBACK_ACTIONS_PATH] [--fallback-actions-path FALLBACK_ACTIONS_PATH]
[--jira-url JIRA_URL] --branch-1-fix-version [--jira-url JIRA_URL]
BRANCH_1_FIX_VERSION --branch-2-fix-version
BRANCH_2_FIX_VERSION
optional arguments: optional arguments:
-h, --help show this help message and exit -h, --help show this help message and exit
Building the audit database:
--populate-from-git POPULATE_FROM_GIT
When true, populate the audit database from the Git
repository.
--populate-from-jira POPULATE_FROM_JIRA
When true, populate the audit database from Jira.
--db-path DB_PATH Path to the database file, or leave unspecified for a --db-path DB_PATH Path to the database file, or leave unspecified for a
transient db. transient db.
--initialize-db INITIALIZE_DB
When true, initialize the database tables. This is
destructive to the contents of an existing database.
Generating reports:
--report-new-for-release-line REPORT_NEW_FOR_RELEASE_LINE
Builds a report of the Jira issues that are new on the
target release line, not present on any of the
associated release branches. (i.e., on branch-2 but
not branch-{2.0,2.1,...})
Interactions with the Git repo:
--git-repo-path GIT_REPO_PATH --git-repo-path GIT_REPO_PATH
Path to the git repo, or leave unspecified to infer Path to the git repo, or leave unspecified to infer
from the current file's path. from the current file's path.
--remote-name REMOTE_NAME --remote-name REMOTE_NAME
The name of the git remote to use when identifying The name of the git remote to use when identifying
branches. branches. Default: 'origin'
--development-branch DEVELOPMENT_BRANCH --development-branch DEVELOPMENT_BRANCH
The name of the branch from which all release lines The name of the branch from which all release lines
originate. originate. Default: 'master'
--development-branch-fix-version DEVELOPMENT_BRANCH_FIX_VERSION --development-branch-fix-version DEVELOPMENT_BRANCH_FIX_VERSION
The Jira fixVersion used to indicate an issue is The Jira fixVersion used to indicate an issue is
committed to the development branch. committed to the development branch. Default: '3.0.0'
--release-line-regexp RELEASE_LINE_REGEXP --release-line-regexp RELEASE_LINE_REGEXP
A regexp used to identify release lines. A regexp used to identify release lines.
--parse-release-tags PARSE_RELEASE_TAGS
When true, look for release tags and annotate commits
according to their release version. An Expensive
calculation, disabled by default.
--fallback-actions-path FALLBACK_ACTIONS_PATH --fallback-actions-path FALLBACK_ACTIONS_PATH
Path to a file containing _DB.Actions applicable to specific git shas. Path to a file containing _DB.Actions applicable to
--jira-url JIRA_URL A URL locating the target JIRA instance. specific git shas.
--branch-1-fix-version BRANCH_1_FIX_VERSION --branch-1-fix-version BRANCH_1_FIX_VERSION
The Jira fixVersion used to indicate an issue is The Jira fixVersion used to indicate an issue is
committed to the specified release line branch committed to the specified release line branch
--branch-2-fix-version BRANCH_2_FIX_VERSION --branch-2-fix-version BRANCH_2_FIX_VERSION
The Jira fixVersion used to indicate an issue is The Jira fixVersion used to indicate an issue is
committed to the specified release line branch committed to the specified release line branch
Interactions with Jira:
--jira-url JIRA_URL A URL locating the target JIRA instance.
``` ```
Example Run: Example Run:

View File

@ -183,6 +183,7 @@ c89cfd3406823cf05fa83464c5ddee16bf0d473f,ADD,HBASE-17248
c89cfd3406823cf05fa83464c5ddee16bf0d473f,ADD,HBASE-17248 c89cfd3406823cf05fa83464c5ddee16bf0d473f,ADD,HBASE-17248
c97905a962b88a0c68ca8a51c2e507daec81ca6d,SKIP, c97905a962b88a0c68ca8a51c2e507daec81ca6d,SKIP,
c9f506a2973e0acbd0d2df7b9353c9291f6c94a8,SKIP, c9f506a2973e0acbd0d2df7b9353c9291f6c94a8,SKIP,
cbb2c7e00d0c0b3f641250d981b9c87286d31058,ADD,HBASE-23069
cbb86942eda4b65ddfc5ec436c78a04e5dd21631,SKIP, cbb86942eda4b65ddfc5ec436c78a04e5dd21631,SKIP,
cbdc9fcb8a705f4e5ee28a917a335c6f1ef5df42,SKIP, cbdc9fcb8a705f4e5ee28a917a335c6f1ef5df42,SKIP,
ccee3d8dd59dfb181d577b5df483632722db01b1,SKIP, ccee3d8dd59dfb181d577b5df483632722db01b1,SKIP,

Can't render this file because it contains an unexpected character in line 7 and column 3.

View File

@ -52,10 +52,13 @@ class _DB:
REVERT = 'REVERT' REVERT = 'REVERT'
SKIP = 'SKIP' SKIP = 'SKIP'
def __init__(self, db_path, **_kwargs): def __init__(self, db_path, initialize_db, **_kwargs):
self._conn = sqlite3.connect(db_path) self._conn = sqlite3.connect(db_path)
if initialize_db:
for table in 'git_commits', 'jira_versions': for table in 'git_commits', 'jira_versions':
self._conn.execute("DROP TABLE IF EXISTS %s" % table) self._conn.execute("DROP TABLE IF EXISTS %s" % table)
self._conn.execute(""" self._conn.execute("""
CREATE TABLE IF NOT EXISTS "git_commits"( CREATE TABLE IF NOT EXISTS "git_commits"(
jira_id TEXT NOT NULL, jira_id TEXT NOT NULL,
@ -93,11 +96,11 @@ class _DB:
git_sha (str): The commit's SHA. git_sha (str): The commit's SHA.
""" """
if action == _DB.Action.ADD: if action == _DB.Action.ADD:
self._conn.execute( self.conn.execute(
"INSERT INTO git_commits(jira_id, branch, git_sha) VALUES (upper(?),?,?)", "INSERT INTO git_commits(jira_id, branch, git_sha) VALUES (upper(?),?,?)",
(jira_id, branch, git_sha)) (jira_id, branch, git_sha))
elif action == _DB.Action.REVERT: elif action == _DB.Action.REVERT:
self._conn.execute(""" self.conn.execute("""
DELETE FROM git_commits WHERE DELETE FROM git_commits WHERE
jira_id=upper(?) jira_id=upper(?)
AND branch=? AND branch=?
@ -105,7 +108,7 @@ class _DB:
def flush_commits(self): def flush_commits(self):
"""Commit any pending changes to the database.""" """Commit any pending changes to the database."""
self._conn.commit() self.conn.commit()
def apply_git_tag(self, branch, git_sha, git_tag): def apply_git_tag(self, branch, git_sha, git_tag):
"""Annotate a commit in the commits database as being a part of the specified release. """Annotate a commit in the commits database as being a part of the specified release.
@ -115,7 +118,7 @@ class _DB:
git_sha (str): The commit's SHA. git_sha (str): The commit's SHA.
git_tag (str): The first release tag following the commit. git_tag (str): The first release tag following the commit.
""" """
self._conn.execute("UPDATE git_commits SET git_tag = ? WHERE branch = ? AND git_sha = ?", self.conn.execute("UPDATE git_commits SET git_tag = ? WHERE branch = ? AND git_sha = ?",
(git_tag, branch, git_sha)) (git_tag, branch, git_sha))
def apply_fix_version(self, jira_id, fix_version): def apply_fix_version(self, jira_id, fix_version):
@ -126,12 +129,12 @@ class _DB:
jira_id (str): The applicable Issue ID from JIRA. jira_id (str): The applicable Issue ID from JIRA.
fix_version (str): The annotated `fixVersion` as seen in JIRA. fix_version (str): The annotated `fixVersion` as seen in JIRA.
""" """
self._conn.execute("INSERT INTO jira_versions(jira_id, fix_version) VALUES (upper(?),?)", self.conn.execute("INSERT INTO jira_versions(jira_id, fix_version) VALUES (upper(?),?)",
(jira_id, fix_version)) (jira_id, fix_version))
def unique_jira_ids_from_git(self): def unique_jira_ids_from_git(self):
"""Query the commits database for the population of Jira Issue IDs.""" """Query the commits database for the population of Jira Issue IDs."""
results = self._conn.execute("SELECT distinct jira_id FROM git_commits").fetchall() results = self.conn.execute("SELECT distinct jira_id FROM git_commits").fetchall()
return [x[0] for x in results] return [x[0] for x in results]
def backup(self, target): def backup(self, target):
@ -184,13 +187,14 @@ class _RepoReader:
_identify_amend_jira_id_pattern = re.compile(r'^amend (.+)', re.IGNORECASE) _identify_amend_jira_id_pattern = re.compile(r'^amend (.+)', re.IGNORECASE)
def __init__(self, db, fallback_actions_path, remote_name, development_branch, def __init__(self, db, fallback_actions_path, remote_name, development_branch,
release_line_regexp, **_kwargs): release_line_regexp, parse_release_tags, **_kwargs):
self._db = db self._db = db
self._repo = _RepoReader._open_repo() self._repo = _RepoReader._open_repo()
self._fallback_actions = _RepoReader._load_fallback_actions(fallback_actions_path) self._fallback_actions = _RepoReader._load_fallback_actions(fallback_actions_path)
self._remote_name = remote_name self._remote_name = remote_name
self._development_branch = development_branch self._development_branch = development_branch
self._release_line_regexp = release_line_regexp self._release_line_regexp = release_line_regexp
self._parse_release_tags = parse_release_tags
@property @property
def repo(self): def repo(self):
@ -363,6 +367,7 @@ class _RepoReader:
if cnt % 50 == 0: if cnt % 50 == 0:
self._db.flush_commits() self._db.flush_commits()
commits_since_release.append(commit.hexsha) commits_since_release.append(commit.hexsha)
if self._parse_release_tags:
tag = self._extract_release_tag(commit) tag = self._extract_release_tag(commit)
if tag: if tag:
self._set_release_tag(release_branch, tag, commits_since_release) self._set_release_tag(release_branch, tag, commits_since_release)
@ -394,15 +399,6 @@ class _JiraReader:
self.client = jira.JIRA(jira_url) self.client = jira.JIRA(jira_url)
self.throttle_time_in_sec = 1 self.throttle_time_in_sec = 1
def _fetch_fix_versions(self, jira_id):
val = self.client.issue(jira_id, fields='fixVersions')
return [version.name for version in val.fields.fixVersions]
def _fetch_fix_versions_throttled(self, jira_id):
val = self._fetch_fix_versions(jira_id)
time.sleep(self.throttle_time_in_sec)
return val
def populate_db(self): def populate_db(self):
"""Query Jira for issue IDs found in the commits database, writing them to the jira """Query Jira for issue IDs found in the commits database, writing them to the jira
database.""" database."""
@ -427,8 +423,38 @@ class _JiraReader:
if cnt % 50: if cnt % 50:
self._db.flush_commits() self._db.flush_commits()
counter.update(incr=len(chunk)) counter.update(incr=len(chunk))
time.sleep(5)
self._db.flush_commits() self._db.flush_commits()
def fetch_issues(self, jira_ids):
    """Retrieve the specified jira Ids.

    Issues are requested from Jira in chunks of 50 keys per search.

    Args:
        jira_ids (list of str): The Jira issue keys to retrieve.

    Returns:
        list of dict: One dict per issue returned by Jira, with the keys 'key',
        'summary', 'priority', 'issue_type', 'resolution' and 'components'. The
        'priority', 'issue_type' and 'resolution' values are None when Jira reports
        no value for the field; 'components' is a (possibly empty) list of names.
    """
    def _name_or_none(field):
        # Jira may return no value for these fields; guard all of them the same way.
        return field.name.strip() if field else None

    logging.info("retrieving %s jira_ids from the issue tracker", len(jira_ids))
    counter = MANAGER.counter(total=len(jira_ids), desc='fetch from Jira', unit='issue')
    chunk_size = 50
    chunks = [jira_ids[i:i + chunk_size] for i in range(0, len(jira_ids), chunk_size)]
    ret = []
    for chunk in chunks:
        # NOTE: the ORDER BY clause sorts within a single chunk only; the concatenated
        # result is not globally sorted when more than `chunk_size` ids are requested.
        query = "key IN (" + ",".join([("'" + jira_id + "'") for jira_id in chunk]) + ")"\
            + " ORDER BY issuetype ASC, priority DESC, key ASC"
        results = self.client.search_issues(
            jql_str=query, maxResults=chunk_size,
            fields='summary,issuetype,priority,resolution,components')
        for result in results:
            fields = result.fields
            ret.append({
                'key': result.key,
                'summary': fields.summary.strip(),
                'priority': _name_or_none(fields.priority),
                'issue_type': _name_or_none(fields.issuetype),
                'resolution': _name_or_none(fields.resolution),
                'components': [x.name.strip() for x in fields.components if x]
                              if fields.components else [],
            })
        counter.update(incr=len(chunk))
    return ret
class Auditor: class Auditor:
"""This class builds databases from git and Jira, making it possible to audit the two for """This class builds databases from git and Jira, making it possible to audit the two for
@ -445,6 +471,11 @@ class Auditor:
self._repo_reader = repo_reader self._repo_reader = repo_reader
self._jira_reader = jira_reader self._jira_reader = jira_reader
self._db = db self._db = db
self._release_line_fix_versions = dict()
for k, v in _kwargs.items():
if k.endswith('_fix_version'):
release_line = k[:-len('_fix_version')]
self._release_line_fix_versions[release_line] = v
def populate_db_from_git(self): def populate_db_from_git(self):
"""Process the git repository, populating the commits database.""" """Process the git repository, populating the commits database."""
@ -462,56 +493,126 @@ class Auditor:
database.""" database."""
self._jira_reader.populate_db() self._jira_reader.populate_db()
@staticmethod
def _write_report(filename, issues):
with open(filename, 'w') as file:
fieldnames = ['key', 'issue_type', 'priority', 'summary', 'resolution', 'components']
writer = csv.DictWriter(file, fieldnames=fieldnames)
writer.writeheader()
for issue in issues:
writer.writerow(issue)
logging.info('generated report at %s', filename)
def report_new_for_release_line(self, release_line):
    """Builds a report of the Jira issues that are new on the target release line, not present
    on any of the associated release branches. (i.e., on branch-2 but not
    branch-{2.0,2.1,...})"""
    # NOTE: the docstring above is also used verbatim as the command-line help text for
    # --report-new-for-release-line, so its wording is user-facing.
    remote_suffix = '/%s' % release_line
    release_line_ref = None
    # Accept either the exact ref name or a remote-qualified one; the first match wins.
    for candidate in self._repo_reader.release_line_refs:
        if candidate.name == release_line or candidate.name.endswith(remote_suffix):
            release_line_ref = candidate
            break
    if not release_line_ref:
        available = [x.name for x in self._repo_reader.release_line_refs]
        logging.error('release line %s not found. available options are %s.',
                      release_line, available)
        return
    # Issues committed on the release line itself, minus those already present on any
    # branch whose name is the release line's name followed by '.' and anything else.
    line_name = release_line_ref.name
    released_branches_pattern = '%s.%%' % line_name
    cursor = self._db.conn.execute("""
        SELECT distinct jira_id FROM git_commits
        WHERE branch = ?
        EXCEPT SELECT distinct jira_id FROM git_commits
        WHERE branch LIKE ?
        """, (line_name, released_branches_pattern))
    jira_ids = [row[0] for row in cursor.fetchall()]
    issues = self._jira_reader.fetch_issues(jira_ids)
    report_name = 'new_for_%s.csv' % release_line.replace('/', '-')
    Auditor._write_report(report_name, issues)
@staticmethod
def _str_to_bool(val):
if not val:
return False
return val.lower() in ['true', 't', 'yes', 'y']
@staticmethod @staticmethod
def _build_first_pass_parser(): def _build_first_pass_parser():
parser = argparse.ArgumentParser(add_help=False) parser = argparse.ArgumentParser(add_help=False)
parser.add_argument( building_group = parser.add_argument_group(title='Building the audit database')
building_group.add_argument(
'--populate-from-git',
help='When true, populate the audit database from the Git repository.',
type=Auditor._str_to_bool,
default=True)
building_group.add_argument(
'--populate-from-jira',
help='When true, populate the audit database from Jira.',
type=Auditor._str_to_bool,
default=True)
building_group.add_argument(
'--db-path', '--db-path',
help='Path to the database file, or leave unspecified for a transient db.', help='Path to the database file, or leave unspecified for a transient db.',
default=':memory:') default=':memory:')
parser.add_argument( building_group.add_argument(
'--initialize-db',
help='When true, initialize the database tables. This is destructive to the contents'
+ ' of an existing database.',
type=Auditor._str_to_bool,
default=False)
report_group = parser.add_argument_group('Generating reports')
report_group.add_argument(
'--report-new-for-release-line',
help=Auditor.report_new_for_release_line.__doc__,
type=str,
default=None)
git_repo_group = parser.add_argument_group('Interactions with the Git repo')
git_repo_group.add_argument(
'--git-repo-path', '--git-repo-path',
help='Path to the git repo, or leave unspecified to infer from the current' help='Path to the git repo, or leave unspecified to infer from the current'
+ ' file\'s path.', + ' file\'s path.',
default=__file__) default=__file__)
parser.add_argument( git_repo_group.add_argument(
'--remote-name', '--remote-name',
help='The name of the git remote to use when identifying branches.', help='The name of the git remote to use when identifying branches.'
+ ' Default: \'origin\'',
default='origin') default='origin')
parser.add_argument( git_repo_group.add_argument(
'--development-branch', '--development-branch',
help='The name of the branch from which all release lines originate.', help='The name of the branch from which all release lines originate.'
+ ' Default: \'master\'',
default='master') default='master')
parser.add_argument( git_repo_group.add_argument(
'--development-branch-fix-version', '--development-branch-fix-version',
help='The Jira fixVersion used to indicate an issue is committed to the development' help='The Jira fixVersion used to indicate an issue is committed to the development'
+ 'branch.', + ' branch. Default: \'3.0.0\'',
default='3.0.0') default='3.0.0')
parser.add_argument( git_repo_group.add_argument(
'--release-line-regexp', '--release-line-regexp',
help='A regexp used to identify release lines.', help='A regexp used to identify release lines.',
default=r'branch-\d+$') default=r'branch-\d+$')
parser.add_argument( git_repo_group.add_argument(
'--parse-release-tags',
help='When true, look for release tags and annotate commits according to their release'
+ ' version. An Expensive calculation, disabled by default.',
type=Auditor._str_to_bool,
default=False)
git_repo_group.add_argument(
'--fallback-actions-path', '--fallback-actions-path',
help='Path to a file containing _DB.Actions applicable to specific git shas.', help='Path to a file containing _DB.Actions applicable to specific git shas.',
default='fallback_actions.csv') default='fallback_actions.csv')
parser.add_argument( jira_group = parser.add_argument_group('Interactions with Jira')
jira_group.add_argument(
'--jira-url', '--jira-url',
help='A URL locating the target JIRA instance.', help='A URL locating the target JIRA instance.',
default='https://issues.apache.org/jira') default='https://issues.apache.org/jira')
return parser return parser, git_repo_group
@staticmethod @staticmethod
def _build_second_pass_parser(repo_reader, parent_parser): def _build_second_pass_parser(repo_reader, parent_parser, git_repo_group):
parser = argparse.ArgumentParser(parents=[parent_parser])
for release_line in repo_reader.release_line_refs: for release_line in repo_reader.release_line_refs:
name = release_line.name name = release_line.name
parser.add_argument( git_repo_group.add_argument(
'--%s-fix-version' % name[len(repo_reader.remote_name) + 1:], '--%s-fix-version' % name[len(repo_reader.remote_name) + 1:],
help='The Jira fixVersion used to indicate an issue is committed to the specified ' help='The Jira fixVersion used to indicate an issue is committed to the specified '
+ 'release line branch', + 'release line branch',
required=True) required=True)
return parser return argparse.ArgumentParser(parents=[parent_parser])
MANAGER = None MANAGER = None
@ -520,19 +621,26 @@ MANAGER = None
def main(): def main():
global MANAGER global MANAGER
first_pass_parser = Auditor._build_first_pass_parser() first_pass_parser, git_repo_group = Auditor._build_first_pass_parser()
known_args, extras = first_pass_parser.parse_known_args() first_pass_args, extras = first_pass_parser.parse_known_args()
known_args = vars(known_args) first_pass_args_dict = vars(first_pass_args)
with _DB(**known_args) as db: with _DB(**first_pass_args_dict) as db:
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
repo_reader = _RepoReader(db, **known_args) repo_reader = _RepoReader(db, **first_pass_args_dict)
jira_reader = _JiraReader(db, **known_args) jira_reader = _JiraReader(db, **first_pass_args_dict)
second_pass_parser = Auditor._build_second_pass_parser(repo_reader, first_pass_parser) second_pass_parser = Auditor._build_second_pass_parser(
args = second_pass_parser.parse_args(extras) repo_reader, first_pass_parser, git_repo_group)
auditor = Auditor(repo_reader, jira_reader, db, **vars(args)) second_pass_args = second_pass_parser.parse_args(extras, first_pass_args)
second_pass_args_dict = vars(second_pass_args)
auditor = Auditor(repo_reader, jira_reader, db, **second_pass_args_dict)
with enlighten.get_manager() as MANAGER: with enlighten.get_manager() as MANAGER:
if second_pass_args.populate_from_git:
auditor.populate_db_from_git() auditor.populate_db_from_git()
if second_pass_args.populate_from_jira:
auditor.populate_db_from_jira() auditor.populate_db_from_jira()
if second_pass_args.report_new_for_release_line:
release_line = second_pass_args.report_new_for_release_line
auditor.report_new_for_release_line(release_line)
if __name__ == '__main__': if __name__ == '__main__':