HBASE-25489 improve performance of --parse-release-tags (#2867)

Profiler shows a lot of time spent in the UPDATE SQL statement. Remove the tight loop and let SQL
do a bulk-update instead.

Signed-off-by: Huaxiang Sun <huaxiangsun@apache.org>
Signed-off-by: Michael Stack <stack@apache.org>
This commit is contained in:
Nick Dimiduk 2021-01-08 14:43:56 -08:00 committed by GitHub
parent 49aba57181
commit 84c4033b1a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
1 changed files with 10 additions and 10 deletions

View File

@ -122,16 +122,21 @@ class _DB:
"""Commit any pending changes to the database.""" """Commit any pending changes to the database."""
self.conn.commit() self.conn.commit()
def apply_git_tag(self, branch, git_sha, git_tag): def apply_git_tag(self, branch, git_tag, git_shas):
"""Annotate a commit in the commits database as being a part of the specified release. """Annotate a commit in the commits database as being a part of the specified release.
Args: Args:
branch (str): The name of the git branch from which the commit originates. branch (str): The name of the git branch from which the commit originates.
git_sha (str): The commit's SHA.
git_tag (str): The first release tag following the commit. git_tag (str): The first release tag following the commit.
git_shas: The commits' SHAs.
""" """
self.conn.execute("UPDATE git_commits SET git_tag = ? WHERE branch = ? AND git_sha = ?", self.conn.execute(
(git_tag, branch, git_sha)) (
f"UPDATE git_commits SET git_tag = ?"
f" WHERE branch = ?"
f" AND git_sha in ({','.join('?' for _ in git_shas)})"
),
[git_tag, branch] + git_shas)
def apply_fix_version(self, jira_id, fix_version): def apply_fix_version(self, jira_id, fix_version):
"""Annotate a Jira issue in the jira database as being part of the specified release """Annotate a Jira issue in the jira database as being part of the specified release
@ -327,12 +332,7 @@ class _RepoReader:
return None return None
def _set_release_tag(self, branch, tag, shas): def _set_release_tag(self, branch, tag, shas):
cnt = 0 self._db.apply_git_tag(branch, tag, shas)
for sha in shas:
self._db.apply_git_tag(branch, sha, tag)
cnt += 1
if cnt % 50 == 0:
self._db.flush_commits()
self._db.flush_commits() self._db.flush_commits()
def _resolve_ambiguity(self, commit): def _resolve_ambiguity(self, commit):