from __future__ import print_function

"""
To be done:
- Investigate whether it is possible to obtain the last svn revision number
  without switching to it.
- Investigate file mode differences reported by gitk, see svn revision 171449.
- Simplify difference check to a single call to diff.
  Verify that all common files are equal, ignore non common files,
  check stderr and stdout of diff.
"""

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Workaround for slow updates from an svn branch to git.
See also jira issue INFRA-9182

Situation:

Remote svn repo        ---> (slow) git-svn fetch --->  Remote git repo (upstream)
   |                                                      |
   |                                                      |
   v                                                      v
Local svn working copy --->    this workaround   --->  Local git repo

When the remote git-svn fetch is slow, the remote git repo is behind
the remote svn repo.

When this script is run it will first check that the local working copy
and repository are clean.
Then it switches the svn working copy to the branch, which updates from
the remote.
Then it fetches the branch from the git upstream repo, and merges the branch
locally.
Normally the local svn and git will then be at the same svn revision,
and the script will exit.

Otherwise the remote git repo is out of date, and the following happens.

It is checked that the hostname and path and the uuid of the remote svn repo
as reported by the local svn working copy and as reported by the local git
repo are the same.

For the branch branchname in a local git repository following an upstream
git-svn git repository, this maintains commits on a temporary git branch
branchname.svn in the local git repository.
These commits contain metadata that differs slightly from git svn
(svn2git-id: instead of git-svn-id:).
Otherwise the messages of the added commits are the same as their
counterparts from git svn, except occasionally for an added or missed
empty line when the svn commit message ends in a new line.

Normally the added git commits and their git-svn counterparts have no
differences between their working trees.
However such differences can occur, for example occasionally file modes are
different in the git working tree.
See also the documentation of git-svn reset and the limitations below.

In order not to interfere with git-svn this script only adds commits to
a temporary branch branchname.svn, and the commit messages are chosen
differently, they do not contain git-svn-id: .

In case an earlier branchname.svn exists, it will first be deleted if
necessary, and restarted at the later branch.
Therefore branchname.svn is temporary and should only be used locally.

By default, no more than 20 commits will be added to branchname.svn in a
single run.

The earlier revision number is taken from the git-svn-id: message of git svn,
or from the latest revision number in the commit message on branchname.svn,
whichever is later.

This allows branchname.svn to be used as a local git branch instead of
branchname to develop new features locally, for example by merging
branchname.svn into a feature branch.

This works by interpretation of the lines of svn update messages (U/A/D etc.)
by copying these files and their protection bits from the local svn working
copy into the git working tree, and by deleting files and directories
in the git working tree.

An example commit in lucene-solr that adds a binary file, on which this
script provides a correct git working tree:
  svn revision 1707457
  git-svn commit 3c0390f71e1f08a17f32bc207b4003362f8b6ac2

Limitations:

All svn properties are ignored here.

Commit messages added to the git repo occasionally do not have the same
number of empty lines as the corresponding svn commit message.
"""

"""
This was developed on Linux using the following program versions:
  python 2.7.6
  python 3.4.3
  git 1.9.1
  svn 1.8.8
  GNU bash, version 4.3.11(1)-release (x86_64-pc-linux-gnu)
  sed (GNU sed) 4.2.2
  grep (GNU grep) 2.16
  diff (GNU diffutils) 3.3
  cp (GNU coreutils) 8.21
  rm (GNU coreutils) 8.21
  mkdir (GNU coreutils) 8.21

gitk (part of git) was used for manual testing:
- delete branchname.svn, reset branchname.svn and branchname to earlier to
  simulate going back in history,
- diff a commit generated here to a commit from git svn, ideally there are
  no differences,
- update, reload, show commits in reverse order of commit date, ...
"""

import os
import shutil
import subprocess
import sys

from xml import sax
from xml.sax.handler import ContentHandler

try:
    from urllib.parse import urlparse  # python 3
except ImportError:
    from urlparse import urlparse  # python 2

# In python 3 subprocess output is bytes and has to be decoded to str.
binaryToString = sys.version_info >= (3, 0)


def decodeBytesToString(bytes):
    """Decode subprocess output bytes as utf-8 text."""
    return bytes.decode("utf-8")


class SvnInfoHandler(ContentHandler):
    """SAX handler for the output of 'svn info --xml'.

    Collects the text of the url and uuid elements as attributes of the same
    name, and the revision attribute of the commit element.
    """

    commitTag = "commit"
    revisionAttr = "revision"
    urlTag = "url"
    uuidTag = "uuid"
    charCollectTags = (urlTag, uuidTag)  # also used as SvnInfoHandler attributes

    def __init__(self):
        ContentHandler.__init__(self)
        self.lastChangeRev = None
        self.lastLogEntry = None
        for tag in self.charCollectTags:
            setattr(self, tag, None)
        self.chars = None  # text collected so far, None outside collected tags

    def startElement(self, name, attrs):
        if name == self.commitTag:
            self.lastChangeRev = int(attrs.getValue(self.revisionAttr))
        elif name in self.charCollectTags:
            self.chars = ""

    def characters(self, content):
        # The parser may deliver the text of one element in several pieces.
        if self.chars is not None:
            self.chars += content

    def endElement(self, name):
        if name in self.charCollectTags:
            setattr(self, name, self.chars)
            self.chars = None

    def getLastChangeRevision(self):
        """Return the revision of the commit element as int, or None."""
        return self.lastChangeRev


class SvnLogEntry(object):
    """A single svn log entry."""
    pass  # attributes set in SvnLogHandler: revision, author, date, msg


class SvnLogHandler(ContentHandler):
    """SAX handler for the output of 'svn log --xml', collects SvnLogEntry's."""

    logEntryTag = "logentry"
    revisionAttr = "revision"  # also used as SvnLogEntry attribute
    authorTag = "author"
    dateTag = "date"
    msgTag = "msg"
    charCollectTags = (authorTag, dateTag, msgTag)  # also used as SvnLogEntry attributes

    def __init__(self):
        ContentHandler.__init__(self)
        self.logEntries = []
        self.lastLogEntry = None  # the entry currently being parsed
        self.chars = None  # text collected so far, None outside collected tags

    def startElement(self, name, attrs):
        if name == self.logEntryTag:
            self.lastLogEntry = SvnLogEntry()
            setattr(self.lastLogEntry, self.revisionAttr,
                    int(attrs.getValue(self.revisionAttr)))
            for tag in self.charCollectTags:
                setattr(self.lastLogEntry, tag, None)
            return

        if name in self.charCollectTags:
            self.chars = ""

    def characters(self, content):
        # The parser may deliver the text of one element in several pieces.
        if self.chars is not None:
            self.chars += content

    def endElement(self, name):
        if name in self.charCollectTags:
            setattr(self.lastLogEntry, name, self.chars)
            self.chars = None
            return

        if name == self.logEntryTag:
            self.logEntries.append(self.lastLogEntry)
            self.lastLogEntry = None

    def getLogEntries(self):
        """Return the list of SvnLogEntry's in the order they were parsed."""
        return self.logEntries


class SubProcessAtPath(object):
    """Run subprocesses with the current directory set to a given path."""

    def __init__(self, pathName, verbose=True):
        self.pathName = pathName
        self.verbose = verbose

    def getPathName(self):
        return self.pathName

    def chDirToPath(self):
        """Change the current directory of this process to the path."""
        if self.pathName != os.getcwd():
            os.chdir(self.pathName)
            assert self.pathName == os.getcwd()

    def __str__(self):
        return self.__class__.__name__ + "(" + self.pathName + ")"

    @staticmethod
    def describeCall(args, kwArgs):
        """Return a printable one line description of subprocess arguments.

        args are the positional arguments for a subprocess function: normally
        a single sequence of command words, or a single shell command string.
        kwArgs is the dict of keyword arguments.
        """
        stringTypes = (str, bytes, type(u""))
        parts = []
        for arg in args:
            if isinstance(arg, stringTypes):
                parts.append(str(arg))  # shell command line, show as is
            else:
                parts.append(" ".join(str(word) for word in arg))
        for key in sorted(kwArgs):
            parts.append("%s=%r" % (key, kwArgs[key]))
        return " ".join(parts)

    def checkCall(self, *args, **kwArgs):
        """Run subprocess.check_call at the path."""
        self.chDirToPath()
        if self.verbose:
            print("check_call args:", self.describeCall(args, kwArgs))
        subprocess.check_call(*args, **kwArgs)

    def checkOutput(self, *args, **kwArgs):
        """Run subprocess.check_output at the path, return the raw output."""
        self.chDirToPath()
        if self.verbose:
            print("check_output args:", self.describeCall(args, kwArgs))
        result = subprocess.check_output(*args, **kwArgs)
        if self.verbose:
            print("check_output result:", result)
        return result

    def checkOutputAsStr(self, *args, **kwArgs):
        """Run subprocess.check_output at the path, return the output as str."""
        self.chDirToPath()
        if self.verbose:
            print("check_output args:", self.describeCall(args, kwArgs))
        result = subprocess.check_output(*args, **kwArgs)
        if binaryToString:
            result = decodeBytesToString(result)
        if self.verbose:
            print("check_output result:", result)
        return result


def nonEmptyLines(text):
    """Return the list of non empty lines of text, split at newlines."""
    return [line for line in text.split("\n") if line]


class SvnWorkingCopy(SubProcessAtPath):
    """An svn working copy on which svn commands are run."""

    svnCmd = "svn"

    def __init__(self, pathName):
        SubProcessAtPath.__init__(self, pathName, verbose=False)
        self.url = None
        self.uuid = None
        self.lastChangeRev = None

    def ensureNoLocalModifications(self):
        """Raise via errorExit when 'svn status' reports anything."""
        localMods = self.checkOutputAsStr((self.svnCmd, "status"))
        if localMods:
            errorExit(self, "should not have local modifications:\n", localMods)

    def updateOutput(self, revision):
        """Update the working copy to revision, return the svn update output."""
        return self.checkOutputAsStr((self.svnCmd, "update", "-r", str(revision)))

    def switch(self, repoBranchName):
        """Switch the working copy to repoBranchName, relative to the repo root."""
        self.checkCall((self.svnCmd, "switch", ("^/" + repoBranchName), "--ignore-ancestry"))

    def parseInfo(self):
        """Run 'svn info --xml' and store uuid, url and last changed revision."""
        infoXml = self.checkOutput((self.svnCmd, "info", "--xml"))  # bytes in python 3.
        infoHandler = SvnInfoHandler()
        sax.parseString(infoXml, infoHandler)
        self.uuid = infoHandler.uuid
        self.url = infoHandler.url
        self.lastChangeRev = infoHandler.getLastChangeRevision()

    def getUrl(self):
        if self.url is None:
            self.parseInfo()
        return self.url

    def getUuid(self):
        if self.uuid is None:
            self.parseInfo()
        return self.uuid

    def lastChangedRevision(self):
        """Return the last changed revision as int, always queried again."""
        self.parseInfo()
        return self.lastChangeRev

    def getLogEntries(self, fromRevision, toRevision, maxNumLogEntries):
        """Return at most maxNumLogEntries SvnLogEntry's for the revision range."""
        revRange = self.revisionsRange(fromRevision, toRevision)
        logXml = self.checkOutput((self.svnCmd, "log", "-r", revRange, "--xml",
                                   "-l", str(maxNumLogEntries)))
        logHandler = SvnLogHandler()
        sax.parseString(logXml, logHandler)
        return logHandler.getLogEntries()

    def revisionsRange(self, fromRevision, toRevision):
        """Return the revision range in the form used by 'svn log -r'."""
        return str(fromRevision) + ":" + str(toRevision)


class GitRepository(SubProcessAtPath):
    """A local git repository with working tree on which git commands are run."""

    gitCmd = "git"

    gitSvnMarker = "git-svn-id:"  # added and used by git svn dcommit
    svn2gitMarker = "svn2git-id:"  # added and used here.

    # Wording of 'git status' for a clean working tree, older and newer git.
    cleanStatusMessages = ("nothing to commit, working directory clean",
                           "nothing to commit, working tree clean")

    def __init__(self, pathName):
        SubProcessAtPath.__init__(self, pathName, verbose=False)
        self.currentBranch = None  # cached branch name, None when unknown

    def checkOutBranch(self, branchName):
        self.checkCall((self.gitCmd, "checkout", branchName))
        self.currentBranch = branchName

    def getCurrentBranch(self):
        """Return the current branch name, raise via errorExit when not on a branch."""
        if self.currentBranch is None:
            gitStatusOut = self.checkOutputAsStr((self.gitCmd, "status"))
            if gitStatusOut.startswith("On branch "):
                self.currentBranch = gitStatusOut.split()[2]  # third word is the branch name
            else:
                errorExit(self, "not on a branch:", gitStatusOut)
        return self.currentBranch

    def workingDirectoryClean(self):
        """Return True when 'git status' reports a clean working tree."""
        gitStatusOut = self.checkOutputAsStr((self.gitCmd, "status"))
        return any(message in gitStatusOut for message in self.cleanStatusMessages)

    def listBranches(self, pattern):
        return self.checkOutputAsStr((self.gitCmd, "branch", "--list", pattern))

    def branchExists(self, branchName):
        listOut = self.listBranches(branchName)  # CHECKME: using branchName as pattern may not always be ok.
        return len(listOut) > 0

    def deleteBranch(self, branchName):
        self.checkCall((self.gitCmd, "branch", "-D", branchName))
        if branchName == self.currentBranch:
            self.currentBranch = None

    def createBranch(self, branchName):
        self.checkCall((self.gitCmd, "branch", branchName))

    def fetch(self, upStream):
        self.checkCall((self.gitCmd, "fetch", upStream))

    def merge(self, branch, fromBranch):
        self.checkCall((self.gitCmd, "merge", branch, fromBranch))

    def getCommitMessage(self, commitRef):
        return self.checkOutputAsStr((self.gitCmd, "log", "--format=%B", "-n", "1", commitRef))

    def getCommitAuthorName(self, commitRef):
        return self.checkOutputAsStr((self.gitCmd, "log", "--format=%aN", "-n", "1", commitRef))

    def getCommitAuthorEmail(self, commitRef):
        return self.checkOutputAsStr((self.gitCmd, "log", "--format=%aE", "-n", "1", commitRef))

    def getLatestCommitForAuthor(self, svnAuthor):
        """Return the id of the first commit listed for svnAuthor, "" when none.

        The author name comes from the remote svn repository, so it is passed
        as a single argument without a shell; --max-count=1 replaces the
        earlier pipe into 'head -1'.
        """
        authorCommit = self.checkOutputAsStr(
            (self.gitCmd, "rev-list", "--all", "-i",
             ("--author=" + svnAuthor),  # see git commit documentation on --author
             "--max-count=1"))  # the first commit only
        return authorCommit.rstrip("\n")

    def getSvnRemoteUuidRevisionFromCommitMessage(self, commitMessage, marker):
        """Parse '<marker> <remote>@<revision> <uuid>' from a commit message.

        Returns the tuple (svnRemote, svnRepoUuid, svnRevision) with the
        revision as int, or (None, None, None) when the marker is absent or
        not followed by a complete id.
        """
        words = commitMessage.split()
        if marker not in words:
            return (None, None, None)
        markerIndex = words.index(marker)
        if len(words) < markerIndex + 3:  # remote@revision or uuid is missing
            return (None, None, None)
        svnId = words[markerIndex + 1]
        if "@" not in svnId:
            return (None, None, None)
        # Split at the last @ only, the remote url itself may contain an @.
        svnRemote, svnRevisionStr = svnId.rsplit("@", 1)
        svnRevision = int(svnRevisionStr)
        svnRepoUuid = words[markerIndex + 2]
        return (svnRemote, svnRepoUuid, svnRevision)

    def getSvnRemoteAndUuidAndRevision(self, gitSvnCommitRef):
        """Return (svnRemote, svnRepoUuid, svnRevision) of a git-svn commit."""
        gitSvnCommitMessage = self.getCommitMessage(gitSvnCommitRef)
        return self.getSvnRemoteUuidRevisionFromCommitMessage(gitSvnCommitMessage, self.gitSvnMarker)

    def lastTempGitSvnRevision(self, tempBranchCommitRef):
        """Return the svn revision at a commit generated here on the temp branch."""
        gitCommitMessage = self.getCommitMessage(tempBranchCommitRef)
        (svnRemote, svnRepoUuid, svnRevision) = \
            self.getSvnRemoteUuidRevisionFromCommitMessage(gitCommitMessage, self.svn2gitMarker)
        return svnRevision

    def addAllToIndex(self):
        self.checkCall((self.gitCmd, "add", "-A", self.getPathName()))

    def commit(self, message,
               authorName, authorEmail, authorDate,
               committerName, committerEmail, committerDate):
        """Commit the index with the given message, author and committer."""
        author = ''.join((authorName, " <", authorEmail, ">"))
        os.environ["GIT_COMMITTER_NAME"] = committerName  # no need to save/restore earlier environment state.
        os.environ["GIT_COMMITTER_EMAIL"] = committerEmail
        os.environ["GIT_COMMITTER_DATE"] = committerDate
        self.checkCall((self.gitCmd, "commit",
                        "--allow-empty",  # in case only svn properties changed.
                        ("--message=" + message),
                        ("--author=" + author),
                        ("--date=" + authorDate)))

    def cleanDirsForced(self):
        self.checkCall((self.gitCmd, "clean", "-fd"))  # Use -fdx to also remove ignored files.
def errorExit(*messageParts):
    """Raise a RuntimeError with the str() of all message parts joined by spaces."""
    raise RuntimeError(" ".join(map(str, messageParts)))


def allSuccessivePairs(lst):
    """Return the list of all slices of two successive elements of lst."""
    return [lst[i:i+2] for i in range(len(lst)-1)]


def octal(mode):
    """Return mode formatted as octal digits, without prefix."""
    return format(mode, 'o')


def checkEqualProtectionBits(fn1, fn2):
    """Return whether files fn1 and fn2 have the same st_mode, print both when not."""
    stat1 = os.stat(fn1)
    stat2 = os.stat(fn2)
    if stat1.st_mode != stat2.st_mode:
        print("Protection bits %s of %s" % (octal(stat1.st_mode), fn1))
        print("Protection bits %s of %s" % (octal(stat2.st_mode), fn2))
        return False
    return True


def verifyGitFilesAgainstSvn(gitRepo, svnWorkingCopy):
    """Compare all files of the current git branch to the svn working copy.

    Contents are compared by 'diff -q', protection bits by os.stat.
    Differences are printed. Returns True when no differences were found.
    """
    # The files under version control at the git repo can be enumerated quickly by: git ls-tree -r trunk.svn | cut --fields=2-
    # This makes sense because all files, including binary files, are added.
    # svn ls -R is too slow to use here (this lists about 12 file names per second, lucene-solr has well over 4000).
    fileNamesOut = gitRepo.checkOutputAsStr((gitRepo.gitCmd, "ls-tree", "-r", "--name-only",
                                             gitRepo.getCurrentBranch()))
    fileNames = nonEmptyLines(fileNamesOut)
    print("verifyGitFilesAgainstSvn checking", len(fileNames), "files")
    result = True
    for fileName in fileNames:
        fileNameInGitRepo = os.path.join(gitRepo.getPathName(), fileName)
        fileNameInSvnWorkingCopy = os.path.join(svnWorkingCopy.getPathName(), fileName)
        try:
            # diff exits non zero when the files differ or when one is missing.
            subprocess.check_output(("diff", "-q", fileNameInGitRepo, fileNameInSvnWorkingCopy))
        except subprocess.CalledProcessError as exitError:
            print("difference in file", fileName)
            print("diff exitError", exitError)
            result = False
        try:
            sameProtectionBits = checkEqualProtectionBits(fileNameInSvnWorkingCopy, fileNameInGitRepo)
        except OSError as statError:  # for example a file missing on one side
            print("cannot compare protection bits of", fileName, statError)
            sameProtectionBits = False
        if not sameProtectionBits:
            result = False
    if result:
        print("no differences")
    else:
        print("some differences")
    return result


"""
On clean checkouts of both svn and git the command:
  diff -r svndir gitdir
reports only .svn .git and empty directories in the svn working copy,
for example:
  Only in ./svnwork/lucene-solr/lucene/analysis/icu: lib
This diff output could be checked here.

To clean an svn working copy:
  rm -r *      # also .hgignore .caches, all except .svn
  svn update   # this is a local svn operation

To clean a git working directory:
  rm -r *      # all except .git
  git checkout branchname -- .
"""


def deleteEmptyDirs(pathName, topDirName):
    """ Delete higher level directories of pathName when empty, but do not delete topDirName """
    head = os.path.dirname(pathName)
    while (head != topDirName) and not os.listdir(head):
        assert head.startswith(topDirName)  # never delete outside topDirName
        os.rmdir(head)  # delete empty directory
        head = os.path.dirname(head)


def _deleteFromGitWorkingTree(fileName, fileNameInGitRepo, gitRepo):
    """Delete a file or directory tree from the git working tree, and emptied parent directories."""
    if os.path.isdir(fileNameInGitRepo):
        print("Deleting directory %s" % fileNameInGitRepo)
        shutil.rmtree(fileNameInGitRepo)  # delete completely in git working tree
        deleteEmptyDirs(fileNameInGitRepo, gitRepo.getPathName())  # delete empty dirs in git repo
    elif os.path.isfile(fileNameInGitRepo):
        print("Deleting file %s" % fileNameInGitRepo)
        os.remove(fileNameInGitRepo)  # delete in git working tree
        deleteEmptyDirs(fileNameInGitRepo, gitRepo.getPathName())
    else:
        print("Non deleting non existing file %s" % fileName)


def _copyFromSvnWorkingCopy(fileName, fileNameInSvnWorkingCopy, fileNameInGitRepo):
    """Create the directory or copy the file into the git working tree.

    Returns True when a file was copied, its protection bits still have to be set.
    """
    if os.path.isdir(fileNameInSvnWorkingCopy):
        if not os.path.isdir(fileNameInGitRepo):
            print("Creating directory %s" % fileName)
            os.mkdir(fileNameInGitRepo)  # new directory in git working tree
        else:
            print("Not creating existing directory %s" % fileName)
        return False
    if os.path.isfile(fileNameInSvnWorkingCopy):
        head = os.path.dirname(fileNameInGitRepo)
        if not os.path.isdir(head):
            print("Creating directory for file %s" % fileNameInGitRepo)
            os.makedirs(head)  # more than one directory level may be missing
        # Common case: copy into git working tree
        shutil.copyfile(fileNameInSvnWorkingCopy, fileNameInGitRepo)
        return True
    assert False, "Cannot add or update non existing file %s" % fileNameInSvnWorkingCopy


def _copyProtectionBits(fileNameInSvnWorkingCopy, fileNameInGitRepo):
    """Set the mode of the file in the git working tree to the mode of the file in svn."""
    if not os.path.isfile(fileNameInSvnWorkingCopy):
        return
    statSvn = os.stat(fileNameInSvnWorkingCopy)
    statGit = os.stat(fileNameInGitRepo)
    if statSvn.st_mode != statGit.st_mode:
        print("Changing mode from %s to %s for %s" % (octal(statGit.st_mode), octal(statSvn.st_mode), fileNameInGitRepo))
        os.chmod(fileNameInGitRepo, statSvn.st_mode)


def setGitWorkingTreeViaSvnCheckout(svnWorkingCopy, revision, gitRepo):
    """Update the svn working copy to revision and replay the changes in the git working tree.

    The lines of the svn update output are interpreted one by one. Some example lines:
      U    solr/solrj/src/test/org/apache/solr/client/solrj/io/sql/JdbcTest.java
       U   solr/core
      Updated to revision 1707390.

    From svn help update:
      For each updated item a line will be printed with characters reporting
      the action taken. These characters have the following meaning:

        A  Added
        D  Deleted
        U  Updated
        C  Conflict
        G  Merged
        E  Existed
        R  Replaced

      Characters in the first column report about the item itself.
      Characters in the second column report about properties of the item.
      A 'B' in the third column signifies that the lock for the file has
      been broken or stolen.
      A 'C' in the fourth column indicates a tree conflict, while a 'C' in
      the first and second columns indicate textual conflicts in files
      and in property values, respectively.

    Only A, D and U are accepted here, anything else fails an assertion.
    """
    svnUpdateOutputLines = svnWorkingCopy.updateOutput(revision)
    validItemChars = (" ", "A", "D", "U")
    for svnUpdateLine in nonEmptyLines(svnUpdateOutputLines):
        if svnUpdateLine.startswith("Updating "):  # first line
            continue
        if svnUpdateLine.startswith("At revision "):  # last line when nothing changed
            continue
        if svnUpdateLine.startswith("Updated to"):  # last line
            revisionStr = svnUpdateLine.split()[3][:-1]  # drop the final dot
            assert revision == int(revisionStr), revisionStr
            continue

        print(svnUpdateLine)
        if len(svnUpdateLine) < 6:  # four status columns, a space, and a file name
            errorExit("revision", revision, "unexpected svn update line:", svnUpdateLine)
        itemChar = svnUpdateLine[0]
        itemPropChar = svnUpdateLine[1]
        lockChar = svnUpdateLine[2]
        treeConflictChar = svnUpdateLine[3]
        fileName = svnUpdateLine[5:]

        assert itemChar in validItemChars, "revision %d itemChar %s, fileName %s" % (revision, itemChar, fileName)
        assert itemPropChar in validItemChars, "revision %d itemPropChar %s, working copy not clean fileName %s" % (revision, itemPropChar, fileName)
        assert lockChar == " ", "revision %d lockChar %s fileName %s" % (revision, lockChar, fileName)
        assert treeConflictChar == " ", "revision %d treeConflictChar %s fileName %s" % (revision, treeConflictChar, fileName)

        fileNameInSvnWorkingCopy = os.path.join(svnWorkingCopy.getPathName(), fileName)
        fileNameInGitRepo = os.path.join(gitRepo.getPathName(), fileName)

        setFileProtectionBits = False
        if itemChar == "D":  # deleted in svn working copy
            _deleteFromGitWorkingTree(fileName, fileNameInGitRepo, gitRepo)
        elif itemChar in ("A", "U"):  # added or updated in svn working copy
            setFileProtectionBits = _copyFromSvnWorkingCopy(fileName, fileNameInSvnWorkingCopy, fileNameInGitRepo)
        else:
            assert itemChar == " "  # nothing to do

        if itemPropChar != " ":
            print("At revision %d ignoring svn property change type %s for file %s" % (revision, itemPropChar, fileName))
            setFileProtectionBits = True  # svn:executable may have been set or unset.

        if setFileProtectionBits:
            _copyProtectionBits(fileNameInSvnWorkingCopy, fileNameInGitRepo)


def assertUrlsSameExceptScheme(url1, url2):
    """Assert that url1 and url2 may only differ by scheme http:// or https://"""
    scheme1, netloc1, path1, params1, query1, fragment1 = urlparse(url1)
    scheme2, netloc2, path2, params2, query2, fragment2 = urlparse(url2)
    assert netloc1 == netloc2, (netloc1, netloc2)
    assert path1 == path2, (path1, path2)
    assert params1 == params2, (params1, params2)
    assert query1 == query2, (query1, query2)
    assert fragment1 == fragment2, (fragment1, fragment2)


def maintainTempGitSvnBranch(branchName, tempGitBranchName,
                             svnWorkingCopyOfBranchPath, svnRepoBranchName,
                             gitRepoPath, gitUpstream,
                             maxCommits=20,  # generate at most this number of commits on tempGitBranchName, rerun to add more.
                             testMode=False):
    """Bring the temporary git branch tempGitBranchName up to date with svn.

    Switches the svn working copy to svnRepoBranchName, fetches gitUpstream
    and merges it into branchName (not in testMode). When the svn working copy
    is later than git, at most maxCommits svn revisions are replayed as git
    commits on tempGitBranchName. The temp branch is restarted from branchName
    unless it is already later than branchName.
    Raises RuntimeError via errorExit when a precondition does not hold.
    """
    assert maxCommits >= 1

    gitRepo = GitRepository(gitRepoPath)
    gitRepo.checkOutBranch(branchName)  # fails with git message when working directory is not clean

    svnWorkingCopy = SvnWorkingCopy(svnWorkingCopyOfBranchPath)
    svnWorkingCopy.ensureNoLocalModifications()
    svnWorkingCopy.switch(svnRepoBranchName)  # switch to repo branch, update to latest revision

    lastSvnRevision = svnWorkingCopy.lastChangedRevision()  # int to allow comparison

    gitRepo.fetch(gitUpstream)
    if not testMode:  # in test mode leave branch where it is, as if the last commits from upstream did not arrive
        gitRepo.merge(branchName, gitUpstream + "/" + branchName)

    (gitSvnRemote, gitSvnRepoUuid, lastSvnRevisionOnGitSvnBranch) = gitRepo.getSvnRemoteAndUuidAndRevision(branchName)
    if gitSvnRemote is None:
        errorExit(gitRepo, "no", gitRepo.gitSvnMarker, "found in the last commit message on branch", branchName)

    svnUrl = svnWorkingCopy.getUrl()
    svnRepoUuid = svnWorkingCopy.getUuid()
    print("gitSvnRemote:", gitSvnRemote)
    print("svnUrl:", svnUrl)
    print("svn repo uuid:", svnRepoUuid)
    assertUrlsSameExceptScheme(gitSvnRemote, svnUrl)
    assert gitSvnRepoUuid == svnRepoUuid

    # check whether tempGitBranchName exists:
    diffBaseRevision = lastSvnRevisionOnGitSvnBranch
    svnTempRevision = None
    doCommitOnExistingTempBranch = False

    if gitRepo.branchExists(tempGitBranchName):
        print(tempGitBranchName, "exists")
        # update lastSvnRevisionOnGitSvnBranch from there.
        svnTempRevision = gitRepo.lastTempGitSvnRevision(tempGitBranchName)
        if svnTempRevision is None:
            print("Warning: no svn revision found on branch:", tempGitBranchName)
        elif svnTempRevision > lastSvnRevisionOnGitSvnBranch:
            diffBaseRevision = svnTempRevision
            doCommitOnExistingTempBranch = True
            gitRepo.checkOutBranch(tempGitBranchName)

    if lastSvnRevision == diffBaseRevision:
        print(gitRepo, gitRepo.getCurrentBranch(), "up to date with", svnWorkingCopy, svnRepoBranchName)
        verifyGitFilesAgainstSvn(gitRepo, svnWorkingCopy)
        return

    if lastSvnRevision < diffBaseRevision:  # unlikely, do nothing
        print(gitRepo, gitRepo.getCurrentBranch(), "later than", svnWorkingCopy, ", nothing to update.")
        return

    print(gitRepo, gitRepo.getCurrentBranch(), "earlier than", svnWorkingCopy)

    if not gitRepo.workingDirectoryClean():
        errorExit(gitRepo, "on branch", gitRepo.getCurrentBranch(), "not clean")

    print(gitRepo, "on branch", gitRepo.getCurrentBranch(), "and clean")

    if not doCommitOnExistingTempBranch:  # restart temp branch from branch
        assert gitRepo.getCurrentBranch() == branchName
        if gitRepo.branchExists(tempGitBranchName):  # tempGitBranchName exists, delete it first.
            print("Branch", tempGitBranchName, "exists, deleting")
            gitRepo.deleteBranch(tempGitBranchName)
            if gitRepo.branchExists(tempGitBranchName):
                errorExit("Could not delete branch", tempGitBranchName, "from", gitRepo)

        gitRepo.createBranch(tempGitBranchName)
        gitRepo.checkOutBranch(tempGitBranchName)
        print("Started branch", tempGitBranchName, "at", branchName)

    assert gitRepo.getCurrentBranch() == tempGitBranchName

    # One log entry more than the commits to add: the first one is only the starting point.
    maxNumLogEntries = maxCommits + 1
    svnLogEntries = svnWorkingCopy.getLogEntries(diffBaseRevision, lastSvnRevision, maxNumLogEntries)
    if not svnLogEntries:
        errorExit(svnWorkingCopy, "no svn log entries from revision", diffBaseRevision, "to", lastSvnRevision)

    numCommits = 0
    startRevision = svnLogEntries[0].revision
    svnWorkingCopy.updateOutput(startRevision)  # go back to the start, the output is not needed

    for (logEntryFrom, logEntryTo) in allSuccessivePairs(svnLogEntries):
        setGitWorkingTreeViaSvnCheckout(svnWorkingCopy, logEntryTo.revision, gitRepo)

        gitRepo.addAllToIndex()  # add all changes from the git working tree to the git index.

        # commit, put toRevision at end so it can be picked up later.
        # git-svn adds this commit metadata:
        #   git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1719562 13f79535-47bb-0310-9956-ffa450edef68
        # This script uses svn2git-id: instead of git-svn-id:
        commitMessageMetaData = gitRepo.svn2gitMarker + " " + gitSvnRemote + "@" + str(logEntryTo.revision) + " " + gitSvnRepoUuid
        message = logEntryTo.msg + "\n\n" + commitMessageMetaData
        authorCommit = gitRepo.getLatestCommitForAuthor(logEntryTo.author)
        authorName = gitRepo.getCommitAuthorName(authorCommit)
        authorEmail = gitRepo.getCommitAuthorEmail(authorCommit)
        gitRepo.commit(message,
                       authorName, authorEmail, logEntryTo.date,
                       authorName, authorEmail, logEntryTo.date)  # author is also git committer, just like git-svn

        numCommits += 1
        print("Commit date:", logEntryTo.date)

        gitRepo.cleanDirsForced()  # delete untracked directories and files

        if not gitRepo.workingDirectoryClean():
            errorExit(gitRepo, "on branch", gitRepo.getCurrentBranch(), "not clean, numCommits:", numCommits)

        diffBaseRevision = logEntryTo.revision
        print('')  # show empty line after commit info

    print("Added", numCommits, "commit(s) to branch", tempGitBranchName)

    if lastSvnRevision == diffBaseRevision:
        print(gitRepo, gitRepo.getCurrentBranch(), "up to date with", svnWorkingCopy, svnRepoBranchName)
        verifyGitFilesAgainstSvn(gitRepo, svnWorkingCopy)


def parseArguments(argv, defaultMaxCommits=20):
    """Parse the command line arguments [test] [maximum number of commits].

    argv is the list of arguments without the program name.
    Returns the tuple (testMode, maxCommits); a later argument of the same
    kind overrides an earlier one.
    Raises RuntimeError via errorExit for an argument that is neither "test"
    nor an integer of at least 1.
    """
    testMode = False  # when true, leave branch where it is, as if the last commits from upstream did not arrive
    maxCommits = defaultMaxCommits
    for arg in argv:
        if arg == "test":
            testMode = True
            continue
        try:
            maxCommits = int(arg)
        except ValueError:
            maxCommits = 0  # rejected below
        if maxCommits < 1:
            errorExit("Argument(s) [test] [maximum number of commits], defaults are false and " + str(defaultMaxCommits))
    return (testMode, maxCommits)


if __name__ == "__main__":

    import sys

    (testMode, maxCommits) = parseArguments(sys.argv[1:])

    repo = "lucene-solr"
    branchName = "trunk"
    tempGitBranchName = branchName + ".svn"

    home = os.path.expanduser("~")

    svnWorkingCopiesPath = os.path.join(home, "svnwork")
    gitReposPath = os.path.join(home, "gitrepos")

    if sys.argv[0].startswith(svnWorkingCopiesPath) or sys.argv[0].startswith(gitReposPath):
        errorExit(sys.argv[0] + " cannot run from svn working copy or git working tree, copy to another place and run from there.")

    svnWorkingCopyOfBranchPath = os.path.join(svnWorkingCopiesPath, repo)
    svnRepoBranchName = "lucene/dev/" + branchName  # for svn switch to

    gitRepoPath = os.path.join(gitReposPath, repo)
    gitUpstream = "upstream"

    maintainTempGitSvnBranch(branchName, tempGitBranchName,
                             svnWorkingCopyOfBranchPath, svnRepoBranchName,
                             gitRepoPath, gitUpstream,
                             maxCommits=maxCommits,
                             testMode=testMode)