mirror of https://github.com/apache/lucene.git
LUCENE-6922: latest version of svn to git mirror workaround script, from Paul Elschot
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1720686 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
8eb40736d5
commit
3972c0e9eb
|
@ -1,3 +1,12 @@
|
|||
from __future__ import print_function
|
||||
"""
|
||||
To be done:
|
||||
- Investigate whether it is possible to obtain the last svn revision number without switching to it.
|
||||
- Investigate file mode differences reported by gitk, see svn revision 171449.
|
||||
- simplify difference check to a single call to diff.
|
||||
Verify that all common files are equal, ignore non common files, check stderr and stdout of diff.
|
||||
"""
|
||||
|
||||
# Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
# contributor license agreements. See the NOTICE file distributed with
|
||||
# this work for additional information regarding copyright ownership.
|
||||
|
@ -29,21 +38,25 @@ the remote svn repo.
|
|||
|
||||
When this script is run it will first check that the local working copy and repository are clean.
|
||||
Then it switches the svn working copy to the branch, which updates from the remote.
|
||||
Then it the fetches branch from the git upstream repo, and merges the branch locally.
|
||||
Then it fetches the branch from the git upstream repo, and merges the branch locally.
|
||||
Normally the local svn and git will then be at the same svn revision, and the script will exit.
|
||||
|
||||
Otherwise the remote git repo is out of date, and the following happens.
|
||||
It is checked that the hostname and path and the uuid of the remote svn repo
|
||||
as reported by the local svn working copy and as reported by the local git repo
|
||||
are the same.
|
||||
|
||||
For the branch branchname in a local git repository following an upstream git-svn git repository,
|
||||
this maintains commits on a temporary git branch branchname.svn in the local git repository.
|
||||
These commits contain a message ending like this:
|
||||
"SvnRepoUrl diff -r EarlierSvnRevisionNumber:NextSvnRevisionNumber".
|
||||
Otherwise the messages of the added commits are the same as their counterparts from git svn.
|
||||
These commits contain metadata that differs slightly from git svn (svn2git-id: instead of git-svn-id:).
|
||||
Otherwise the messages of the added commits are the same as their counterparts from git svn,
|
||||
except occasionally for an added or missed empty line when the svn commit message ends in new line.
|
||||
|
||||
Normally the added git commits and their git-svn counterparts have no differences between their working trees.
|
||||
However such differences can occur, see also the documentation of git-svn reset and the limitations below.
|
||||
However such differences can occur, for example occasionally file modes are different in the git working tree.
|
||||
See also the documentation of git-svn reset and the limitations below.
|
||||
In order not to interfere with git-svn this script only adds commits to a temporary branch
|
||||
branchname.svn, and the commit messages are chosen differently, they do not contain git-svn-id.
|
||||
branchname.svn, and the commit messages are chosen differently, they do not contain git-svn-id: .
|
||||
|
||||
In case an earlier branchname.svn exists, it will first be deleted if necessary,
|
||||
and restarted at the later branch.
|
||||
|
@ -51,68 +64,98 @@ Therefore branchname.svn is temporary and should only be used locally.
|
|||
|
||||
By default, no more than 20 commits will be added to branchname.svn in a single run.
|
||||
|
||||
The earlier revision number is taken from the git-svn-id message of git svn,
|
||||
The earlier revision number is taken from the git-svn-id: message of git svn,
|
||||
or from the latest revision number in the commit message on branchname.svn,
|
||||
whichever is later.
|
||||
|
||||
This allows branchname.svn to be used as a local git branch instead of branchname
|
||||
to develop new features locally, for example by merging branchname.svn into a feature branch.
|
||||
"""
|
||||
|
||||
""" Limitations:
|
||||
This works by interpretation of the lines of svn update messages (U/A/D etc.)
|
||||
by copying these files and their protection bits from the local svn working copy into the git working tree,
|
||||
and by deleting files and directories in the git working tree.
|
||||
|
||||
This currently works by patching text, and therefore this does not work on binary files.
|
||||
An example commit in lucene-solr that adds a binary file, on which this currently does not work correctly:
|
||||
An example commit in lucene-solr that adds a binary file, on which this script provides a correct git working tree:
|
||||
svn revision 1707457
|
||||
git commit 3c0390f71e1f08a17f32bc207b4003362f8b6ac2
|
||||
git-svn commit 3c0390f71e1f08a17f32bc207b4003362f8b6ac2
|
||||
|
||||
When the local svn working copy contains a file after updating to the latest available revision,
|
||||
and there is an interim commit that deletes this file, this file is left as an empty file in the working directory
|
||||
of the local git repository.
|
||||
|
||||
Limitations:
|
||||
|
||||
All svn properties are ignored here.
|
||||
Commit messages added to the git repo occasionally do not have the same number of empty lines
|
||||
as the corresponding svn commit message.
|
||||
"""
|
||||
|
||||
""" To be done:
|
||||
Take binary files from the patch, and check out binary files from the remote svn repo directly into the local git repo.
|
||||
|
||||
Going really far: checkout each interim svn revision, and use all (changed) files from there instead of the text diff.
|
||||
Determining all files under version control with svn (svn ls) is far too slow for this (esp. when compared to git ls-tree),
|
||||
so this is probably better done by using svnsync to setup a local mirror repository following the remote,
|
||||
and then using svnlook on the local mirror repository.
|
||||
Doing that only to avoid the limitations of this workaround does not appear to be worthwhile.
|
||||
"""
|
||||
|
||||
""" This was developed on Linux using the following program versions:
|
||||
python 2.7.6
|
||||
python 3.4.3
|
||||
git 1.9.1
|
||||
svn 1.8.8
|
||||
GNU bash, version 4.3.11(1)-release (x86_64-pc-linux-gnu)
|
||||
sed (GNU sed) 4.2.2
|
||||
grep (GNU grep) 2.16
|
||||
diff (GNU diffutils) 3.3
|
||||
cp (GNU coreutils) 8.21
|
||||
rm (GNU coreutils) 8.21
|
||||
mkdir (GNU coreutils) 8.21
|
||||
|
||||
gitk (part of git) was used for manual testing:
|
||||
- reset branch to an earlier commit to simulate a non working update from svn to git,
|
||||
- delete branchname.svn, reset branchname.svn to earlier,
|
||||
- diff a commit generated here to a commit from git svn,
|
||||
- update, reload, show commits by commit date, ...
|
||||
- delete branchname.svn, reset branchname.svn and branchname to earlier to simulate going back in history,
|
||||
- diff a commit generated here to a commit from git svn, ideally there are no differences,
|
||||
- update, reload, show commits in reverse order of commit date, ...
|
||||
"""
|
||||
|
||||
import os
|
||||
import subprocess
|
||||
import shutil
|
||||
|
||||
from xml import sax
|
||||
from xml.sax.handler import ContentHandler
|
||||
|
||||
import types
|
||||
try:
|
||||
from urllib.parse import urlparse # python 3
|
||||
except ImportError:
|
||||
from urlparse import urlparse # python 2
|
||||
|
||||
import sys
|
||||
binaryToString = sys.version_info >= (3, 0)
|
||||
|
||||
def decodeBytesToString(bytes):
    """Return the text obtained by decoding the byte string *bytes* as UTF-8."""
    encoding = "utf-8"
    text = bytes.decode(encoding)
    return text
|
||||
|
||||
|
||||
class SvnInfoHandler(ContentHandler):
    """SAX content handler for the XML printed by ``svn info --xml``.

    While parsing it records:
    - the integer ``revision`` attribute of the ``commit`` element,
      available from getLastChangeRevision(),
    - the character content of the ``url`` and ``uuid`` elements, stored
      in the attributes of the same names.
    All of these are None until the corresponding element has been seen.
    """
    commitTag = "commit"
    revisionAttr = "revision"

    urlTag = "url"
    uuidTag = "uuid"
    charCollectTags = (urlTag, uuidTag)  # also used as SvnInfoHandler attributes

    def __init__(self):
        ContentHandler.__init__(self)
        self.lastChangeRev = None
        self.lastLogEntry = None
        for tag in self.charCollectTags:
            setattr(self, tag, None)
        self.chars = None  # None: not collecting, otherwise the text collected so far

    def startElement(self, name, attrs):
        """Record the commit revision, or start collecting characters."""
        if name == self.commitTag:
            self.lastChangeRev = int(attrs.getValue(self.revisionAttr))
        elif name in self.charCollectTags:
            self.chars = ""

    def characters(self, content):
        """Append content while inside one of the charCollectTags elements."""
        if self.chars is not None:
            self.chars += content

    def endElement(self, name):
        """Store the collected characters in the attribute named after the element."""
        if name in self.charCollectTags:
            setattr(self, name, self.chars)
            self.chars = None

    def getLastChangeRevision(self):
        """Return the revision of the last seen commit element, or None."""
        return self.lastChangeRev
|
||||
|
@ -151,7 +194,8 @@ class SvnLogHandler(ContentHandler): # collect list of SvnLogEntry's
|
|||
|
||||
def endElement(self, name):
|
||||
if name in self.charCollectTags:
|
||||
setattr(self.lastLogEntry, name, self.chars)
|
||||
chars = self.chars
|
||||
setattr(self.lastLogEntry, name, chars)
|
||||
self.chars = None
|
||||
return
|
||||
|
||||
|
@ -165,7 +209,6 @@ class SvnLogHandler(ContentHandler): # collect list of SvnLogEntry's
|
|||
|
||||
class SubProcessAtPath(object):
|
||||
def __init__(self, pathName, verbose=True):
|
||||
assert pathName != ""
|
||||
self.pathName = pathName
|
||||
self.verbose = verbose
|
||||
|
||||
|
@ -181,45 +224,77 @@ class SubProcessAtPath(object):
|
|||
return self.__class__.__name__ + "(" + self.pathName + ")"
|
||||
|
||||
def checkCall(self, *args, **kwArgs):
    """Call chDirToPath() and then run subprocess.check_call(*args, **kwArgs).

    The first positional argument is the command: normally a sequence of
    strings, or a single string when shell=True is passed.
    When self.verbose is set the command and the keyword arguments are printed first.
    subprocess.check_call raises subprocess.CalledProcessError on a non zero exit status.
    """
    self.chDirToPath()
    if self.verbose:
        command = args[0] if args else ()
        if not isinstance(command, (str, type(u""))):
            command = " ".join(command)  # a sequence of words, show as one line
        # str(**kwArgs) fails for any keyword such as shell=True, show the dict instead.
        print("check_call args:", command, str(kwArgs) if kwArgs else "")
    subprocess.check_call(*args, **kwArgs)
|
||||
|
||||
def checkOutput(self, *args, **kwArgs):
    """Call chDirToPath() and return subprocess.check_output(*args, **kwArgs).

    The first positional argument is the command: normally a sequence of
    strings, or a single string when shell=True is passed.
    The result is returned unchanged, so it is bytes in python 3.
    When self.verbose is set the command, the keyword arguments and the result are printed.
    subprocess.check_output raises subprocess.CalledProcessError on a non zero exit status.
    """
    self.chDirToPath()
    if self.verbose:
        command = args[0] if args else ()
        if not isinstance(command, (str, type(u""))):
            command = " ".join(command)  # a sequence of words, show as one line
        # str(**kwArgs) fails for any keyword such as shell=True, show the dict instead.
        print("check_output args:", command, str(kwArgs) if kwArgs else "")
    result = subprocess.check_output(*args, **kwArgs)
    if self.verbose:
        print("check_output result:", result)
    return result
|
||||
|
||||
def checkOutputAsStr(self, *args, **kwArgs):
    """Like checkOutput, but return the output as a text string.

    Calls chDirToPath(), runs subprocess.check_output(*args, **kwArgs) and
    decodes a bytes result (python 3) as UTF-8; a result that is already a
    str is returned as is.
    When self.verbose is set the command, the keyword arguments and the result are printed.
    subprocess.check_output raises subprocess.CalledProcessError on a non zero exit status.
    """
    self.chDirToPath()
    if self.verbose:
        command = args[0] if args else ()
        if not isinstance(command, (str, type(u""))):
            command = " ".join(command)  # a sequence of words, show as one line
        # str(**kwArgs) fails for any keyword such as shell=True, show the dict instead.
        print("check_output args:", command, str(kwArgs) if kwArgs else "")
    result = subprocess.check_output(*args, **kwArgs)
    if not isinstance(result, str):  # bytes in python 3
        result = result.decode("utf-8")
    if self.verbose:
        print("check_output result:", result)
    return result
|
||||
|
||||
def nonEmptyLines(text):
    """Split text at newline characters and return the list of the non empty pieces."""
    pieces = text.split("\n")
    # filter(None, ...) drops exactly the empty strings.
    return list(filter(None, pieces))
|
||||
|
||||
|
||||
|
||||
class SvnWorkingCopy(SubProcessAtPath):
|
||||
def __init__(self, pathName):
    """Initialise the base class non verbosely; url and uuid start as None.

    url and uuid are filled in later by parseInfo().
    """
    SubProcessAtPath.__init__(self, pathName, verbose=False)
    self.uuid = None
    self.url = None
|
||||
|
||||
svnCmd = "svn"
|
||||
|
||||
def ensureNoLocalModifications(self):
    """Call errorExit when ``svn status`` prints anything.

    svn status is run once and its output is taken as text, so it can be
    passed on in the error message.
    """
    localMods = self.checkOutputAsStr((self.svnCmd, "status"))
    if localMods:
        errorExit(self, "should not have local modifications:\n", localMods)
|
||||
|
||||
def update(self):
    """Run ``svn update`` via checkCall."""
    command = (self.svnCmd, "update")
    self.checkCall(command)
|
||||
def updateOutput(self, revision):
    """Run ``svn update -r revision`` and return its output as text."""
    command = (self.svnCmd, "update", "-r", str(revision))
    return self.checkOutputAsStr(command)
|
||||
|
||||
def switch(self, repoBranchName):
    """Run ``svn switch ^/repoBranchName`` via checkCall."""
    branchTarget = "^/" + repoBranchName
    command = (self.svnCmd, "switch", branchTarget)
    self.checkCall(command)
|
||||
|
||||
def parseInfo(self):
    """Run ``svn info --xml`` and store its uuid, url and last changed revision.

    The XML is parsed with an SvnInfoHandler; the results are stored in
    self.uuid, self.url and self.lastChangeRev.
    """
    infoXml = self.checkOutput((self.svnCmd, "info", "--xml"))  # bytes in python 3.
    infoHandler = SvnInfoHandler()
    sax.parseString(infoXml, infoHandler)
    self.uuid = infoHandler.uuid
    self.url = infoHandler.url
    self.lastChangeRev = infoHandler.getLastChangeRevision()
|
||||
|
||||
def getUrl(self):
    """Return self.url, calling parseInfo() first when it is still None."""
    if self.url is None:
        self.parseInfo()
    url = self.url
    return url
|
||||
|
||||
def getUuid(self):
    """Return self.uuid, calling parseInfo() first when it is still None."""
    if self.uuid is None:
        self.parseInfo()
    uuid = self.uuid
    return uuid
|
||||
|
||||
def lastChangedRevision(self):
    """Call parseInfo() and return the last changed revision it stored.

    parseInfo() is called on every invocation, the value is not cached.
    """
    self.parseInfo()
    revision = self.lastChangeRev
    return revision
|
||||
|
||||
def getLogEntries(self, fromRevision, toRevision, maxNumLogEntries):
|
||||
revRange = self.revisionsRange(fromRevision, toRevision)
|
||||
|
@ -231,29 +306,6 @@ class SvnWorkingCopy(SubProcessAtPath):
|
|||
def revisionsRange(self, fromRevision, toRevision):
    """Return the two revisions as strings, joined by a colon: "from:to"."""
    bounds = (str(fromRevision), str(toRevision))
    return ":".join(bounds)
|
||||
|
||||
def createPatchFile(self, fromRevision, toRevision, patchFileName):
    """Write the output of ``svn diff -r from:to --ignore-properties`` to patchFileName.

    The file is opened for writing (an existing file is overwritten) and
    passed as stdout to checkCall; it is closed also when checkCall raises.
    """
    revRange = self.revisionsRange(fromRevision, toRevision)
    print("Creating patch from", self.pathName, "between revisions", revRange)
    with open(patchFileName, 'w') as patchFile:
        self.checkCall((self.svnCmd, "diff", "-r", revRange,
                        "--ignore-properties"),  # git apply can fail on svn properties.
                       stdout=patchFile)
    print("Created patch file", patchFileName)
|
||||
|
||||
def patchedFileNames(self, patchFileName):  # return a sequence of the patched file names
    """Return the list of file names on the "Index: " lines of the patch file.

    The file is scanned here instead of by grep: grep exits with status 1
    without any match, which made check_output raise for a patch that
    has content but no "Index: " line.
    An empty patch file (only svn properties changed) gives an empty list.
    """
    indexPrefix = b"Index: "  # at beginning of line
    indexPrefixLength = len(indexPrefix)
    fileNames = []
    with open(patchFileName, "rb") as patchFile:  # binary: the patch body may have any encoding
        for rawLine in patchFile:
            if not rawLine.startswith(indexPrefix):
                continue
            fileName = rawLine[indexPrefixLength:].rstrip(b"\r\n")
            if not isinstance(fileName, str):  # bytes in python 3
                fileName = fileName.decode("utf-8")
            fileNames.append(fileName)
    return fileNames
|
||||
|
||||
|
||||
class GitRepository(SubProcessAtPath):
|
||||
|
@ -269,20 +321,21 @@ class GitRepository(SubProcessAtPath):
|
|||
|
||||
def getCurrentBranch(self):
    """Return the name of the current git branch, caching it in self.currentBranch.

    On first use ``git status`` is run once, as text; its output is expected
    to start with "On branch <name>", otherwise errorExit is called.
    """
    if self.currentBranch is None:
        gitStatusOut = self.checkOutputAsStr((self.gitCmd, "status"))
        if gitStatusOut.startswith("On branch "):
            self.currentBranch = gitStatusOut.split()[2]  # third word is the branch name
        else:
            errorExit(self, "not on a branch:", gitStatusOut)
    return self.currentBranch
|
||||
|
||||
def workingDirectoryClean(self):
    """Return True when ``git status`` reports that there is nothing to commit.

    Both wordings are accepted: "working directory clean" and the
    "working tree clean" printed by later git versions.
    NOTE(review): the match is on the English message, so this presumably
    needs a non translated git; confirm the locale the script runs under.
    """
    gitStatusOut = self.checkOutputAsStr((self.gitCmd, "status"))
    expSubStrings = ("nothing to commit, working directory clean",
                     "nothing to commit, working tree clean")
    return any(expSubString in gitStatusOut for expSubString in expSubStrings)
|
||||
|
||||
def listBranches(self, pattern):
    """Return the output of ``git branch --list pattern`` as text."""
    result = self.checkOutputAsStr((self.gitCmd, "branch", "--list", pattern))
    return result
|
||||
|
||||
def branchExists(self, branchName):
|
||||
listOut = self.listBranches(branchName) # CHECKME: using branchName as pattern may not always be ok.
|
||||
|
@ -303,56 +356,53 @@ class GitRepository(SubProcessAtPath):
|
|||
self.checkCall((self.gitCmd, "merge", branch, fromBranch))
|
||||
|
||||
def getCommitMessage(self, commitRef):
    """Return the commit message (git log format %B) of commitRef as text."""
    result = self.checkOutputAsStr((self.gitCmd, "log", "--format=%B", "-n", "1", commitRef))
    return result
|
||||
|
||||
def getCommitAuthorName(self, commitRef):
    """Return the author name (git log format %aN) of commitRef as text."""
    result = self.checkOutputAsStr((self.gitCmd, "log", "--format=%aN", "-n", "1", commitRef))
    return result
|
||||
|
||||
def getCommitAuthorEmail(self, commitRef):
    """Return the author email (git log format %aE) of commitRef as text."""
    result = self.checkOutputAsStr((self.gitCmd, "log", "--format=%aE", "-n", "1", commitRef))
    return result
|
||||
|
||||
def getLatestCommitForAuthor(self, svnAuthor):
    """Return the id of the first commit listed by git rev-list for the author.

    Runs ``git rev-list --all -i --max-count=1 --author=svnAuthor`` and
    returns its output without trailing newlines; this is the empty string
    when git lists no commit.
    The author is passed as a single argument without a shell, so characters
    in the author name cannot be interpreted as shell syntax;
    --max-count=1 replaces the former pipe into ``head -1``.
    """
    authorCommit = self.checkOutputAsStr(
        (self.gitCmd, "rev-list", "--all", "-i",
         "--max-count=1",  # only the first commit listed
         ("--author=" + svnAuthor)))  # see git commit documentation on --author
    authorCommit = authorCommit.rstrip("\n")
    return authorCommit
|
||||
|
||||
gitSvnMarker = "git-svn-id:"  # added and used by git svn dcommit
svn2gitMarker = "svn2git-id:"  # added and used here.

def getSvnRemoteUuidRevisionFromCommitMessage(self, commitMessage, marker):
    """Parse "marker remoteUrl@revision repoUuid" from a commit message.

    The message is split into whitespace separated words. The word after
    the first occurrence of marker is taken as remoteUrl@revision, and the
    word after that as the svn repository uuid.
    Returns (svnRemote, svnRepoUuid, svnRevision) with an int revision, or
    (None, None, None) when marker is not one of the words.
    The revision is split off at the last "@" because the url itself may contain an "@".
    """
    words = commitMessage.split()
    if marker not in words:
        return (None, None, None)
    markerIndex = words.index(marker)
    svnId = words[markerIndex + 1]
    svnRemote, _, revisionStr = svnId.rpartition("@")
    svnRevision = int(revisionStr)
    svnRepoUuid = words[markerIndex + 2]
    return (svnRemote, svnRepoUuid, svnRevision)

def getSvnRemoteAndRevision(self, gitSvnCommitRef):
    """Return (svnRemote, svnRevision) from the git-svn-id: of the commit message.

    Earlier two value interface, kept for existing callers.
    Raises ValueError when the commit message has no git-svn-id: marker.
    """
    gitSvnCommitMessage = self.getCommitMessage(gitSvnCommitRef)
    (svnRemote, svnRepoUuid, svnRevision) = self.getSvnRemoteUuidRevisionFromCommitMessage(
        gitSvnCommitMessage, self.gitSvnMarker)
    if svnRemote is None:
        raise ValueError("%s not found in commit message of %s" % (self.gitSvnMarker, gitSvnCommitRef))
    return (svnRemote, svnRevision)
|
||||
|
||||
def getSvnRemoteAndUuidAndRevision(self, gitSvnCommitRef):
    """Return (svnRemote, svnRepoUuid, svnRevision) from the git-svn-id: of the commit message.

    All three are None when the message has no git-svn-id: marker.
    """
    gitSvnCommitMessage = self.getCommitMessage(gitSvnCommitRef)
    return self.getSvnRemoteUuidRevisionFromCommitMessage(gitSvnCommitMessage, self.gitSvnMarker)

def applyPatch(self, patchFileName, stripDepth):
    """Run ``git apply -p<stripDepth> --whitespace=nowarn patchFileName``."""
    self.checkCall((self.gitCmd, "apply",
                    ("-p" + str(stripDepth)),
                    "--whitespace=nowarn",
                    patchFileName))

def lastTempGitSvnRevision(self, tempBranchCommitRef):  # at a commit generated here on the temp branch.
    """Return the svn revision from the svn2git-id: of the commit message.

    Returns None when the commit message has no svn2git-id: marker, that is
    when the commit was not generated here.
    """
    gitCommitMessage = self.getCommitMessage(tempBranchCommitRef)
    (svnRemote, svnRepoUuid, svnRevision) = self.getSvnRemoteUuidRevisionFromCommitMessage(
        gitCommitMessage, self.svn2gitMarker)
    return svnRevision
|
||||
|
||||
def addAllToIndex(self):
    """Run ``git add -A`` on the path of this repository."""
    self.checkCall((self.gitCmd, "add", "-A", self.getPathName()))

def deleteForced(self, fileName):
    """Run ``git rm -f fileName``."""
    self.checkCall((self.gitCmd, "rm", "-f", fileName))
|
||||
|
||||
def commit(self, message,
|
||||
authorName, authorEmail, authorDate,
|
||||
|
@ -362,13 +412,13 @@ class GitRepository(SubProcessAtPath):
|
|||
os.environ["GIT_COMMITTER_EMAIL"] = committerEmail
|
||||
os.environ["GIT_COMMITTER_DATE"] = committerDate
|
||||
self.checkCall((self.gitCmd, "commit",
|
||||
"--allow-empty", # only svn poperties changed.
|
||||
"--allow-empty", # in case only svn poperties changed.
|
||||
("--message=" + message),
|
||||
("--author=" + author),
|
||||
("--date=" + authorDate) ))
|
||||
|
||||
def cleanDirsForced(self):
    """Run ``git clean -fd`` once."""
    self.checkCall((self.gitCmd, "clean", "-fd"))  # Use -fdx to also remove ignored files.
|
||||
|
||||
|
||||
|
||||
|
@ -379,11 +429,197 @@ def errorExit(*messageParts):
|
|||
def allSuccessivePairs(lst):
    """Return the list of all slices of two successive elements of lst.

    A sequence with fewer than two elements gives an empty list.
    """
    pairs = []
    for second in range(1, len(lst)):
        pairs.append(lst[second - 1:second + 1])
    return pairs
|
||||
|
||||
def octal(mode):
    """Return the octal digits of the integer mode as a string, without prefix."""
    return "{0:o}".format(mode)
|
||||
|
||||
def checkEqualProtectionBits(fn1, fn2):
    """Return whether os.stat reports the same st_mode for fn1 and fn2.

    When the modes differ both are printed in octal and False is returned.
    """
    modesAndNames = [(os.stat(fn).st_mode, fn) for fn in (fn1, fn2)]
    if modesAndNames[0][0] == modesAndNames[1][0]:
        return True
    for mode, fn in modesAndNames:
        print("Protection bits %s of %s" % (octal(mode), fn))
    return False
|
||||
|
||||
|
||||
def verifyGitFilesAgainstSvn(gitRepo, svnWorkingCopy):
    """Compare each file listed by git ls-tree with the same file in the svn working copy.

    The file names come from ``git ls-tree -r --name-only`` on the current
    git branch. For each file ``diff -q`` is run on the two copies, and the
    protection bits are compared with checkEqualProtectionBits.
    Differences are printed. Returns True when none were found, otherwise False.
    """
    # The files under version control at the git repo can be enumerated quickly by: git ls-tree -r trunk.svn | cut --fields=2-
    # This makes sense because all files, including binary files, are added.
    # svn ls -R is too slow to use here (this lists about 12 file names per second, lucene-solr has well over 4000).
    fileNamesOut = gitRepo.checkOutputAsStr((gitRepo.gitCmd, "ls-tree", "-r", "--name-only", gitRepo.getCurrentBranch()))
    fileNames = nonEmptyLines(fileNamesOut)
    print("verifyGitFilesAgainstSvn checking", len(fileNames), "files")
    result = True
    for fileName in fileNames:
        fileNameInGitRepo = os.path.join(gitRepo.getPathName(), fileName)
        fileNameInSvnWorkingCopy = os.path.join(svnWorkingCopy.getPathName(), fileName)
        try:
            subprocess.check_output(("diff", "-q", fileNameInGitRepo, fileNameInSvnWorkingCopy))
        except subprocess.CalledProcessError as exitError:  # diff exits non zero on a difference or a missing file
            print("difference in file", fileName)
            print("diff exitError", exitError)
            result = False

        if not (os.path.exists(fileNameInGitRepo) and os.path.exists(fileNameInSvnWorkingCopy)):
            # Already reported by diff above, and os.stat would raise on the missing file.
            result = False
            continue

        if not checkEqualProtectionBits(fileNameInSvnWorkingCopy, fileNameInGitRepo):
            result = False

    if result:
        print("no differences")
    else:
        print("some differences")
    return result
|
||||
|
||||
"""
|
||||
On clean checkouts of both svn and git the command:
|
||||
diff -r svndir gitdir
|
||||
|
||||
reports only .svn .git and empty directories in the svn working copy, for example:
|
||||
|
||||
Only in ./svnwork/lucene-solr/lucene/analysis/icu: lib
|
||||
|
||||
This diff output could be checked here.
|
||||
To clean an svn working copy:
|
||||
|
||||
rm -r * # also .hgignore .caches, all except .svn
|
||||
svn update # this is a local svn operation
|
||||
|
||||
To clean a git working directory:
|
||||
|
||||
rm -r * # all except .git
|
||||
git checkout branchname -- .
|
||||
|
||||
"""
|
||||
|
||||
|
||||
def deleteEmptyDirs(pathName, topDirName):
    """ Delete higher level directories of pathName when empty, but do not delete topDirName """
    parentDir = os.path.dirname(pathName)
    while parentDir != topDirName:
        if os.listdir(parentDir):
            break  # first non empty directory: stop
        assert parentDir.startswith(topDirName)
        os.rmdir(parentDir)
        parentDir = os.path.dirname(parentDir)
|
||||
|
||||
|
||||
def _deleteFromGitTree(fileNameInGitRepo, fileName, gitTopDir):
    """Delete a file or directory tree from the git working tree, and the parent directories left empty."""
    if os.path.isdir(fileNameInGitRepo):
        print("Deleting directory %s" % fileNameInGitRepo)
        shutil.rmtree(fileNameInGitRepo)  # delete completely in git working tree
        deleteEmptyDirs(fileNameInGitRepo, gitTopDir)  # delete empty dirs in git repo
    elif os.path.isfile(fileNameInGitRepo):
        print("Deleting file %s" % fileNameInGitRepo)
        os.remove(fileNameInGitRepo)
        deleteEmptyDirs(fileNameInGitRepo, gitTopDir)
    else:
        print("Non deleting non existing file %s" % fileName)


def _addOrUpdateInGitTree(fileNameInSvnWorkingCopy, fileNameInGitRepo, fileName):
    """Create the directory, or copy the file, from the svn working copy into the git working tree.

    Returns True when a file was copied, so that its protection bits are still to be set.
    """
    if os.path.isdir(fileNameInSvnWorkingCopy):
        if not os.path.isdir(fileNameInGitRepo):
            print("Creating directory %s" % fileName)
            os.makedirs(fileNameInGitRepo)  # also creates missing higher level directories
        else:
            print("Not creating existing directory %s" % fileName)
        return False
    if os.path.isfile(fileNameInSvnWorkingCopy):
        head, tail = os.path.split(fileNameInGitRepo)
        if not os.path.isdir(head):
            print("Creating directory for file %s" % fileNameInGitRepo)
            os.makedirs(head)  # also creates missing higher level directories
        shutil.copyfile(fileNameInSvnWorkingCopy, fileNameInGitRepo)
        return True
    assert False, "Cannot add or update non existing file %s" % fileNameInSvnWorkingCopy


def _copyProtectionBits(fileNameInSvnWorkingCopy, fileNameInGitRepo):
    """Give the file in the git working tree the mode of the file in the svn working copy.

    Does nothing unless both are existing regular files.
    """
    if not (os.path.isfile(fileNameInSvnWorkingCopy) and os.path.isfile(fileNameInGitRepo)):
        return
    statSvn = os.stat(fileNameInSvnWorkingCopy)
    statGit = os.stat(fileNameInGitRepo)
    if statSvn.st_mode != statGit.st_mode:
        print("Changing mode from %s to %s for %s"
              % (format(statGit.st_mode, "o"), format(statSvn.st_mode, "o"), fileNameInGitRepo))
        os.chmod(fileNameInGitRepo, statSvn.st_mode)


def setGitWorkingTreeViaSvnCheckout(svnWorkingCopy, revision, gitRepo):
    """Update the svn working copy to revision, and repeat the reported changes in the git working tree.

    Each line of the svn update output is interpreted. Some example lines:

      U    solr/solrj/src/test/org/apache/solr/client/solrj/io/sql/JdbcTest.java
       U   solr/core
      Updated to revision 1707390.

    From svn help update: the character in the first column reports about
    the item itself (A Added, D Deleted, U Updated, C Conflict, G Merged,
    E Existed, R Replaced), the second column reports about properties of
    the item, a 'B' in the third column signifies a broken or stolen lock,
    and a 'C' in the fourth column indicates a tree conflict.
    Only ' ', 'A', 'D' and 'U' are accepted in the first two columns, and
    only ' ' in the third and fourth; anything else fails an assertion.

    Deleted items are deleted from the git working tree, added and updated
    items are copied from the svn working copy. The protection bits of a
    file are copied when the file was copied or its properties changed.
    The svn path is determined for every line, so a property only change
    is never applied using the file name of an earlier line.
    """
    svnUpdateOutput = svnWorkingCopy.updateOutput(revision)
    svnTopDir = svnWorkingCopy.getPathName()
    gitTopDir = gitRepo.getPathName()
    validItemChars = (" ", "A", "D", "U")

    for svnUpdateLine in svnUpdateOutput.split("\n"):
        if not svnUpdateLine:
            continue

        if svnUpdateLine.startswith("Updating "):  # first line
            continue

        if svnUpdateLine.startswith("Updated to"):  # last line
            revisionStr = svnUpdateLine.split()[3][:-1]
            assert revision == int(revisionStr), revisionStr
            continue

        if svnUpdateLine.startswith("At revision"):  # last line when svn changed nothing, not an item line
            continue

        print(svnUpdateLine)
        itemChar = svnUpdateLine[0]
        itemPropChar = svnUpdateLine[1]
        lockChar = svnUpdateLine[2]
        treeConflictChar = svnUpdateLine[3]
        fileName = svnUpdateLine[5:]

        assert itemChar in validItemChars, "revision %d itemChar %s, fileName %s" % (revision, itemChar, fileName)
        assert itemPropChar in validItemChars, "revision %d itemPropChar %s, working copy not clean fileName %s" % (revision, itemPropChar, fileName)
        assert lockChar == " ", "revision %d lockChar %s fileName %s" % (revision, lockChar, fileName)
        assert treeConflictChar == " ", "revision %d treeConflictChar %s fileName %s" % (revision, treeConflictChar, fileName)

        fileNameInGitRepo = os.path.join(gitTopDir, fileName)
        fileNameInSvnWorkingCopy = os.path.join(svnTopDir, fileName)
        setFileProtectionBits = False
        if itemChar == "D":  # deleted in svn working copy
            _deleteFromGitTree(fileNameInGitRepo, fileName, gitTopDir)
        elif itemChar in ("A", "U"):  # added or updated in svn working copy
            setFileProtectionBits = _addOrUpdateInGitTree(fileNameInSvnWorkingCopy, fileNameInGitRepo, fileName)
        else:
            assert itemChar == " "  # nothing to do

        if itemPropChar != " ":
            print("At revision %d ignoring svn property change type %s for file %s" % (revision, itemPropChar, fileName))
            setFileProtectionBits = True  # svn:executable may have been set or unset.

        if setFileProtectionBits:
            _copyProtectionBits(fileNameInSvnWorkingCopy, fileNameInGitRepo)
|
||||
|
||||
|
||||
def assertUrlsSameExceptScheme(url1, url2):  # may only differ by scheme http:// or https://
    """Assert that the two urls have equal parts after parsing, ignoring the scheme.

    Compared in this order: netloc, path, params, query and fragment.
    The first unequal part fails an assertion.
    """
    parts1 = urlparse(url1)
    parts2 = urlparse(url2)
    # Index 0 of the parse result is the scheme, which is not compared.
    for part1, part2 in zip(parts1[1:], parts2[1:]):
        assert part1 == part2
|
||||
|
||||
|
||||
def maintainTempGitSvnBranch(branchName, tempGitBranchName,
|
||||
svnWorkingCopyOfBranchPath, svnRepoBranchName,
|
||||
gitRepoPath, gitUpstream,
|
||||
patchFileName,
|
||||
maxCommits=20, # generate at most this number of commits on tempGitBranchName, rerun to add more.
|
||||
testMode=False):
|
||||
|
||||
|
@ -396,8 +632,8 @@ def maintainTempGitSvnBranch(branchName, tempGitBranchName,
|
|||
svnWorkingCopy.ensureNoLocalModifications()
|
||||
svnWorkingCopy.switch(svnRepoBranchName) # switch to repo branch, update to latest revision
|
||||
|
||||
lastSvnRevision = svnWorkingCopy.lastChangedRevision()
|
||||
# print svnWorkingCopy, "lastSvnRevision:", lastSvnRevision
|
||||
lastSvnRevision = svnWorkingCopy.lastChangedRevision() # int to allow comparison
|
||||
#print(svnWorkingCopy, "lastSvnRevision:", lastSvnRevision)
|
||||
|
||||
gitRepo.fetch(gitUpstream)
|
||||
if testMode:
|
||||
|
@ -405,9 +641,14 @@ def maintainTempGitSvnBranch(branchName, tempGitBranchName,
|
|||
else:
|
||||
gitRepo.merge(branchName, gitUpstream + "/" + branchName)
|
||||
|
||||
(svnRemote, lastSvnRevisionOnGitSvnBranch) = gitRepo.getSvnRemoteAndRevision(branchName)
|
||||
print "svnRemote:", svnRemote
|
||||
#print gitRepo, branchName, "lastSvnRevisionOnGitSvnBranch:", lastSvnRevisionOnGitSvnBranch
|
||||
(gitSvnRemote, gitSvnRepoUuid, lastSvnRevisionOnGitSvnBranch) = gitRepo.getSvnRemoteAndUuidAndRevision(branchName)
|
||||
svnUrl = svnWorkingCopy.getUrl()
|
||||
svnRepoUuid = svnWorkingCopy.getUuid()
|
||||
print("gitSvnRemote:", gitSvnRemote)
|
||||
print("svnUrl:", svnUrl)
|
||||
print("svn repo uuid:", svnRepoUuid)
|
||||
assertUrlsSameExceptScheme(gitSvnRemote, svnUrl)
|
||||
assert gitSvnRepoUuid == svnRepoUuid
|
||||
|
||||
# check whether tempGitBranchName exists:
|
||||
diffBaseRevision = lastSvnRevisionOnGitSvnBranch
|
||||
|
@ -415,11 +656,11 @@ def maintainTempGitSvnBranch(branchName, tempGitBranchName,
|
|||
doCommitOnExistingTempBranch = False
|
||||
|
||||
if gitRepo.branchExists(tempGitBranchName):
|
||||
print tempGitBranchName, "exists"
|
||||
print(tempGitBranchName, "exists")
|
||||
# update lastSvnRevisionOnGitSvnBranch from there.
|
||||
svnTempRevision = gitRepo.lastTempGitSvnRevision(tempGitBranchName)
|
||||
if svnTempRevision is None:
|
||||
print "Warning: no svn revision found on branch:", tempGitBranchName
|
||||
print("Warning: no svn revision found on branch:", tempGitBranchName)
|
||||
else:
|
||||
if svnTempRevision > lastSvnRevisionOnGitSvnBranch:
|
||||
diffBaseRevision = svnTempRevision
|
||||
|
@ -427,108 +668,85 @@ def maintainTempGitSvnBranch(branchName, tempGitBranchName,
|
|||
gitRepo.checkOutBranch(tempGitBranchName)
|
||||
|
||||
if lastSvnRevision == diffBaseRevision:
|
||||
print gitRepo, gitRepo.getCurrentBranch(), "up to date with", svnWorkingCopy, svnRepoBranchName
|
||||
print(gitRepo, gitRepo.getCurrentBranch(), "up to date with", svnWorkingCopy, svnRepoBranchName)
|
||||
verifyGitFilesAgainstSvn(gitRepo, svnWorkingCopy)
|
||||
return
|
||||
|
||||
if lastSvnRevision < diffBaseRevision: # unlikely, do nothing
|
||||
print gitRepo, gitRepo.getCurrentBranch(), "later than", svnWorkingCopy, ", nothing to update."
|
||||
# CHECK: generate svn commits from the git commits?
|
||||
print(gitRepo, gitRepo.getCurrentBranch(), "later than", svnWorkingCopy, ", nothing to update.")
|
||||
return
|
||||
|
||||
print gitRepo, gitRepo.getCurrentBranch(), "earlier than", svnWorkingCopy
|
||||
print(gitRepo, gitRepo.getCurrentBranch(), "earlier than", svnWorkingCopy)
|
||||
|
||||
if not gitRepo.workingDirectoryClean():
|
||||
errorExit(gitRepo, "on branch", gitRepo.getCurrentBranch(), "not clean")
|
||||
|
||||
print gitRepo,"on branch", gitRepo.getCurrentBranch(), "and clean"
|
||||
print(gitRepo,"on branch", gitRepo.getCurrentBranch(), "and clean")
|
||||
|
||||
if not doCommitOnExistingTempBranch: # restart temp branch from branch
|
||||
assert gitRepo.getCurrentBranch() == branchName
|
||||
if gitRepo.branchExists(tempGitBranchName): # tempGitBranchName exists, delete it first.
|
||||
print "Branch", tempGitBranchName, "exists, deleting"
|
||||
print("Branch", tempGitBranchName, "exists, deleting")
|
||||
gitRepo.deleteBranch(tempGitBranchName)
|
||||
if gitRepo.branchExists(tempGitBranchName):
|
||||
errorExit("Could not delete branch", tempGitBranchName, "from", gitRepo)
|
||||
|
||||
gitRepo.createBranch(tempGitBranchName)
|
||||
gitRepo.checkOutBranch(tempGitBranchName)
|
||||
print "Started branch", tempGitBranchName, "at", branchName
|
||||
print("Started branch", tempGitBranchName, "at", branchName)
|
||||
|
||||
assert gitRepo.getCurrentBranch() == tempGitBranchName
|
||||
|
||||
patchStripDepth = 0 # patch generated at svn repo.
|
||||
|
||||
maxNumLogEntries = maxCommits + 1
|
||||
svnLogEntries = svnWorkingCopy.getLogEntries(diffBaseRevision, lastSvnRevision, maxNumLogEntries)
|
||||
|
||||
numCommits = 0
|
||||
|
||||
startRevision = svnLogEntries[0].revision
|
||||
ignore = svnWorkingCopy.updateOutput(startRevision)
|
||||
|
||||
for (logEntryFrom, logEntryTo) in allSuccessivePairs(svnLogEntries):
|
||||
# create patch file from svn between the revisions:
|
||||
svnWorkingCopy.createPatchFile(logEntryFrom.revision, logEntryTo.revision, patchFileName)
|
||||
setGitWorkingTreeViaSvnCheckout(svnWorkingCopy, logEntryTo.revision, gitRepo)
|
||||
|
||||
patchedFileNames = svnWorkingCopy.patchedFileNames(patchFileName)
|
||||
|
||||
if os.path.getsize(patchFileName) > 0:
|
||||
gitRepo.applyPatch(patchFileName, patchStripDepth)
|
||||
print "Applied patch", patchFileName
|
||||
else: # only svn properties changed, do git commit for commit info only.
|
||||
print "Empty patch", patchFileName
|
||||
|
||||
gitRepo.addAllToIndex() # add all patch changes to the git index to be committed.
|
||||
|
||||
# Applying the patch leaves files that have been actually deleted at zero size.
|
||||
# Therefore delete empty patched files from the git repo that do not exist in svn working copy:
|
||||
for patchedFileName in patchedFileNames:
|
||||
fileNameInGitRepo = os.path.join(gitRepo.getPathName(), patchedFileName)
|
||||
fileNameInSvnWorkingCopy = os.path.join(svnWorkingCopy.getPathName(), patchedFileName)
|
||||
|
||||
if os.path.isdir(fileNameInGitRepo):
|
||||
# print "Directory:", fileNameInGitRepo
|
||||
continue
|
||||
|
||||
if not os.path.isfile(fileNameInGitRepo):
|
||||
print "Possibly new binary file in svn, ignored here:", fileNameInGitRepo
|
||||
# FIXME: Take a new binary file out of the svn repository directly.
|
||||
continue
|
||||
|
||||
fileSize = os.path.getsize(fileNameInGitRepo)
|
||||
if fileSize > 0:
|
||||
# print "Non empty file patched normally:", fileNameInGitRepo
|
||||
continue
|
||||
|
||||
# fileNameInGitRepo exists and is empty
|
||||
if os.path.isfile(fileNameInSvnWorkingCopy):
|
||||
# FIXME: this only works correctly when the svn working copy is checked out at the target revision.
|
||||
print "Left empty file:", fileNameInGitRepo
|
||||
continue
|
||||
|
||||
gitRepo.deleteForced(fileNameInGitRepo) # force, the file is not up to date. This also stages the delete for commit.
|
||||
# print "Deleted empty file", fileNameInGitRepo # not needed, git rm is verbose enough
|
||||
gitRepo.addAllToIndex() # add all changes from the git working tree to the git index.
|
||||
|
||||
# commit, put toRevision at end so it can be picked up later.
|
||||
revisionsRange = svnWorkingCopy.revisionsRange(logEntryFrom.revision, logEntryTo.revision)
|
||||
message = logEntryTo.msg + "\n\n" + svnRemote + " diff -r " + revisionsRange
|
||||
|
||||
commitMessageMetaData = gitRepo.svn2gitMarker + " " + gitSvnRemote + "@" + str(logEntryTo.revision) + " " + gitSvnRepoUuid
|
||||
# git-svn adds this commit metadata:
|
||||
# git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1719562 13f79535-47bb-0310-9956-ffa450edef68
|
||||
# This script uses svn2git-id: instead of git-svn-id:
|
||||
|
||||
message = logEntryTo.msg + "\n\n" + commitMessageMetaData
|
||||
|
||||
authorCommit = gitRepo.getLatestCommitForAuthor(logEntryTo.author)
|
||||
authorName = gitRepo.getCommitAuthorName(authorCommit)
|
||||
authorEmail = gitRepo.getCommitAuthorEmail(authorCommit)
|
||||
# print "Author name and email:", authorName, authorEmail
|
||||
# print("Author name and email:", authorName, authorEmail)
|
||||
gitRepo.commit(message,
|
||||
authorName, authorEmail, logEntryTo.date,
|
||||
authorName, authorEmail, logEntryTo.date) # author is also git committer, just like git-svn
|
||||
|
||||
numCommits += 1
|
||||
|
||||
# print "Commit author:", logEntryTo.author
|
||||
# print "Commit date:", logEntryTo.date
|
||||
print "Commit message:", logEntryTo.msg
|
||||
#print("Commit author:", logEntryTo.author)
|
||||
print("Commit date:", logEntryTo.date)
|
||||
#print("Commit message:", logEntryTo.msg)
|
||||
|
||||
gitRepo.cleanDirsForced() # delete untracked directories and files
|
||||
|
||||
if not gitRepo.workingDirectoryClean():
|
||||
errorExit(gitRepo, "on branch", gitRepo.getCurrentBranch(), "not clean, numCommits:", numCommits)
|
||||
|
||||
print "Added", numCommits, "commit(s) to branch", tempGitBranchName
|
||||
diffBaseRevision = logEntryTo.revision
|
||||
print('') # show empty line after commit info
|
||||
|
||||
print("Added", numCommits, "commit(s) to branch", tempGitBranchName)
|
||||
|
||||
if lastSvnRevision == diffBaseRevision:
|
||||
print(gitRepo, gitRepo.getCurrentBranch(), "up to date with", svnWorkingCopy, svnRepoBranchName)
|
||||
verifyGitFilesAgainstSvn(gitRepo, svnWorkingCopy)
|
||||
return
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
@ -547,7 +765,7 @@ if __name__ == "__main__":
|
|||
maxCommits = int(argv[0])
|
||||
assert maxCommits >= 1
|
||||
except:
|
||||
errorExit("Argument(s) should be test and/or a maximum number of commits, defaults are false and " + defaultMaxCommits)
|
||||
errorExit("Argument(s) [test] [maximum number of commits], defaults are false and " + defaultMaxCommits)
|
||||
argv = argv[1:]
|
||||
|
||||
repo = "lucene-solr"
|
||||
|
@ -556,17 +774,14 @@ if __name__ == "__main__":
|
|||
|
||||
home = os.path.expanduser("~")
|
||||
|
||||
svnWorkingCopyOfBranchPath = os.path.join(home, "svnwork", repo, branchName)
|
||||
svnWorkingCopyOfBranchPath = os.path.join(home, "svnwork", repo)
|
||||
svnRepoBranchName = "lucene/dev/" + branchName # for svn switch to
|
||||
|
||||
gitRepo = os.path.join(home, "gitrepos", repo)
|
||||
gitRepoPath = os.path.join(home, "gitrepos", repo)
|
||||
gitUpstream = "upstream"
|
||||
|
||||
patchFileName = os.path.join(home, "patches", tempGitBranchName)
|
||||
|
||||
maintainTempGitSvnBranch(branchName, tempGitBranchName,
|
||||
svnWorkingCopyOfBranchPath, svnRepoBranchName,
|
||||
gitRepo, gitUpstream,
|
||||
patchFileName,
|
||||
gitRepoPath, gitUpstream,
|
||||
maxCommits=maxCommits,
|
||||
testMode=testMode)
|
||||
|
|
Loading…
Reference in New Issue