From ae956db41c4b40d3ea7c028d6abe9b71da1ae74c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan=20H=C3=B8ydahl?=
Date: Wed, 13 Oct 2021 15:24:14 +0200
Subject: [PATCH] LUCENE-9997 Revisit smoketester for 9.0 build (#355)

* LUCENE-9997 Revisit smoketester for 9.0 build
* Remove checkBrokenLinks
* Add back checkBrokenLinks
* Review feedback. Remove traces of solr-specific testNotice() method
  Move backCompat test up to other "if isSrc" block
* Review feedback. Bring back the 'checkMaven()' method, as it checks lucene maven artifacts.
  But since we don't have pom template files anymore, no need to compare with templates
* Review feedback. Fix script compatibility by comparing against X.Y instead of X.Y.Z
* Review feedback. Remove unnecessary if lucene test
  Convert some ant commands to gradle
* Update MANIFEST tests to match the gradle-produced manifest
* LUCENE-10107 Read multi-line commit from Manifest
  Backport from branch_8x
* Collapse for project in 'lucene' loops and methods taking 'project' as argument
  Disable checkJavadocLinks, as this dependency no longer exists in 'scripts' folder
* Review feedback - fix more ant stuff, convert to gradle equivalent
* Review feedback: Refactor file open
* Comment out javadoc generation - was only used to check broken links?
* Fix charset of gpg console output to always be utf-8 Fix two more places to use with open() * Accept 'LICENSE' without txt or md suffix in top-level * Disable vector dictionary abuse exception if started with -Dsmoketester * Reformat code * Use -Dsmoketester flag when invoking IndexFiles --- dev-tools/scripts/buildAndPushRelease.py | 6 +- dev-tools/scripts/smokeTestRelease.py | 892 ++++++------------ .../org/apache/lucene/demo/IndexFiles.java | 4 +- 3 files changed, 315 insertions(+), 587 deletions(-) diff --git a/dev-tools/scripts/buildAndPushRelease.py b/dev-tools/scripts/buildAndPushRelease.py index 7f573c133bc..7593ad9dfd7 100755 --- a/dev-tools/scripts/buildAndPushRelease.py +++ b/dev-tools/scripts/buildAndPushRelease.py @@ -329,7 +329,11 @@ def main(): print('Next run the smoker tester:') p = re.compile(".*/") m = p.match(sys.argv[0]) - print('%s -u %ssmokeTestRelease.py %s' % (sys.executable, m.group(), url)) + if not c.sign: + signed = "--not-signed" + else: + signed = "" + print('%s -u %ssmokeTestRelease.py %s %s' % (sys.executable, m.group(), signed, url)) if __name__ == '__main__': try: diff --git a/dev-tools/scripts/smokeTestRelease.py b/dev-tools/scripts/smokeTestRelease.py index eca6e65d821..c07a40945e9 100755 --- a/dev-tools/scripts/smokeTestRelease.py +++ b/dev-tools/scripts/smokeTestRelease.py @@ -35,19 +35,19 @@ import urllib.parse import urllib.request import xml.etree.ElementTree as ET import zipfile -from collections import defaultdict from collections import namedtuple import scriptutil -import checkJavadocLinks +# import checkJavadocLinks -# This tool expects to find /lucene and /solr off the base URL. You +# This tool expects to find /lucene off the base URL. You # must have a working gpg, tar, unzip in your path. This has been # tested on Linux and on Cygwin under Windows 7. 
cygwin = platform.system().lower().startswith('cygwin') cygwinWindowsRoot = os.popen('cygpath -w /').read().strip().replace('\\','/') if cygwin else '' + def unshortenURL(url): parsed = urllib.parse.urlparse(url) if parsed[0] in ('http', 'https'): @@ -56,7 +56,7 @@ def unshortenURL(url): response = h.getresponse() if int(response.status/100) == 3 and response.getheader('Location'): return response.getheader('Location') - return url + return url # TODO # + verify KEYS contains key that signed the release @@ -67,7 +67,6 @@ def unshortenURL(url): # - check license/notice exist # - check no "extra" files # - make sure jars exist inside bin release -# - run "ant test" # - make sure docs exist # - use java5 for lucene/modules @@ -76,6 +75,7 @@ reHREF = re.compile('(.*?)') # Set to False to avoid re-downloading the packages... FORCE_CLEAN = True + def getHREFs(urlString): # Deref any redirects @@ -102,7 +102,7 @@ def getHREFs(urlString): print('\nFAILED to open url %s' % urlString) traceback.print_exc() raise - + for subUrl, text in reHREF.findall(html): fullURL = urllib.parse.urljoin(urlString, subUrl) links.append((text, fullURL)) @@ -117,19 +117,23 @@ def load(urlString): content = urllib.request.urlopen(urlString).read().decode('utf-8') return content + def noJavaPackageClasses(desc, file): with zipfile.ZipFile(file) as z2: for name2 in z2.namelist(): if name2.endswith('.class') and (name2.startswith('java/') or name2.startswith('javax/')): - raise RuntimeError('%s contains sheisty class "%s"' % (desc, name2)) + raise RuntimeError('%s contains sheisty class "%s"' % (desc, name2)) + def decodeUTF8(bytes): return codecs.getdecoder('UTF-8')(bytes)[0] + MANIFEST_FILE_NAME = 'META-INF/MANIFEST.MF' NOTICE_FILE_NAME = 'META-INF/NOTICE.txt' LICENSE_FILE_NAME = 'META-INF/LICENSE.txt' + def checkJARMetaData(desc, jarFile, gitRevision, version): with zipfile.ZipFile(jarFile, 'r') as z: @@ -141,21 +145,19 @@ def checkJARMetaData(desc, jarFile, gitRevision, version): raise 
RuntimeError('%s is missing %s' % (desc, name)) except KeyError: raise RuntimeError('%s is missing %s' % (desc, name)) - + s = decodeUTF8(z.read(MANIFEST_FILE_NAME)) - + for verify in ( 'Specification-Vendor: The Apache Software Foundation', 'Implementation-Vendor: The Apache Software Foundation', - # Make sure 1.8 compiler was used to build release bits: + 'Specification-Title: Lucene Search Engine:', + 'Implementation-Title: org.apache.lucene', 'X-Compile-Source-JDK: 11', - # Make sure 1.8, 1.9 or 1.10 ant was used to build release bits: (this will match 1.8.x, 1.9.x, 1.10.x) - ('Ant-Version: Apache Ant 1.8', 'Ant-Version: Apache Ant 1.9', 'Ant-Version: Apache Ant 1.10'), - # Make sure .class files are 1.8 format: 'X-Compile-Target-JDK: 11', 'Specification-Version: %s' % version, - # Make sure the release was compiled with 1.8: - 'Created-By: 11'): + 'X-Build-JDK: 11.', + 'Extension-Name: org.apache.lucene'): if type(verify) is not tuple: verify = (verify,) for x in verify: @@ -169,119 +171,86 @@ def checkJARMetaData(desc, jarFile, gitRevision, version): if gitRevision != 'skip': # Make sure this matches the version and git revision we think we are releasing: - # TODO: LUCENE-7023: is it OK that Implementation-Version's value now spans two lines? 
- verifyRevision = 'Implementation-Version: %s %s' % (version, gitRevision) - if s.find(verifyRevision) == -1: - raise RuntimeError('%s is missing "%s" inside its META-INF/MANIFEST.MF (wrong git revision?)' % \ + match = re.search("Implementation-Version: (.+\r\n .+)", s, re.MULTILINE) + if match: + implLine = match.group(1).replace("\r\n ", "") + verifyRevision = '%s %s' % (version, gitRevision) + if implLine.find(verifyRevision) == -1: + raise RuntimeError('%s is missing "%s" inside its META-INF/MANIFEST.MF (wrong git revision?)' % \ (desc, verifyRevision)) + else: + raise RuntimeError('%s is missing Implementation-Version inside its META-INF/MANIFEST.MF' % desc) notice = decodeUTF8(z.read(NOTICE_FILE_NAME)) - license = decodeUTF8(z.read(LICENSE_FILE_NAME)) + lucene_license = decodeUTF8(z.read(LICENSE_FILE_NAME)) + + if LUCENE_LICENSE is None: + raise RuntimeError('BUG in smokeTestRelease!') + if LUCENE_NOTICE is None: + raise RuntimeError('BUG in smokeTestRelease!') + if notice != LUCENE_NOTICE: + raise RuntimeError('%s: %s contents doesn\'t match main NOTICE.txt' % \ + (desc, NOTICE_FILE_NAME)) + if lucene_license != LUCENE_LICENSE: + raise RuntimeError('%s: %s contents doesn\'t match main LICENSE.txt' % \ + (desc, LICENSE_FILE_NAME)) - justFileName = os.path.split(desc)[1] - - if justFileName.lower().find('solr') != -1: - if SOLR_LICENSE is None: - raise RuntimeError('BUG in smokeTestRelease!') - if SOLR_NOTICE is None: - raise RuntimeError('BUG in smokeTestRelease!') - if notice != SOLR_NOTICE: - raise RuntimeError('%s: %s contents doesn\'t match main NOTICE.txt' % \ - (desc, NOTICE_FILE_NAME)) - if license != SOLR_LICENSE: - raise RuntimeError('%s: %s contents doesn\'t match main LICENSE.txt' % \ - (desc, LICENSE_FILE_NAME)) - else: - if LUCENE_LICENSE is None: - raise RuntimeError('BUG in smokeTestRelease!') - if LUCENE_NOTICE is None: - raise RuntimeError('BUG in smokeTestRelease!') - if notice != LUCENE_NOTICE: - raise RuntimeError('%s: %s contents 
doesn\'t match main NOTICE.txt' % \ - (desc, NOTICE_FILE_NAME)) - if license != LUCENE_LICENSE: - raise RuntimeError('%s: %s contents doesn\'t match main LICENSE.txt' % \ - (desc, LICENSE_FILE_NAME)) def normSlashes(path): return path.replace(os.sep, '/') - -def checkAllJARs(topDir, project, gitRevision, version, tmpDir, baseURL): + +def checkAllJARs(topDir, gitRevision, version): print(' verify JAR metadata/identity/no javax.* or java.* classes...') - if project == 'solr': - luceneDistFilenames = dict() - for file in getBinaryDistFiles('lucene', tmpDir, version, baseURL): - luceneDistFilenames[os.path.basename(file)] = file for root, dirs, files in os.walk(topDir): normRoot = normSlashes(root) - if project == 'solr' and normRoot.endswith('/server/lib'): - # Solr's example intentionally ships servlet JAR: - continue - for file in files: if file.lower().endswith('.jar'): - if project == 'solr': - if ((normRoot.endswith('/test-framework/lib') and file.startswith('jersey-')) - or (normRoot.endswith('/contrib/extraction/lib') and file.startswith('xml-apis-'))): - print(' **WARNING**: skipping check of %s/%s: it has javax.* classes' % (root, file)) - continue - else: - if normRoot.endswith('/replicator/lib') and file.startswith('javax.servlet'): - continue + if normRoot.endswith('/replicator/lib') and file.startswith('javax.servlet'): + continue fullPath = '%s/%s' % (root, file) noJavaPackageClasses('JAR file "%s"' % fullPath, fullPath) - if file.lower().find('lucene') != -1 or file.lower().find('solr') != -1: + if file.lower().find('lucene') != -1: checkJARMetaData('JAR file "%s"' % fullPath, fullPath, gitRevision, version) - if project == 'solr' and file.lower().find('lucene') != -1: - jarFilename = os.path.basename(file) - if jarFilename not in luceneDistFilenames: - raise RuntimeError('Artifact %s is not present in Lucene binary distribution' % fullPath) - identical = filecmp.cmp(fullPath, luceneDistFilenames[jarFilename], shallow=False) - if not identical: - raise 
RuntimeError('Artifact %s is not identical to %s in Lucene binary distribution' - % (fullPath, luceneDistFilenames[jarFilename])) -def checkSigs(project, urlString, version, tmpDir, isSigned, keysFile): - +def checkSigs(urlString, version, tmpDir, isSigned, keysFile): print(' test basics...') ents = getDirEntries(urlString) artifact = None changesURL = None mavenURL = None + artifactURL = None expectedSigs = [] if isSigned: expectedSigs.append('asc') expectedSigs.extend(['sha512']) - + sigs = [] artifacts = [] + for text, subURL in ents: if text == 'KEYS': - raise RuntimeError('%s: release dir should not contain a KEYS file - only toplevel /dist/lucene/KEYS is used' % project) + raise RuntimeError('lucene: release dir should not contain a KEYS file - only toplevel /dist/lucene/KEYS is used') elif text == 'maven/': mavenURL = subURL elif text.startswith('changes'): if text not in ('changes/', 'changes-%s/' % version): - raise RuntimeError('%s: found %s vs expected changes-%s/' % (project, text, version)) + raise RuntimeError('lucene: found %s vs expected changes-%s/' % (text, version)) changesURL = subURL - elif artifact == None: + elif artifact is None: artifact = text artifactURL = subURL - if project == 'solr': - expected = 'solr-%s' % version - else: - expected = 'lucene-%s' % version + expected = 'lucene-%s' % version if not artifact.startswith(expected): - raise RuntimeError('%s: unknown artifact %s: expected prefix %s' % (project, text, expected)) + raise RuntimeError('lucene: unknown artifact %s: expected prefix %s' % (text, expected)) sigs = [] elif text.startswith(artifact + '.'): sigs.append(text[len(artifact)+1:]) else: if sigs != expectedSigs: - raise RuntimeError('%s: artifact %s has wrong sigs: expected %s but got %s' % (project, artifact, expectedSigs, sigs)) + raise RuntimeError('lucene: artifact %s has wrong sigs: expected %s but got %s' % (artifact, expectedSigs, sigs)) artifacts.append((artifact, artifactURL)) artifact = text artifactURL = subURL 
@@ -290,35 +259,30 @@ def checkSigs(project, urlString, version, tmpDir, isSigned, keysFile): if sigs != []: artifacts.append((artifact, artifactURL)) if sigs != expectedSigs: - raise RuntimeError('%s: artifact %s has wrong sigs: expected %s but got %s' % (project, artifact, expectedSigs, sigs)) + raise RuntimeError('lucene: artifact %s has wrong sigs: expected %s but got %s' % (artifact, expectedSigs, sigs)) - if project == 'lucene': - expected = ['lucene-%s-src.tgz' % version, - 'lucene-%s.tgz' % version, - 'lucene-%s.zip' % version] - else: - expected = ['solr-%s-src.tgz' % version, - 'solr-%s.tgz' % version, - 'solr-%s.zip' % version] + expected = ['lucene-%s-src.tgz' % version, + 'lucene-%s.tgz' % version, + 'lucene-%s.zip' % version] actual = [x[0] for x in artifacts] if expected != actual: - raise RuntimeError('%s: wrong artifacts: expected %s but got %s' % (project, expected, actual)) - + raise RuntimeError('lucene: wrong artifacts: expected %s but got %s' % (expected, actual)) + # Set up clean gpg world; import keys file: - gpgHomeDir = '%s/%s.gpg' % (tmpDir, project) + gpgHomeDir = '%s/lucene.gpg' % tmpDir if os.path.exists(gpgHomeDir): shutil.rmtree(gpgHomeDir) os.makedirs(gpgHomeDir, 0o700) run('gpg --homedir %s --import %s' % (gpgHomeDir, keysFile), - '%s/%s.gpg.import.log' % (tmpDir, project)) + '%s/lucene.gpg.import.log' % tmpDir) if mavenURL is None: - raise RuntimeError('%s is missing maven' % project) + raise RuntimeError('lucene is missing maven') if changesURL is None: - raise RuntimeError('%s is missing changes-%s' % (project, version)) - testChanges(project, version, changesURL) + raise RuntimeError('lucene is missing changes-%s' % version) + testChanges(version, changesURL) for artifact, urlString in artifacts: print(' download %s...' 
% artifact) @@ -331,31 +295,31 @@ def checkSigs(project, urlString, version, tmpDir, isSigned, keysFile): scriptutil.download(artifact + '.asc', urlString + '.asc', tmpDir, force_clean=FORCE_CLEAN) sigFile = '%s/%s.asc' % (tmpDir, artifact) artifactFile = '%s/%s' % (tmpDir, artifact) - logFile = '%s/%s.%s.gpg.verify.log' % (tmpDir, project, artifact) - run('gpg --homedir %s --verify %s %s' % (gpgHomeDir, sigFile, artifactFile), + logFile = '%s/lucene.%s.gpg.verify.log' % (tmpDir, artifact) + run('gpg --homedir %s --display-charset utf-8 --verify %s %s' % (gpgHomeDir, sigFile, artifactFile), logFile) # Forward any GPG warnings, except the expected one (since it's a clean world) - f = open(logFile) - for line in f.readlines(): - if line.lower().find('warning') != -1 \ - and line.find('WARNING: This key is not certified with a trusted signature') == -1: - print(' GPG: %s' % line.strip()) - f.close() + with open(logFile) as f: + print("File: %s" % logFile) + for line in f.readlines(): + if line.lower().find('warning') != -1 \ + and line.find('WARNING: This key is not certified with a trusted signature') == -1: + print(' GPG: %s' % line.strip()) # Test trust (this is done with the real users config) run('gpg --import %s' % (keysFile), - '%s/%s.gpg.trust.import.log' % (tmpDir, project)) + '%s/lucene.gpg.trust.import.log' % tmpDir) print(' verify trust') - logFile = '%s/%s.%s.gpg.trust.log' % (tmpDir, project, artifact) + logFile = '%s/lucene.%s.gpg.trust.log' % (tmpDir, artifact) run('gpg --verify %s %s' % (sigFile, artifactFile), logFile) # Forward any GPG warnings: - f = open(logFile) - for line in f.readlines(): - if line.lower().find('warning') != -1: - print(' GPG: %s' % line.strip()) - f.close() + with open(logFile) as f: + for line in f.readlines(): + if line.lower().find('warning') != -1: + print(' GPG: %s' % line.strip()) -def testChanges(project, version, changesURLString): + +def testChanges(version, changesURLString): print(' check changes HTML...') 
changesURL = None for text, subURL in getDirEntries(changesURLString): @@ -366,9 +330,10 @@ def testChanges(project, version, changesURLString): raise RuntimeError('did not see Changes.html link from %s' % changesURLString) s = load(changesURL) - checkChangesContent(s, version, changesURL, project, True) + checkChangesContent(s, version, changesURL, True) -def testChangesText(dir, version, project): + +def testChangesText(dir, version): "Checks all CHANGES.txt under this dir." for root, dirs, files in os.walk(dir): @@ -376,12 +341,14 @@ def testChangesText(dir, version, project): if 'CHANGES.txt' in files: fullPath = '%s/CHANGES.txt' % root #print 'CHECK %s' % fullPath - checkChangesContent(open(fullPath, encoding='UTF-8').read(), version, fullPath, project, False) + checkChangesContent(open(fullPath, encoding='UTF-8').read(), version, fullPath, False) reChangesSectionHREF = re.compile('(.*?)', re.IGNORECASE) -reUnderbarNotDashHTML = re.compile(r'
  • (\s*(LUCENE|SOLR)_\d\d\d\d+)') -reUnderbarNotDashTXT = re.compile(r'\s+((LUCENE|SOLR)_\d\d\d\d+)', re.MULTILINE) -def checkChangesContent(s, version, name, project, isHTML): +reUnderbarNotDashHTML = re.compile(r'
  • (\s*(LUCENE)_\d\d\d\d+)') +reUnderbarNotDashTXT = re.compile(r'\s+((LUCENE)_\d\d\d\d+)', re.MULTILINE) + + +def checkChangesContent(s, version, name, isHTML): currentVersionTuple = versionToTuple(version, name) if isHTML and s.find('Release %s' % version) == -1: @@ -395,16 +362,12 @@ def checkChangesContent(s, version, name, project, isHTML): m = r.search(s) if m is not None: raise RuntimeError('incorrect issue (_ instead of -) in %s: %s' % (name, m.group(1))) - + if s.lower().find('not yet released') != -1: raise RuntimeError('saw "not yet released" in %s' % name) if not isHTML: - if project == 'lucene': - sub = 'Lucene %s' % version - else: - sub = version - + sub = 'Lucene %s' % version if s.find(sub) == -1: # benchmark never seems to include release info: if name.find('/benchmark/') == -1: @@ -433,6 +396,8 @@ def checkChangesContent(s, version, name, project, isHTML): reVersion = re.compile(r'(\d+)\.(\d+)(?:\.(\d+))?\s*(-alpha|-beta|final|RC\d+)?\s*(?:\[.*\])?', re.IGNORECASE) + + def versionToTuple(version, name): versionMatch = reVersion.match(version) if versionMatch is None: @@ -456,22 +421,24 @@ reUnixPath = re.compile(r'\b[a-zA-Z_]+=(?:"(?:\\"|[^"])*"' + '|(?:\\\\.|[^"\'\\s + r'|("/(?:\\.|[^"])*")' \ + r"|('/(?:\\.|[^'])*')") + def unix2win(matchobj): if matchobj.group(1) is not None: return cygwinWindowsRoot + matchobj.group() if matchobj.group(2) is not None: return '"%s%s' % (cygwinWindowsRoot, matchobj.group().lstrip('"')) if matchobj.group(3) is not None: return "'%s%s" % (cygwinWindowsRoot, matchobj.group().lstrip("'")) return matchobj.group() + def cygwinifyPaths(command): - # The problem: Native Windows applications running under Cygwin - # (e.g. Ant, which isn't available as a Cygwin package) can't + # The problem: Native Windows applications running under Cygwin can't # handle Cygwin's Unix-style paths. 
However, environment variable # values are automatically converted, so only paths outside of # environment variable values should be converted to Windows paths. # Assumption: all paths will be absolute. - if '; ant ' in command: command = reUnixPath.sub(unix2win, command) + if '; gradlew ' in command: command = reUnixPath.sub(unix2win, command) return command + def printFileContents(fileName): # Assume log file was written in system's default encoding, but @@ -489,6 +456,7 @@ def printFileContents(fileName): print(codecs.getdecoder(sys.stdout.encoding)(bytes)[0]) print() + def run(command, logFile): if cygwin: command = cygwinifyPaths(command) if os.system('%s > %s 2>&1' % (command, logFile)): @@ -496,13 +464,14 @@ def run(command, logFile): print('\ncommand "%s" failed:' % command) printFileContents(logFile) raise RuntimeError('command "%s" failed; see log file %s' % (command, logPath)) - + + def verifyDigests(artifact, urlString, tmpDir): print(' verify sha512 digest') sha512Expected, t = load(urlString + '.sha512').strip().split() if t != '*'+artifact: raise RuntimeError('SHA512 %s.sha512 lists artifact %s but expected *%s' % (urlString, t, artifact)) - + s512 = hashlib.sha512() f = open('%s/%s' % (tmpDir, artifact), 'rb') while True: @@ -515,6 +484,7 @@ def verifyDigests(artifact, urlString, tmpDir): if sha512Actual != sha512Expected: raise RuntimeError('SHA512 digest mismatch for %s: expected %s but got %s' % (artifact, sha512Expected, sha512Actual)) + def getDirEntries(urlString): if urlString.startswith('file:/') and not urlString.startswith('file://'): # stupid bogus ant URI @@ -541,14 +511,15 @@ def getDirEntries(urlString): if text == 'Parent Directory' or text == '..': return links[(i+1):] -def unpackAndVerify(java, project, tmpDir, artifact, gitRevision, version, testArgs, baseURL): + +def unpackAndVerify(java, tmpDir, artifact, gitRevision, version, testArgs): destDir = '%s/unpack' % tmpDir if os.path.exists(destDir): shutil.rmtree(destDir) 
os.makedirs(destDir) os.chdir(destDir) print(' unpack %s...' % artifact) - unpackLogFile = '%s/%s-unpack-%s.log' % (tmpDir, project, artifact) + unpackLogFile = '%s/lucene-unpack-%s.log' % (tmpDir, artifact) if artifact.endswith('.tar.gz') or artifact.endswith('.tgz'): run('tar xzf %s/%s' % (tmpDir, artifact), unpackLogFile) elif artifact.endswith('.zip'): @@ -556,87 +527,70 @@ def unpackAndVerify(java, project, tmpDir, artifact, gitRevision, version, testA # make sure it unpacks to proper subdir l = os.listdir(destDir) - expected = '%s-%s' % (project, version) + expected = 'lucene-%s' % version if l != [expected]: raise RuntimeError('unpack produced entries %s; expected only %s' % (l, expected)) unpackPath = '%s/%s' % (destDir, expected) - verifyUnpacked(java, project, artifact, unpackPath, gitRevision, version, testArgs, tmpDir, baseURL) + verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs) return unpackPath LUCENE_NOTICE = None LUCENE_LICENSE = None -SOLR_NOTICE = None -SOLR_LICENSE = None -def verifyUnpacked(java, project, artifact, unpackPath, gitRevision, version, testArgs, tmpDir, baseURL): + +def verifyUnpacked(java, artifact, unpackPath, gitRevision, version, testArgs): global LUCENE_NOTICE global LUCENE_LICENSE - global SOLR_NOTICE - global SOLR_LICENSE os.chdir(unpackPath) isSrc = artifact.find('-src') != -1 - + l = os.listdir(unpackPath) - textFiles = ['LICENSE', 'NOTICE', 'README'] - if project == 'lucene': - textFiles.extend(('JRE_VERSION_MIGRATION', 'CHANGES', 'MIGRATE', 'SYSTEM_REQUIREMENTS')) - if isSrc: - textFiles.append('BUILD') + textFiles = ['LICENSE', 'NOTICE', 'README', 'JRE_VERSION_MIGRATION', 'CHANGES', 'MIGRATE', 'SYSTEM_REQUIREMENTS'] + if isSrc: + textFiles.append('BUILD') for fileName in textFiles: + print("Checking textfile %s" % fileName) fileNameTxt = fileName + '.txt' fileNameMd = fileName + '.md' - if fileNameTxt in l: + if fileName in l: + l.remove(fileName) + elif fileNameTxt in l: l.remove(fileNameTxt) 
elif fileNameMd in l: l.remove(fileNameMd) else: raise RuntimeError('file "%s".[txt|md] is missing from artifact %s' % (fileName, artifact)) - if project == 'lucene': - if LUCENE_NOTICE is None: - LUCENE_NOTICE = open('%s/NOTICE.txt' % unpackPath, encoding='UTF-8').read() - if LUCENE_LICENSE is None: - LUCENE_LICENSE = open('%s/LICENSE.txt' % unpackPath, encoding='UTF-8').read() - else: - if SOLR_NOTICE is None: - SOLR_NOTICE = open('%s/NOTICE.txt' % unpackPath, encoding='UTF-8').read() - if SOLR_LICENSE is None: - SOLR_LICENSE = open('%s/LICENSE.txt' % unpackPath, encoding='UTF-8').read() + if LUCENE_NOTICE is None: + LUCENE_NOTICE = open('%s/NOTICE.txt' % unpackPath, encoding='UTF-8').read() + if LUCENE_LICENSE is None: + LUCENE_LICENSE = open('%s/LICENSE.txt' % unpackPath, encoding='UTF-8').read() if not isSrc: # TODO: we should add verifyModule/verifySubmodule (e.g. analysis) here and recurse through - if project == 'lucene': - expectedJARs = () - else: - expectedJARs = () + expectedJARs = () for fileName in expectedJARs: fileName += '.jar' if fileName not in l: - raise RuntimeError('%s: file "%s" is missing from artifact %s' % (project, fileName, artifact)) + raise RuntimeError('lucene: file "%s" is missing from artifact %s' % (fileName, artifact)) l.remove(fileName) - if project == 'lucene': - # TODO: clean this up to not be a list of modules that we must maintain - extras = ('analysis', 'backward-codecs', 'benchmark', 'classification', 'codecs', 'core', 'demo', 'docs', 'expressions', 'facet', 'grouping', 'highlighter', 'join', 'luke', 'memory', 'misc', 'monitor', 'queries', 'queryparser', 'replicator', 'sandbox', 'spatial-extras', 'spatial3d', 'suggest', 'test-framework', 'licenses') - if isSrc: - extras += ('build.gradle', 'build.xml', 'common-build.xml', 'module-build.xml', 'top-level-ivy-settings.xml', 'default-nested-ivy-settings.xml', 'ivy-versions.properties', 'ivy-ignore-conflicts.properties', 'tools', 'site', 'dev-docs') - else: - extras = () - - # 
TODO: if solr, verify lucene/licenses, solr/licenses are present + # TODO: clean this up to not be a list of modules that we must maintain + extras = ('analysis', 'backward-codecs', 'benchmark', 'classification', 'codecs', 'core', 'demo', 'docs', 'expressions', 'facet', 'grouping', 'highlighter', 'join', 'luke', 'memory', 'misc', 'monitor', 'queries', 'queryparser', 'replicator', 'sandbox', 'spatial-extras', 'spatial3d', 'suggest', 'test-framework', 'licenses') + if isSrc: + extras += ('build.gradle', 'build.xml', 'common-build.xml', 'module-build.xml', 'top-level-ivy-settings.xml', 'default-nested-ivy-settings.xml', 'ivy-versions.properties', 'ivy-ignore-conflicts.properties', 'tools', 'site', 'dev-docs') for e in extras: if e not in l: - raise RuntimeError('%s: %s missing from artifact %s' % (project, e, artifact)) + raise RuntimeError('lucene: %s missing from artifact %s' % (e, artifact)) l.remove(e) - if project == 'lucene': - if len(l) > 0: - raise RuntimeError('%s: unexpected files/dirs in artifact %s: %s' % (project, artifact, l)) + if len(l) > 0: + raise RuntimeError('lucene: unexpected files/dirs in artifact %s: %s' % (artifact, l)) if isSrc: print(' make sure no JARs/WARs in src dist...') @@ -653,219 +607,51 @@ def verifyUnpacked(java, project, artifact, unpackPath, gitRevision, version, te print(' %s' % line.strip()) raise RuntimeError('source release has WARs...') - # Can't run documentation-lint in lucene src, because dev-tools is missing - validateCmd = 'ant validate' if project == 'lucene' else 'ant validate documentation-lint'; + # TODO: test below gradle commands + # Can't run documentation-lint in lucene src, because dev-tools is missing TODO: No longer true + validateCmd = 'gradlew check -x test' print(' run "%s"' % validateCmd) java.run_java11(validateCmd, '%s/validate.log' % unpackPath) - if project == 'lucene': - print(" run tests w/ Java 11 and testArgs='%s'..." 
% testArgs) - java.run_java11('ant clean test %s' % testArgs, '%s/test.log' % unpackPath) - java.run_java11('ant jar', '%s/compile.log' % unpackPath) - testDemo(java.run_java11, isSrc, version, '11') + print(" run tests w/ Java 11 and testArgs='%s'..." % testArgs) + java.run_java11('gradlew clean test %s' % testArgs, '%s/test.log' % unpackPath) + java.run_java11('gradlew assemble', '%s/compile.log' % unpackPath) + testDemo(java.run_java11, isSrc, version, '11') - print(' generate javadocs w/ Java 11...') - java.run_java11('ant javadocs', '%s/javadocs.log' % unpackPath) - checkBrokenLinks('%s/build/docs' % unpackPath) + #print(' generate javadocs w/ Java 11...') + # TODO: Do we need to check broken javadoc links in smoketester, or is that done in build now? + #java.run_java11('gradlew javadoc', '%s/javadocs.log' % unpackPath) + # checkBrokenLinks('%s/build/docs' % unpackPath) - if java.run_java12: - print(" run tests w/ Java 12 and testArgs='%s'..." % testArgs) - java.run_java12('ant clean test %s' % testArgs, '%s/test.log' % unpackPath) - java.run_java12('ant jar', '%s/compile.log' % unpackPath) - testDemo(java.run_java12, isSrc, version, '12') + if java.run_java12: + print(" run tests w/ Java 12 and testArgs='%s'..." % testArgs) + java.run_java12('gradlew clean test %s' % testArgs, '%s/test.log' % unpackPath) + java.run_java12('gradlew assemble', '%s/compile.log' % unpackPath) + testDemo(java.run_java12, isSrc, version, '12') - #print(' generate javadocs w/ Java 12...') - #java.run_java12('ant javadocs', '%s/javadocs.log' % unpackPath) - #checkBrokenLinks('%s/build/docs' % unpackPath) + #print(' generate javadocs w/ Java 12...') + #java.run_java12('ant javadocs', '%s/javadocs.log' % unpackPath) + #checkBrokenLinks('%s/build/docs' % unpackPath) - else: - os.chdir('solr') - - print(" run tests w/ Java 11 and testArgs='%s'..." 
% testArgs) - java.run_java11('ant clean test -Dtests.slow=false %s' % testArgs, '%s/test.log' % unpackPath) - - # test javadocs - print(' generate javadocs w/ Java 11...') - java.run_java11('ant clean javadocs', '%s/javadocs.log' % unpackPath) - checkBrokenLinks('%s/solr/build/docs') - - print(' test solr example w/ Java 11...') - java.run_java11('ant clean server', '%s/antexample.log' % unpackPath) - testSolrExample(unpackPath, java.java11_home, True) - - if java.run_java12: - print(" run tests w/ Java 12 and testArgs='%s'..." % testArgs) - java.run_java12('ant clean test -Dtests.slow=false %s' % testArgs, '%s/test.log' % unpackPath) - - #print(' generate javadocs w/ Java 12...') - #java.run_java12('ant clean javadocs', '%s/javadocs.log' % unpackPath) - #checkBrokenLinks('%s/solr/build/docs' % unpackPath) - - print(' test solr example w/ Java 12...') - java.run_java12('ant clean server', '%s/antexample.log' % unpackPath) - testSolrExample(unpackPath, java.java12_home, True) - - os.chdir('..') - print(' check NOTICE') - testNotice(unpackPath) - - else: - - checkAllJARs(os.getcwd(), project, gitRevision, version, tmpDir, baseURL) - - if project == 'lucene': - testDemo(java.run_java11, isSrc, version, '11') - if java.run_java12: - testDemo(java.run_java12, isSrc, version, '12') - - else: - print(' copying unpacked distribution for Java 11 ...') - java11UnpackPath = '%s-java11' % unpackPath - if os.path.exists(java11UnpackPath): - shutil.rmtree(java11UnpackPath) - shutil.copytree(unpackPath, java11UnpackPath) - os.chdir(java11UnpackPath) - print(' test solr example w/ Java 11...') - testSolrExample(java11UnpackPath, java.java11_home, False) - - if java.run_java12: - print(' copying unpacked distribution for Java 12 ...') - java12UnpackPath = '%s-java12' % unpackPath - if os.path.exists(java12UnpackPath): - shutil.rmtree(java12UnpackPath) - shutil.copytree(unpackPath, java12UnpackPath) - os.chdir(java12UnpackPath) - print(' test solr example w/ Java 12...') - 
testSolrExample(java12UnpackPath, java.java12_home, False) - - os.chdir(unpackPath) - - testChangesText('.', version, project) - - if project == 'lucene' and isSrc: print(' confirm all releases have coverage in TestBackwardsCompatibility') confirmAllReleasesAreTestedForBackCompat(version, unpackPath) - -def testNotice(unpackPath): - solrNotice = open('%s/NOTICE.txt' % unpackPath, encoding='UTF-8').read() - luceneNotice = open('%s/lucene/NOTICE.txt' % unpackPath, encoding='UTF-8').read() - - expected = """ -========================================================================= -== Apache Lucene Notice == -========================================================================= - -""" + luceneNotice + """--- -""" - - if solrNotice.find(expected) == -1: - raise RuntimeError('Solr\'s NOTICE.txt does not have the verbatim copy, plus header/footer, of Lucene\'s NOTICE.txt') - -def readSolrOutput(p, startupEvent, failureEvent, logFile): - f = open(logFile, 'wb') - try: - while True: - line = p.stdout.readline() - if len(line) == 0: - p.poll() - if not startupEvent.isSet(): - failureEvent.set() - startupEvent.set() - break - f.write(line) - f.flush() - #print('SOLR: %s' % line.strip()) - if not startupEvent.isSet(): - if line.find(b'Started ServerConnector@') != -1 and line.find(b'{HTTP/1.1}{0.0.0.0:8983}') != -1: - startupEvent.set() - elif p.poll() is not None: - failureEvent.set() - startupEvent.set() - break - except: - print() - print('Exception reading Solr output:') - traceback.print_exc() - failureEvent.set() - startupEvent.set() - finally: - f.close() - -def is_port_in_use(port): - import socket - with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: - return s.connect_ex(('localhost', port)) == 0 - -def testSolrExample(unpackPath, javaPath, isSrc): - # test solr using some examples it comes with - logFile = '%s/solr-example.log' % unpackPath - if isSrc: - os.chdir(unpackPath+'/solr') - subprocess.call(['chmod','+x',unpackPath+'/solr/bin/solr', 
unpackPath+'/solr/bin/solr.cmd', unpackPath+'/solr/bin/solr.in.cmd']) else: - os.chdir(unpackPath) - print(' start Solr instance (log=%s)...' % logFile) - env = {} - env.update(os.environ) - env['JAVA_HOME'] = javaPath - env['PATH'] = '%s/bin:%s' % (javaPath, env['PATH']) + checkAllJARs(os.getcwd(), gitRevision, version) - # Stop Solr running on port 8983 (in case a previous run didn't shutdown cleanly) - try: - if not cygwin: - subprocess.call(['bin/solr','stop','-p','8983']) - else: - subprocess.call('env "PATH=`cygpath -S -w`:$PATH" bin/solr.cmd stop -p 8983', shell=True) - except: - print(' Stop failed due to: '+sys.exc_info()[0]) + testDemo(java.run_java11, isSrc, version, '11') + if java.run_java12: + testDemo(java.run_java12, isSrc, version, '12') - print(' Running techproducts example on port 8983 from %s' % unpackPath) - try: - if not cygwin: - runExampleStatus = subprocess.call(['bin/solr','-e','techproducts']) - else: - runExampleStatus = subprocess.call('env "PATH=`cygpath -S -w`:$PATH" bin/solr.cmd -e techproducts', shell=True) - - if runExampleStatus != 0: - raise RuntimeError('Failed to run the techproducts example, check log for previous errors.') + testChangesText('.', version) - os.chdir('example') - print(' test utf8...') - run('sh ./exampledocs/test_utf8.sh http://localhost:8983/solr/techproducts', 'utf8.log') - print(' run query...') - s = load('http://localhost:8983/solr/techproducts/select/?q=video') - if s.find('"numFound":3,"start":0') == -1: - print('FAILED: response is:\n%s' % s) - raise RuntimeError('query on solr example instance failed') - s = load('http://localhost:8983/api/cores') - if s.find('"status":0,') == -1: - print('FAILED: response is:\n%s' % s) - raise RuntimeError('query api v2 on solr example instance failed') - finally: - # Stop server: - print(' stop server using: bin/solr stop -p 8983') - if isSrc: - os.chdir(unpackPath+'/solr') - else: - os.chdir(unpackPath) - - if not cygwin: - 
subprocess.call(['bin/solr','stop','-p','8983']) - else: - subprocess.call('env "PATH=`cygpath -S -w`:$PATH" bin/solr.cmd stop -p 8983', shell=True) - if isSrc: - os.chdir(unpackPath+'/solr') - else: - os.chdir(unpackPath) - -# check for broken links -def checkBrokenLinks(path): - # also validate html/check for broken links - if checkJavadocLinks.checkAll(path): - raise RuntimeError('broken javadocs links found!') +# def checkBrokenLinks(path): +# # also validate html/check for broken links +# if checkJavadocLinks.checkAll(path): +# raise RuntimeError('broken javadocs links found!') + def testDemo(run_java, isSrc, version, jdk): if os.path.exists('index'): @@ -879,7 +665,7 @@ def testDemo(run_java, isSrc, version, jdk): else: cp = 'core/lucene-core-{0}.jar{1}demo/lucene-demo-{0}.jar{1}analysis/common/lucene-analyzers-common-{0}.jar{1}queryparser/lucene-queryparser-{0}.jar'.format(version, sep) docsDir = 'docs' - run_java('java -cp "%s" org.apache.lucene.demo.IndexFiles -index index -docs %s' % (cp, docsDir), 'index.log') + run_java('java -cp "%s" -Dsmoketester=true org.apache.lucene.demo.IndexFiles -index index -docs %s' % (cp, docsDir), 'index.log') run_java('java -cp "%s" org.apache.lucene.demo.SearchFiles -index index -query lucene' % cp, 'search.log') reMatchingDocs = re.compile('(\d+) total matching documents') m = reMatchingDocs.search(open('search.log', encoding='UTF-8').read()) @@ -900,49 +686,40 @@ def testDemo(run_java, isSrc, version, jdk): if removeTrailingZeros(actualVersion) != removeTrailingZeros(version): raise RuntimeError('wrong version from CheckIndex: got "%s" but expected "%s"' % (actualVersion, version)) + def removeTrailingZeros(version): return re.sub(r'(\.0)*$', '', version) -def checkMaven(solrSrcUnpackPath, baseURL, tmpDir, gitRevision, version, isSigned, keysFile): - POMtemplates = defaultdict() - getPOMtemplates(solrSrcUnpackPath, POMtemplates, tmpDir) + +def checkMaven(baseURL, tmpDir, gitRevision, version, isSigned, keysFile): print(' 
download artifacts') - artifacts = {'lucene': [], 'solr': []} - for project in ('lucene', 'solr'): - artifactsURL = '%s/%s/maven/org/apache/%s/' % (baseURL, project, project) - targetDir = '%s/maven/org/apache/%s' % (tmpDir, project) - if not os.path.exists(targetDir): - os.makedirs(targetDir) - crawl(artifacts[project], artifactsURL, targetDir) + artifacts = [] + artifactsURL = '%s/lucene/maven/org/apache/lucene/' % baseURL + targetDir = '%s/maven/org/apache/lucene' % tmpDir + if not os.path.exists(targetDir): + os.makedirs(targetDir) + crawl(artifacts, artifactsURL, targetDir) print() verifyPOMperBinaryArtifact(artifacts, version) - verifyArtifactPerPOMtemplate(POMtemplates, artifacts, tmpDir, version) verifyMavenDigests(artifacts) checkJavadocAndSourceArtifacts(artifacts, version) verifyDeployedPOMsCoordinates(artifacts, version) if isSigned: - verifyMavenSigs(baseURL, tmpDir, artifacts, keysFile) + verifyMavenSigs(tmpDir, artifacts, keysFile) - distFiles = getBinaryDistFilesForMavenChecks(tmpDir, version, baseURL) + distFiles = getBinaryDistFiles(tmpDir, version, baseURL) checkIdenticalMavenArtifacts(distFiles, artifacts, version) - checkAllJARs('%s/maven/org/apache/lucene' % tmpDir, 'lucene', gitRevision, version, tmpDir, baseURL) - checkAllJARs('%s/maven/org/apache/solr' % tmpDir, 'solr', gitRevision, version, tmpDir, baseURL) + checkAllJARs('%s/maven/org/apache/lucene' % tmpDir, gitRevision, version) -def getBinaryDistFilesForMavenChecks(tmpDir, version, baseURL): - # TODO: refactor distribution unpacking so that it only happens once per distribution per smoker run - distFiles = defaultdict() - for project in ('lucene', 'solr'): - distFiles[project] = getBinaryDistFiles(project, tmpDir, version, baseURL) - return distFiles - -def getBinaryDistFiles(project, tmpDir, version, baseURL): - distribution = '%s-%s.tgz' % (project, version) + +def getBinaryDistFiles(tmpDir, version, baseURL): + distribution = 'lucene-%s.tgz' % version if not os.path.exists('%s/%s' % 
(tmpDir, distribution)): - distURL = '%s/%s/%s' % (baseURL, project, distribution) + distURL = '%s/lucene/%s' % (baseURL, distribution) print(' download %s...' % distribution, end=' ') scriptutil.download(distribution, distURL, tmpDir, force_clean=FORCE_CLEAN) - destDir = '%s/unpack-%s-getBinaryDistFiles' % (tmpDir, project) + destDir = '%s/unpack-lucene-getBinaryDistFiles' % tmpDir if os.path.exists(destDir): shutil.rmtree(destDir) os.makedirs(destDir) @@ -955,17 +732,18 @@ def getBinaryDistFiles(project, tmpDir, version, baseURL): distributionFiles.extend([os.path.join(root, file) for file in files]) return distributionFiles + def checkJavadocAndSourceArtifacts(artifacts, version): print(' check for javadoc and sources artifacts...') - for project in ('lucene', 'solr'): - for artifact in artifacts[project]: - if artifact.endswith(version + '.jar'): - javadocJar = artifact[:-4] + '-javadoc.jar' - if javadocJar not in artifacts[project]: - raise RuntimeError('missing: %s' % javadocJar) - sourcesJar = artifact[:-4] + '-sources.jar' - if sourcesJar not in artifacts[project]: - raise RuntimeError('missing: %s' % sourcesJar) + for artifact in artifacts: + if artifact.endswith(version + '.jar'): + javadocJar = artifact[:-4] + '-javadoc.jar' + if javadocJar not in artifacts: + raise RuntimeError('missing: %s' % javadocJar) + sourcesJar = artifact[:-4] + '-sources.jar' + if sourcesJar not in artifacts: + raise RuntimeError('missing: %s' % sourcesJar) + def getZipFileEntries(fileName): entries = [] @@ -976,57 +754,57 @@ def getZipFileEntries(fileName): entries.sort() return entries + def checkIdenticalMavenArtifacts(distFiles, artifacts, version): print(' verify that Maven artifacts are same as in the binary distribution...') - reJarWar = re.compile(r'%s\.[wj]ar$' % version) # exclude *-javadoc.jar and *-sources.jar - for project in ('lucene', 'solr'): - distFilenames = dict() - for file in distFiles[project]: - baseName = os.path.basename(file) - distFilenames[baseName] = 
file - for artifact in artifacts[project]: - if reJarWar.search(artifact): - artifactFilename = os.path.basename(artifact) - if artifactFilename not in distFilenames: - raise RuntimeError('Maven artifact %s is not present in %s binary distribution' - % (artifact, project)) - else: - identical = filecmp.cmp(artifact, distFilenames[artifactFilename], shallow=False) - if not identical: - raise RuntimeError('Maven artifact %s is not identical to %s in %s binary distribution' - % (artifact, distFilenames[artifactFilename], project)) + reJarWar = re.compile(r'%s\.[wj]ar$' % version) # exclude *-javadoc.jar and *-sources.jar + distFilenames = dict() + for file in distFiles: + baseName = os.path.basename(file) + distFilenames[baseName] = file + for artifact in artifacts: + if reJarWar.search(artifact): + artifactFilename = os.path.basename(artifact) + if artifactFilename not in distFilenames: + raise RuntimeError('Maven artifact %s is not present in lucene binary distribution' % artifact) + else: + identical = filecmp.cmp(artifact, distFilenames[artifactFilename], shallow=False) + if not identical: + raise RuntimeError('Maven artifact %s is not identical to %s in lucene binary distribution' + % (artifact, distFilenames[artifactFilename])) + def verifyMavenDigests(artifacts): print(" verify Maven artifacts' md5/sha1 digests...") reJarWarPom = re.compile(r'\.(?:[wj]ar|pom)$') - for project in ('lucene', 'solr'): - for artifactFile in [a for a in artifacts[project] if reJarWarPom.search(a)]: - if artifactFile + '.md5' not in artifacts[project]: - raise RuntimeError('missing: MD5 digest for %s' % artifactFile) - if artifactFile + '.sha1' not in artifacts[project]: - raise RuntimeError('missing: SHA1 digest for %s' % artifactFile) - with open(artifactFile + '.md5', encoding='UTF-8') as md5File: - md5Expected = md5File.read().strip() - with open(artifactFile + '.sha1', encoding='UTF-8') as sha1File: - sha1Expected = sha1File.read().strip() - md5 = hashlib.md5() - sha1 = 
hashlib.sha1() - inputFile = open(artifactFile, 'rb') - while True: - bytes = inputFile.read(65536) - if len(bytes) == 0: - break - md5.update(bytes) - sha1.update(bytes) - inputFile.close() - md5Actual = md5.hexdigest() - sha1Actual = sha1.hexdigest() - if md5Actual != md5Expected: - raise RuntimeError('MD5 digest mismatch for %s: expected %s but got %s' - % (artifactFile, md5Expected, md5Actual)) - if sha1Actual != sha1Expected: - raise RuntimeError('SHA1 digest mismatch for %s: expected %s but got %s' - % (artifactFile, sha1Expected, sha1Actual)) + for artifactFile in [a for a in artifacts if reJarWarPom.search(a)]: + if artifactFile + '.md5' not in artifacts: + raise RuntimeError('missing: MD5 digest for %s' % artifactFile) + if artifactFile + '.sha1' not in artifacts: + raise RuntimeError('missing: SHA1 digest for %s' % artifactFile) + with open(artifactFile + '.md5', encoding='UTF-8') as md5File: + md5Expected = md5File.read().strip() + with open(artifactFile + '.sha1', encoding='UTF-8') as sha1File: + sha1Expected = sha1File.read().strip() + md5 = hashlib.md5() + sha1 = hashlib.sha1() + inputFile = open(artifactFile, 'rb') + while True: + bytes = inputFile.read(65536) + if len(bytes) == 0: + break + md5.update(bytes) + sha1.update(bytes) + inputFile.close() + md5Actual = md5.hexdigest() + sha1Actual = sha1.hexdigest() + if md5Actual != md5Expected: + raise RuntimeError('MD5 digest mismatch for %s: expected %s but got %s' + % (artifactFile, md5Expected, md5Actual)) + if sha1Actual != sha1Expected: + raise RuntimeError('SHA1 digest mismatch for %s: expected %s but got %s' + % (artifactFile, sha1Expected, sha1Actual)) + def getPOMcoordinate(treeRoot): namespace = '{http://maven.apache.org/POM/4.0.0}' @@ -1043,60 +821,59 @@ def getPOMcoordinate(treeRoot): packaging = 'jar' if packaging is None else packaging.text.strip() return groupId, artifactId, packaging, version -def verifyMavenSigs(baseURL, tmpDir, artifacts, keysFile): + +def verifyMavenSigs(tmpDir, 
artifacts, keysFile): print(' verify maven artifact sigs', end=' ') - for project in ('lucene', 'solr'): - # Set up clean gpg world; import keys file: - gpgHomeDir = '%s/%s.gpg' % (tmpDir, project) - if os.path.exists(gpgHomeDir): - shutil.rmtree(gpgHomeDir) - os.makedirs(gpgHomeDir, 0o700) - run('gpg --homedir %s --import %s' % (gpgHomeDir, keysFile), - '%s/%s.gpg.import.log' % (tmpDir, project)) + # Set up clean gpg world; import keys file: + gpgHomeDir = '%s/lucene.gpg' % tmpDir + if os.path.exists(gpgHomeDir): + shutil.rmtree(gpgHomeDir) + os.makedirs(gpgHomeDir, 0o700) + run('gpg --homedir %s --import %s' % (gpgHomeDir, keysFile), + '%s/lucene.gpg.import.log' % tmpDir) - reArtifacts = re.compile(r'\.(?:pom|[jw]ar)$') - for artifactFile in [a for a in artifacts[project] if reArtifacts.search(a)]: - artifact = os.path.basename(artifactFile) - sigFile = '%s.asc' % artifactFile - # Test sig (this is done with a clean brand-new GPG world) - logFile = '%s/%s.%s.gpg.verify.log' % (tmpDir, project, artifact) - run('gpg --homedir %s --verify %s %s' % (gpgHomeDir, sigFile, artifactFile), - logFile) - # Forward any GPG warnings, except the expected one (since it's a clean world) - f = open(logFile) - for line in f.readlines(): - if line.lower().find('warning') != -1 \ - and line.find('WARNING: This key is not certified with a trusted signature') == -1 \ - and line.find('WARNING: using insecure memory') == -1: - print(' GPG: %s' % line.strip()) - f.close() + reArtifacts = re.compile(r'\.(?:pom|[jw]ar)$') + for artifactFile in [a for a in artifacts if reArtifacts.search(a)]: + artifact = os.path.basename(artifactFile) + sigFile = '%s.asc' % artifactFile + # Test sig (this is done with a clean brand-new GPG world) + logFile = '%s/lucene.%s.gpg.verify.log' % (tmpDir, artifact) + run('gpg --homedir %s --verify %s %s' % (gpgHomeDir, sigFile, artifactFile), + logFile) - # Test trust (this is done with the real users config) - run('gpg --import %s' % keysFile, - 
'%s/%s.gpg.trust.import.log' % (tmpDir, project)) - logFile = '%s/%s.%s.gpg.trust.log' % (tmpDir, project, artifact) - run('gpg --verify %s %s' % (sigFile, artifactFile), logFile) - # Forward any GPG warnings: - f = open(logFile) - for line in f.readlines(): - if line.lower().find('warning') != -1 \ - and line.find('WARNING: This key is not certified with a trusted signature') == -1 \ - and line.find('WARNING: using insecure memory') == -1: - print(' GPG: %s' % line.strip()) - f.close() + # Forward any GPG warnings, except the expected one (since it's a clean world) + print_warnings_in_file(logFile) - sys.stdout.write('.') + # Test trust (this is done with the real users config) + run('gpg --import %s' % keysFile, + '%s/lucene.gpg.trust.import.log' % tmpDir) + logFile = '%s/lucene.%s.gpg.trust.log' % (tmpDir, artifact) + run('gpg --verify %s %s' % (sigFile, artifactFile), logFile) + # Forward any GPG warnings: + print_warnings_in_file(logFile) + + sys.stdout.write('.') print() + +def print_warnings_in_file(file): + with open(file) as f: + for line in f.readlines(): + if line.lower().find('warning') != -1 \ + and line.find('WARNING: This key is not certified with a trusted signature') == -1 \ + and line.find('WARNING: using insecure memory') == -1: + print(' GPG: %s' % line.strip()) + + def verifyPOMperBinaryArtifact(artifacts, version): print(' verify that each binary artifact has a deployed POM...') reBinaryJarWar = re.compile(r'%s\.[jw]ar$' % re.escape(version)) - for project in ('lucene', 'solr'): - for artifact in [a for a in artifacts[project] if reBinaryJarWar.search(a)]: - POM = artifact[:-4] + '.pom' - if POM not in artifacts[project]: - raise RuntimeError('missing: POM for %s' % artifact) + for artifact in [a for a in artifacts if reBinaryJarWar.search(a)]: + POM = artifact[:-4] + '.pom' + if POM not in artifacts: + raise RuntimeError('missing: POM for %s' % artifact) + def verifyDeployedPOMsCoordinates(artifacts, version): """ @@ -1104,61 +881,19 @@ def 
verifyDeployedPOMsCoordinates(artifacts, version): its filepath, and verify that the corresponding artifact exists. """ print(" verify deployed POMs' coordinates...") - for project in ('lucene', 'solr'): - for POM in [a for a in artifacts[project] if a.endswith('.pom')]: - treeRoot = ET.parse(POM).getroot() - groupId, artifactId, packaging, POMversion = getPOMcoordinate(treeRoot) - POMpath = '%s/%s/%s/%s-%s.pom' \ - % (groupId.replace('.', '/'), artifactId, version, artifactId, version) - if not POM.endswith(POMpath): - raise RuntimeError("Mismatch between POM coordinate %s:%s:%s and filepath: %s" - % (groupId, artifactId, POMversion, POM)) - # Verify that the corresponding artifact exists - artifact = POM[:-3] + packaging - if artifact not in artifacts[project]: - raise RuntimeError('Missing corresponding .%s artifact for POM %s' % (packaging, POM)) + for POM in [a for a in artifacts if a.endswith('.pom')]: + treeRoot = ET.parse(POM).getroot() + groupId, artifactId, packaging, POMversion = getPOMcoordinate(treeRoot) + POMpath = '%s/%s/%s/%s-%s.pom' \ + % (groupId.replace('.', '/'), artifactId, version, artifactId, version) + if not POM.endswith(POMpath): + raise RuntimeError("Mismatch between POM coordinate %s:%s:%s and filepath: %s" + % (groupId, artifactId, POMversion, POM)) + # Verify that the corresponding artifact exists + artifact = POM[:-3] + packaging + if artifact not in artifacts: + raise RuntimeError('Missing corresponding .%s artifact for POM %s' % (packaging, POM)) -def verifyArtifactPerPOMtemplate(POMtemplates, artifacts, tmpDir, version): - print(' verify that there is an artifact for each POM template...') - namespace = '{http://maven.apache.org/POM/4.0.0}' - xpathPlugin = '{0}build/{0}plugins/{0}plugin'.format(namespace) - xpathSkipConfiguration = '{0}configuration/{0}skip'.format(namespace) - for project in ('lucene', 'solr'): - for POMtemplate in POMtemplates[project]: - treeRoot = ET.parse(POMtemplate).getroot() - skipDeploy = False - for 
plugin in treeRoot.findall(xpathPlugin): - artifactId = plugin.find('%sartifactId' % namespace).text.strip() - if artifactId == 'maven-deploy-plugin': - skip = plugin.find(xpathSkipConfiguration) - if skip is not None: skipDeploy = (skip.text.strip().lower() == 'true') - if not skipDeploy: - groupId, artifactId, packaging, POMversion = getPOMcoordinate(treeRoot) - # Ignore POMversion, since its value will not have been interpolated - artifact = '%s/maven/%s/%s/%s/%s-%s.%s' \ - % (tmpDir, groupId.replace('.', '/'), artifactId, - version, artifactId, version, packaging) - if artifact not in artifacts['lucene'] and artifact not in artifacts['solr']: - raise RuntimeError('Missing artifact %s' % artifact) - -def getPOMtemplates(solrSrcUnpackPath, POMtemplates, tmpDir): - print(' find pom.xml.template files in the unpacked Solr source distribution') - allPOMtemplates = [] - rePOMtemplate = re.compile(r'^pom\.xml\.template$') - for root, dirs, files in os.walk(solrSrcUnpackPath): - allPOMtemplates.extend([os.path.join(root, f) for f in files if rePOMtemplate.search(f)]) - - reLucenePOMtemplate = re.compile(r'.*/maven/lucene.*/pom\.xml\.template$') - POMtemplates['lucene'] = [p for p in allPOMtemplates if reLucenePOMtemplate.search(p)] - if POMtemplates['lucene'] is None: - raise RuntimeError('No Lucene POMs found at %s' % solrSrcUnpackPath) - reSolrPOMtemplate = re.compile(r'.*/maven/solr.*/pom\.xml\.template$') - POMtemplates['solr'] = [p for p in allPOMtemplates if reSolrPOMtemplate.search(p)] - if POMtemplates['solr'] is None: - raise RuntimeError('No Solr POMs found at %s' % solrSrcUnpackPath) - POMtemplates['grandfather'] = [p for p in allPOMtemplates if '/maven/pom.xml.template' in p] - if len(POMtemplates['grandfather']) == 0: - raise RuntimeError('No Lucene/Solr grandfather POM found at %s' % solrSrcUnpackPath) def crawl(downloadedFiles, urlString, targetDir, exclusions=set()): for text, subURL in getDirEntries(urlString): @@ -1174,6 +909,7 @@ def 
crawl(downloadedFiles, urlString, targetDir, exclusions=set()): downloadedFiles.append(path) sys.stdout.write('.') + def make_java_config(parser, java12_home): def _make_runner(java_home, version): print('Java %s JAVA_HOME=%s' % (version, java_home)) @@ -1184,7 +920,7 @@ def make_java_config(parser, java12_home): s = subprocess.check_output('%s; java -version' % cmd_prefix, shell=True, stderr=subprocess.STDOUT).decode('utf-8') if s.find(' version "%s' % version) == -1: - parser.error('got wrong version for java %s:\n%s' % (version, s)) + parser.error('got wrong version for java %s:\n%s' % (version, s)) def run_java(cmd, logfile): run('%s; %s' % (cmd_prefix, cmd), logfile) return run_java @@ -1204,7 +940,7 @@ revision_re = re.compile(r'rev([a-f\d]+)') def parse_config(): epilogue = textwrap.dedent(''' Example usage: - python3 -u dev-tools/scripts/smokeTestRelease.py https://dist.apache.org/repos/dist/dev/lucene/lucene-solr-6.0.1-RC2-revc7510a0... + python3 -u dev-tools/scripts/smokeTestRelease.py https://dist.apache.org/repos/dist/dev/lucene/lucene-6.0.1-RC2-revc7510a0... ''') description = 'Utility to test a release.' parser = argparse.ArgumentParser(description=description, epilog=epilogue, @@ -1225,7 +961,7 @@ def parse_config(): help='Only perform download and sha hash check steps') parser.add_argument('url', help='Url pointing to release to test') parser.add_argument('test_args', nargs=argparse.REMAINDER, - help='Arguments to pass to ant for testing, e.g. -Dwhat=ever.') + help='Arguments to pass to gradle for testing, e.g. 
-Dwhat=ever.') c = parser.parse_args() if c.version is not None: @@ -1284,6 +1020,7 @@ def getAllLuceneReleases(): l.sort() return l + def confirmAllReleasesAreTestedForBackCompat(smokeVersion, unpackPath): print(' find all past Lucene releases...') @@ -1296,7 +1033,8 @@ def confirmAllReleasesAreTestedForBackCompat(smokeVersion, unpackPath): os.chdir(unpackPath) print(' run TestBackwardsCompatibility..') - command = 'ant test -Dtestcase=TestBackwardsCompatibility -Dtests.verbose=true' + command = 'gradlew test -p lucene/backward-codecs --tests TestBackwardsCompatibility --max-workers=1 ' \ + '-Dtests.verbose=true ' p = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) stdout, stderr = p.communicate() if p.returncode != 0: @@ -1375,7 +1113,8 @@ def confirmAllReleasesAreTestedForBackCompat(smokeVersion, unpackPath): def main(): c = parse_config() - scriptVersion = scriptutil.find_current_version() + # Pick . part of version and require script to be from same branch + scriptVersion = re.search(r'((\d+).(\d+)).(\d+)', scriptutil.find_current_version()).group(1).strip() if not c.version.startswith(scriptVersion + '.'): raise RuntimeError('smokeTestRelease.py for %s.X is incompatible with a %s release.' % (scriptVersion, c.version)) @@ -1383,38 +1122,34 @@ def main(): smokeTest(c.java, c.url, c.revision, c.version, c.tmp_dir, c.is_signed, c.local_keys, ' '.join(c.test_args), downloadOnly=c.download_only) + def smokeTest(java, baseURL, gitRevision, version, tmpDir, isSigned, local_keys, testArgs, downloadOnly=False): startTime = datetime.datetime.now() # disable flakey tests for smoke-tester runs: testArgs = '-Dtests.badapples=false %s' % testArgs - + if FORCE_CLEAN: if os.path.exists(tmpDir): raise RuntimeError('temp dir %s exists; please remove first' % tmpDir) if not os.path.exists(tmpDir): os.makedirs(tmpDir) - + lucenePath = None - solrPath = None print() print('Load release URL "%s"...' 
% baseURL) newBaseURL = unshortenURL(baseURL) if newBaseURL != baseURL: print(' unshortened: %s' % newBaseURL) baseURL = newBaseURL - + for text, subURL in getDirEntries(baseURL): if text.lower().find('lucene') != -1: lucenePath = subURL - elif text.lower().find('solr') != -1: - solrPath = subURL if lucenePath is None: raise RuntimeError('could not find lucene subdir') - if solrPath is None: - raise RuntimeError('could not find solr subdir') print() print('Get KEYS...') @@ -1427,35 +1162,22 @@ def smokeTest(java, baseURL, gitRevision, version, tmpDir, isSigned, local_keys, scriptutil.download('KEYS', keysFileURL, tmpDir, force_clean=FORCE_CLEAN) keysFile = '%s/KEYS' % (tmpDir) - if is_port_in_use(8983): - raise RuntimeError('Port 8983 is already in use. The smoketester needs it to test Solr') - print() print('Test Lucene...') - checkSigs('lucene', lucenePath, version, tmpDir, isSigned, keysFile) + checkSigs(lucenePath, version, tmpDir, isSigned, keysFile) if not downloadOnly: for artifact in ('lucene-%s.tgz' % version, 'lucene-%s.zip' % version): - unpackAndVerify(java, 'lucene', tmpDir, artifact, gitRevision, version, testArgs, baseURL) - unpackAndVerify(java, 'lucene', tmpDir, 'lucene-%s-src.tgz' % version, gitRevision, version, testArgs, baseURL) + unpackAndVerify(java, tmpDir, artifact, gitRevision, version, testArgs) + unpackAndVerify(java, tmpDir, 'lucene-%s-src.tgz' % version, gitRevision, version, testArgs) + print() + print('Test Maven artifacts...') + checkMaven(baseURL, tmpDir, gitRevision, version, isSigned, keysFile) else: print("\nLucene test done (--download-only specified)") - print() - print('Test Solr...') - checkSigs('solr', solrPath, version, tmpDir, isSigned, keysFile) - if not downloadOnly: - for artifact in ('solr-%s.tgz' % version, 'solr-%s.zip' % version): - unpackAndVerify(java, 'solr', tmpDir, artifact, gitRevision, version, testArgs, baseURL) - solrSrcUnpackPath = unpackAndVerify(java, 'solr', tmpDir, 'solr-%s-src.tgz' % version, - 
gitRevision, version, testArgs, baseURL) - print() - print('Test Maven artifacts for Lucene and Solr...') - checkMaven(solrSrcUnpackPath, baseURL, tmpDir, gitRevision, version, isSigned, keysFile) - else: - print("Solr test done (--download-only specified)") - print('\nSUCCESS! [%s]\n' % (datetime.datetime.now() - startTime)) + if __name__ == '__main__': try: main() diff --git a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java index 71a63839cee..886f34f3251 100644 --- a/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java +++ b/lucene/demo/src/java/org/apache/lucene/demo/IndexFiles.java @@ -175,7 +175,9 @@ public class IndexFiles implements AutoCloseable { + " documents in " + (end.getTime() - start.getTime()) + " milliseconds"); - if (reader.numDocs() > 100 && vectorDictSize < 1_000_000) { + if (reader.numDocs() > 100 + && vectorDictSize < 1_000_000 + && System.getProperty("smoketester") == null) { throw new RuntimeException( "Are you (ab)using the toy vector dictionary? See the package javadocs to understand why you got this exception."); }