LUCENE-8106: reproduceJenkinsFailures.py improvements: add command-line option parsing; sort the report by failure count so that the tests with the most failures appear at the bottom; when 100% of a test's iterations fail, retest at the tip of the branch, first with the seed and then without it.

2018-02-13 23:47:57 -05:00 · 2018-02-13 23:47:57 -05:00 · a07493d508
parent 00f970cec6
commit a07493d508
1 changed files with 97 additions and 51 deletions
--- a/dev-tools/scripts/reproduceJenkinsFailures.py
+++ b/dev-tools/scripts/reproduceJenkinsFailures.py
@@ -13,38 +13,23 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+import argparse
 import os
 import re
 import subprocess
 import sys
+import traceback
 import urllib.error
 import urllib.request
 from textwrap import dedent

-# Number of iterations per test suite
-testIters = 5
-
-usage = dedent('''\
-               Usage:\n
-                 python3 -u %s URL\n
-               Must be run from a Lucene/Solr git workspace. Downloads the Jenkins
-               log pointed to by the given URL, parses it for Git revision and failed
-               Lucene/Solr tests, checks out the Git revision in the local workspace,
-               groups the failed tests by module, then runs
-               'ant test -Dtest.dups=%d -Dtests.class="*.test1[|*.test2[...]]" ...'
-               in each module of interest, failing at the end if any of the runs fails.
-               To control the maximum number of concurrent JVMs used for each module's
-               test run, set 'tests.jvms', e.g. in ~/lucene.build.properties
-               ''' % (sys.argv[0], testIters))
-
-reHelpArg = re.compile(r'-{1,2}(?:\?|h(?:elp)?)')
-
 # Example: Checking out Revision e441a99009a557f82ea17ee9f9c3e9b89c75cee6 (refs/remotes/origin/master)
-reGitRev = re.compile(r'Checking out Revision (\S+)')
+reGitRev = re.compile(r'Checking out Revision (\S+)\s+\(refs/remotes/origin/([^)]+)')

 # Method example: NOTE: reproduce with: ant test  -Dtestcase=ZkSolrClientTest -Dtests.method=testMultipleWatchesAsync -Dtests.seed=6EF5AB70F0032849 -Dtests.slow=true -Dtests.locale=he-IL -Dtests.timezone=NST -Dtests.asserts=true -Dtests.file.encoding=UTF-8
 # Suite example:  NOTE: reproduce with: ant test  -Dtestcase=CloudSolrClientTest -Dtests.seed=DB2DF2D8228BAF27 -Dtests.multiplier=3 -Dtests.slow=true -Dtests.locale=es-AR -Dtests.timezone=America/Argentina/Cordoba -Dtests.asserts=true -Dtests.file.encoding=US-ASCII
 reReproLine = re.compile(r'NOTE:\s+reproduce\s+with:(\s+ant\s+test\s+-Dtestcase=(\S+)\s+(?:-Dtests.method=\S+\s+)?(.*))')
+reTestsSeed = re.compile(r'-Dtests.seed=\S+\s*')

 # Example: https://jenkins.thetaphi.de/job/Lucene-Solr-master-Linux/21108/
 reJenkinsURLWithoutConsoleText = re.compile(r'https?://.*/\d+/?\Z', re.IGNORECASE)
@@ -57,12 +42,32 @@ reErrorFailure = re.compile(r'(?:errors|failures)="[^0]')
 # consoleText from Policeman Jenkins's Windows jobs fails to decode as UTF-8
 encoding = 'iso-8859-1'

-tests = {}
-modules = {}
-
 lastFailureCode = 0
 gitCheckoutSucceeded = False

+description = dedent('''\
+                     Must be run from a Lucene/Solr git workspace. Downloads the Jenkins
+                     log pointed to by the given URL, parses it for Git revision and failed
+                     Lucene/Solr tests, checks out the Git revision in the local workspace,
+                     groups the failed tests by module, then runs
+                     'ant test -Dtest.dups=%d -Dtests.class="*.test1[|*.test2[...]]" ...'
+                     in each module of interest, failing at the end if any of the runs fails.
+                     To control the maximum number of concurrent JVMs used for each module's
+                     test run, set 'tests.jvms', e.g. in ~/lucene.build.properties
+                     ''')
+defaultIters = 5
+
+def readConfig():
+  parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter,
+                                   description=description)
+  parser.add_argument('url', metavar='URL',
+                      help='Points to the Jenkins log to parse')
+  parser.add_argument('--no-fetch', dest='fetch', action='store_false', default=True,
+                      help='Do not run "git fetch" prior to "git checkout"')
+  parser.add_argument('--iters', dest='testIters', type=int, default=defaultIters, metavar='N',
+                      help='Number of iterations per test suite (default: %d)' % defaultIters)
+  return parser.parse_args()
+
 def runOutput(cmd):
  print('[repro] %s' % cmd)
  try:
@@ -82,8 +87,10 @@ def run(cmd, rememberFailure=True):
  return code

 def fetchAndParseJenkinsLog(url):
-  global revision
-  revision = None
+  global revisionFromLog
+  global branchFromLog
+  revisionFromLog = None
+  tests = {}
  print('[repro] Jenkins log URL: %s\n' % url)
  try:
    with urllib.request.urlopen(url) as consoleText:
@@ -91,8 +98,9 @@ def fetchAndParseJenkinsLog(url):
        line = rawLine.decode(encoding)
        match = reGitRev.match(line)
        if match is not None:
-          revision = match.group(1)
-          print('[repro] Revision: %s\n' % revision)
+          revisionFromLog = match.group(1)
+          branchFromLog = match.group(2)
+          print('[repro] Revision: %s\n' % revisionFromLog)
        else:
          match = reReproLine.search(line)
          if match is not None:
@@ -103,7 +111,7 @@ def fetchAndParseJenkinsLog(url):
  except urllib.error.URLError as e:
    raise RuntimeError('ERROR: fetching %s : %s' % (url, e))
  
-  if revision == None:
+  if revisionFromLog == None:
    if reJenkinsURLWithoutConsoleText.match(url):
      print('[repro] Not a Jenkins log. Appending "/consoleText" and retrying ...\n')
      fetchAndParseJenkinsLog(url + '/consoleText')                                                        
@@ -112,18 +120,29 @@ def fetchAndParseJenkinsLog(url):
  if 0 == len(tests):
    print('[repro] No "reproduce with" lines found; exiting.')
    sys.exit(0)
+  return tests

-def prepareWorkspace():
+def prepareWorkspace(fetch, gitRef):
  global gitCheckoutSucceeded
-  code = run('git checkout %s' % revision)
+  if fetch:
+    code = run('git fetch')
+    if 0 != code:
+      raise RuntimeError('ERROR: "git fetch" failed.  See above.')
+  checkoutCmd = 'git checkout %s' % gitRef
+  code = run(checkoutCmd)
  if 0 != code:
-    raise RuntimeError('ERROR: "git checkout %s" failed.  See above.  Maybe try "git pull"?' % revision)
+    raise RuntimeError('ERROR: "%s" failed.  See above.' % checkoutCmd)
+  if fetch:
+    code = run('git pull')
+    if 0 != code:
+      raise RuntimeError('ERROR: "git pull" failed.  See above.')
  gitCheckoutSucceeded = True
  code = run('ant clean')
  if 0 != code:
    raise RuntimeError('ERROR: "ant clean" failed.  See above.')

-def groupTestsByModule():
+def groupTestsByModule(tests):
+  modules = {}
  for (dir, _, files) in os.walk('.'):
    for file in files:
      match = reJavaFile.search(file)
@@ -140,9 +159,9 @@ def groupTestsByModule():
    print('[repro]    %s' % module)
    for test in modules[module]:
      print('[repro]       %s' % test)
+  return modules

-def runTests():
-  global lastFailureCode
+def runTests(testIters, modules, tests):
  cwd = os.getcwd()
  testCmdline = 'ant test-nocompile -Dtests.dups=%d -Dtests.maxfailures=%d -Dtests.class="%s" -Dtests.showOutput=onerror %s'
  for module in modules:
@@ -153,13 +172,13 @@ def runTests():
    os.chdir(module)
    code = run('ant compile-test')
    try:
-      if (0 != code):
+      if 0 != code:
        raise RuntimeError("ERROR: Compile failed in %s/ with code %d.  See above." % (module, code))
      run(testCmdline % (testIters, testIters * numTests, testList, params))
    finally:
      os.chdir(cwd)
      
-def printReport():
+def printReport(testIters, location):
  failures = {}
  for start in ('lucene/build', 'solr/build'):
    for (dir, _, files) in os.walk(start):
@@ -175,35 +194,62 @@ def printReport():
              if errorFailureMatch is not None:
                failures[testcase] += 1
                break
-  print("[repro] Failures:")
-  for testcase in sorted(failures):
+  print("[repro] Failures%s:" % location)
+  for testcase in sorted(failures, key=lambda t: (failures[t],t)): # sort by failure count, then by testcase 
    print("[repro]   %d/%d failed: %s" % (failures[testcase], testIters, testcase))
+  return failures

-def rememberGitBranch():
-  global origGitBranch
+def getLocalGitBranch():
  origGitBranch = runOutput('git rev-parse --abbrev-ref HEAD')
-  if (origGitBranch == 'HEAD'):                     # In detached HEAD state
+  if origGitBranch == 'HEAD':                       # In detached HEAD state
    origGitBranch = runOutput('git rev-parse HEAD') # Use the SHA when not on a branch
  print('[repro] Initial local git branch/revision: %s' % origGitBranch)
+  return origGitBranch

 def main():
-  if 2 != len(sys.argv) or reHelpArg.match(sys.argv[1]):
-    print(usage)
-    sys.exit(0)
-  fetchAndParseJenkinsLog(sys.argv[1])
-  rememberGitBranch()
+  config = readConfig()
+  tests = fetchAndParseJenkinsLog(config.url)
+  localGitBranch = getLocalGitBranch()

  try:
-    prepareWorkspace()
-    groupTestsByModule()
-    runTests()
-    printReport()
+    prepareWorkspace(config.fetch, revisionFromLog)
+    modules = groupTestsByModule(tests)
+    runTests(config.testIters, modules, tests)
+    failures = printReport(config.testIters, '')
+    
+    # Retest 100% failures at the tip of the branch
+    oldTests = tests
+    tests = {}
+    for fullClass in failures:
+      testcase = fullClass[(fullClass.rindex('.') + 1):]
+      if failures[fullClass] == config.testIters:
+        tests[testcase] = oldTests[testcase]
+    if len(tests) > 0:
+      print('\n[repro] Re-testing 100%% failures at the tip of %s' % branchFromLog)
+      prepareWorkspace(False, branchFromLog)
+      modules = groupTestsByModule(tests)
+      runTests(config.testIters, modules, tests)
+      failures = printReport(config.testIters, ' at the tip of %s' % branchFromLog)
+      
+      # Retest 100% tip-of-branch failures without a seed
+      oldTests = tests
+      tests = {}
+      for fullClass in failures:
+        testcase = fullClass[(fullClass.rindex('.') + 1):]
+        if failures[fullClass] == config.testIters:
+          tests[testcase] = re.sub(reTestsSeed, '', oldTests[testcase])
+      if len(tests) > 0:
+        print('\n[repro] Re-testing 100%% failures at the tip of %s without a seed' % branchFromLog)
+        prepareWorkspace(False, branchFromLog)
+        modules = groupTestsByModule(tests)
+        runTests(config.testIters, modules, tests)
+        printReport(config.testIters, ' at the tip of %s without a seed' % branchFromLog)
  except Exception as e:
-    print('[repro] %s' % e)
+    print('[repro] %s' % traceback.format_exc())
    sys.exit(1)
  finally:
    if gitCheckoutSucceeded:
-      run('git checkout %s' % origGitBranch, rememberFailure=False) # Restore original git branch/sha
+      run('git checkout %s' % localGitBranch, rememberFailure=False) # Restore original git branch/sha

  print('[repro] Exiting with code %d' % lastFailureCode)
  sys.exit(lastFailureCode)