From dae7f3e56a8d7854e36c7ee36f826cfb7c3e1013 Mon Sep 17 00:00:00 2001 From: Michael McCandless Date: Tue, 3 Jul 2012 15:50:14 +0000 Subject: [PATCH] use python3.2 for javadocs link checking (its builtin HTML parser is more strict than 2.7's) git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1356797 13f79535-47bb-0310-9956-ffa450edef68 --- dev-tools/scripts/checkJavadocLinks.py | 50 +++++++++++++------------- lucene/common-build.xml | 3 +- 2 files changed, 27 insertions(+), 26 deletions(-) diff --git a/dev-tools/scripts/checkJavadocLinks.py b/dev-tools/scripts/checkJavadocLinks.py index c5829af063c..74a626b5c31 100644 --- a/dev-tools/scripts/checkJavadocLinks.py +++ b/dev-tools/scripts/checkJavadocLinks.py @@ -17,8 +17,8 @@ import traceback import os import sys import re -from HTMLParser import HTMLParser, HTMLParseError -import urlparse +from html.parser import HTMLParser, HTMLParseError +import urllib.parse as urlparse reHyperlink = re.compile(r'', re.I) reAtt = re.compile(r"""(?:\s+([a-z]+)\s*=\s*("[^"]*"|'[^']?'|[^'"\s]+))+""", re.I) @@ -57,7 +57,7 @@ class FindHyperlinks(HTMLParser): pass else: self.printFile() - print ' WARNING: anchor "%s" appears more than once' % name + print(' WARNING: anchor "%s" appears more than once' % name) else: self.anchors.add(name) elif href is not None: @@ -73,8 +73,8 @@ class FindHyperlinks(HTMLParser): def printFile(self): if not self.printed: - print - print ' ' + self.baseURL + print() + print(' ' + self.baseURL) self.printed = True def parse(baseURL, html): @@ -85,8 +85,8 @@ def parse(baseURL, html): parser.close() except HTMLParseError: parser.printFile() - print ' WARNING: failed to parse %s:' % baseURL - traceback.print_exc() + print(' WARNING: failed to parse %s:' % baseURL) + traceback.print_exc(file=sys.stdout) failures = True return [], [] @@ -104,8 +104,8 @@ def checkAll(dirName): global failures # Find/parse all HTML files first - print - print 'Crawl/parse...' + print() + print('Crawl/parse...') allFiles = {} if os.path.isfile(dirName): @@ -131,8 +131,8 @@ def checkAll(dirName): allFiles[fullPath] = parse(fullPath, open('%s/%s' % (root, f)).read()) # ... then verify: - print - print 'Verify...' + print() + print('Verify...') for fullPath, (links, anchors) in allFiles.items(): #print fullPath printed = False @@ -176,16 +176,16 @@ def checkAll(dirName): and os.path.basename(fullPath) != 'Changes.html': if not printed: printed = True - print - print fullPath - print ' BAD EXTERNAL LINK: %s' % link + print() + print(fullPath) + print(' BAD EXTERNAL LINK: %s' % link) elif link.startswith('mailto:'): if link.find('@lucene.apache.org') == -1 and link.find('@apache.org') != -1: if not printed: printed = True - print - print fullPath - print ' BROKEN MAILTO (?): %s' % link + print() + print(fullPath) + print(' BROKEN MAILTO (?): %s' % link) elif link.startswith('javascript:'): # ok...? pass @@ -200,15 +200,15 @@ def checkAll(dirName): if not os.path.exists(link): if not printed: printed = True - print - print fullPath - print ' BROKEN LINK: %s' % link + print() + print(fullPath) + print(' BROKEN LINK: %s' % link) elif anchor is not None and anchor not in allFiles[link][1]: if not printed: printed = True - print - print fullPath - print ' BROKEN ANCHOR: %s' % origLink + print() + print(fullPath) + print(' BROKEN ANCHOR: %s' % origLink) failures = failures or printed @@ -216,8 +216,8 @@ def checkAll(dirName): if __name__ == '__main__': if checkAll(sys.argv[1]): - print - print 'Broken javadocs links were found!' + print() + print('Broken javadocs links were found!') sys.exit(1) sys.exit(0) diff --git a/lucene/common-build.xml b/lucene/common-build.xml index 70f218d5f6a..12d28d34168 100644 --- a/lucene/common-build.xml +++ b/lucene/common-build.xml @@ -200,6 +200,7 @@ + @@ -1638,7 +1639,7 @@ ${tests-output}/junit4-*.suites - per-JVM executed suites - +