mirror of https://github.com/apache/lucene.git
Use Python 3.2 for javadocs link checking (its built-in HTML parser is stricter than 2.7's)
git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1356797 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
parent
0f808c6bcd
commit
dae7f3e56a
|
@ -17,8 +17,8 @@ import traceback
|
||||||
import os
|
import os
|
||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
from HTMLParser import HTMLParser, HTMLParseError
|
from html.parser import HTMLParser, HTMLParseError
|
||||||
import urlparse
|
import urllib.parse as urlparse
|
||||||
|
|
||||||
reHyperlink = re.compile(r'<a(\s+.*?)>', re.I)
|
reHyperlink = re.compile(r'<a(\s+.*?)>', re.I)
|
||||||
reAtt = re.compile(r"""(?:\s+([a-z]+)\s*=\s*("[^"]*"|'[^']?'|[^'"\s]+))+""", re.I)
|
reAtt = re.compile(r"""(?:\s+([a-z]+)\s*=\s*("[^"]*"|'[^']?'|[^'"\s]+))+""", re.I)
|
||||||
|
@ -57,7 +57,7 @@ class FindHyperlinks(HTMLParser):
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
self.printFile()
|
self.printFile()
|
||||||
print ' WARNING: anchor "%s" appears more than once' % name
|
print(' WARNING: anchor "%s" appears more than once' % name)
|
||||||
else:
|
else:
|
||||||
self.anchors.add(name)
|
self.anchors.add(name)
|
||||||
elif href is not None:
|
elif href is not None:
|
||||||
|
@ -73,8 +73,8 @@ class FindHyperlinks(HTMLParser):
|
||||||
|
|
||||||
def printFile(self):
|
def printFile(self):
|
||||||
if not self.printed:
|
if not self.printed:
|
||||||
print
|
print()
|
||||||
print ' ' + self.baseURL
|
print(' ' + self.baseURL)
|
||||||
self.printed = True
|
self.printed = True
|
||||||
|
|
||||||
def parse(baseURL, html):
|
def parse(baseURL, html):
|
||||||
|
@ -85,8 +85,8 @@ def parse(baseURL, html):
|
||||||
parser.close()
|
parser.close()
|
||||||
except HTMLParseError:
|
except HTMLParseError:
|
||||||
parser.printFile()
|
parser.printFile()
|
||||||
print ' WARNING: failed to parse %s:' % baseURL
|
print(' WARNING: failed to parse %s:' % baseURL)
|
||||||
traceback.print_exc()
|
traceback.print_exc(file=sys.stdout)
|
||||||
failures = True
|
failures = True
|
||||||
return [], []
|
return [], []
|
||||||
|
|
||||||
|
@ -104,8 +104,8 @@ def checkAll(dirName):
|
||||||
global failures
|
global failures
|
||||||
|
|
||||||
# Find/parse all HTML files first
|
# Find/parse all HTML files first
|
||||||
print
|
print()
|
||||||
print 'Crawl/parse...'
|
print('Crawl/parse...')
|
||||||
allFiles = {}
|
allFiles = {}
|
||||||
|
|
||||||
if os.path.isfile(dirName):
|
if os.path.isfile(dirName):
|
||||||
|
@ -131,8 +131,8 @@ def checkAll(dirName):
|
||||||
allFiles[fullPath] = parse(fullPath, open('%s/%s' % (root, f)).read())
|
allFiles[fullPath] = parse(fullPath, open('%s/%s' % (root, f)).read())
|
||||||
|
|
||||||
# ... then verify:
|
# ... then verify:
|
||||||
print
|
print()
|
||||||
print 'Verify...'
|
print('Verify...')
|
||||||
for fullPath, (links, anchors) in allFiles.items():
|
for fullPath, (links, anchors) in allFiles.items():
|
||||||
#print fullPath
|
#print fullPath
|
||||||
printed = False
|
printed = False
|
||||||
|
@ -176,16 +176,16 @@ def checkAll(dirName):
|
||||||
and os.path.basename(fullPath) != 'Changes.html':
|
and os.path.basename(fullPath) != 'Changes.html':
|
||||||
if not printed:
|
if not printed:
|
||||||
printed = True
|
printed = True
|
||||||
print
|
print()
|
||||||
print fullPath
|
print(fullPath)
|
||||||
print ' BAD EXTERNAL LINK: %s' % link
|
print(' BAD EXTERNAL LINK: %s' % link)
|
||||||
elif link.startswith('mailto:'):
|
elif link.startswith('mailto:'):
|
||||||
if link.find('@lucene.apache.org') == -1 and link.find('@apache.org') != -1:
|
if link.find('@lucene.apache.org') == -1 and link.find('@apache.org') != -1:
|
||||||
if not printed:
|
if not printed:
|
||||||
printed = True
|
printed = True
|
||||||
print
|
print()
|
||||||
print fullPath
|
print(fullPath)
|
||||||
print ' BROKEN MAILTO (?): %s' % link
|
print(' BROKEN MAILTO (?): %s' % link)
|
||||||
elif link.startswith('javascript:'):
|
elif link.startswith('javascript:'):
|
||||||
# ok...?
|
# ok...?
|
||||||
pass
|
pass
|
||||||
|
@ -200,15 +200,15 @@ def checkAll(dirName):
|
||||||
if not os.path.exists(link):
|
if not os.path.exists(link):
|
||||||
if not printed:
|
if not printed:
|
||||||
printed = True
|
printed = True
|
||||||
print
|
print()
|
||||||
print fullPath
|
print(fullPath)
|
||||||
print ' BROKEN LINK: %s' % link
|
print(' BROKEN LINK: %s' % link)
|
||||||
elif anchor is not None and anchor not in allFiles[link][1]:
|
elif anchor is not None and anchor not in allFiles[link][1]:
|
||||||
if not printed:
|
if not printed:
|
||||||
printed = True
|
printed = True
|
||||||
print
|
print()
|
||||||
print fullPath
|
print(fullPath)
|
||||||
print ' BROKEN ANCHOR: %s' % origLink
|
print(' BROKEN ANCHOR: %s' % origLink)
|
||||||
|
|
||||||
failures = failures or printed
|
failures = failures or printed
|
||||||
|
|
||||||
|
@ -216,8 +216,8 @@ def checkAll(dirName):
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
if checkAll(sys.argv[1]):
|
if checkAll(sys.argv[1]):
|
||||||
print
|
print()
|
||||||
print 'Broken javadocs links were found!'
|
print('Broken javadocs links were found!')
|
||||||
sys.exit(1)
|
sys.exit(1)
|
||||||
sys.exit(0)
|
sys.exit(0)
|
||||||
|
|
||||||
|
|
|
@ -200,6 +200,7 @@
|
||||||
<property name="moman.url" value="https://bitbucket.org/jpbarrette/moman" />
|
<property name="moman.url" value="https://bitbucket.org/jpbarrette/moman" />
|
||||||
<property name="moman.rev" value="120" />
|
<property name="moman.rev" value="120" />
|
||||||
<property name="python.exe" value="python" />
|
<property name="python.exe" value="python" />
|
||||||
|
<property name="python32.exe" value="python3.2" />
|
||||||
|
|
||||||
<property name="gpg.exe" value="gpg" />
|
<property name="gpg.exe" value="gpg" />
|
||||||
<property name="gpg.key" value="CODE SIGNING KEY" />
|
<property name="gpg.key" value="CODE SIGNING KEY" />
|
||||||
|
@ -1638,7 +1639,7 @@ ${tests-output}/junit4-*.suites - per-JVM executed suites
|
||||||
<macrodef name="check-broken-links">
|
<macrodef name="check-broken-links">
|
||||||
<attribute name="dir"/>
|
<attribute name="dir"/>
|
||||||
<sequential>
|
<sequential>
|
||||||
<exec dir="." executable="${python.exe}" failonerror="true">
|
<exec dir="." executable="${python32.exe}" failonerror="true">
|
||||||
<arg value="${dev-tools.dir}/scripts/checkJavadocLinks.py"/>
|
<arg value="${dev-tools.dir}/scripts/checkJavadocLinks.py"/>
|
||||||
<arg value="@{dir}"/>
|
<arg value="@{dir}"/>
|
||||||
</exec>
|
</exec>
|
||||||
|
|
Loading…
Reference in New Issue