mirror of https://github.com/apache/lucene.git
391 lines
13 KiB
Python
391 lines
13 KiB
Python
# Licensed to the Apache Software Foundation (ASF) under one or more
|
|
# contributor license agreements. See the NOTICE file distributed with
|
|
# this work for additional information regarding copyright ownership.
|
|
# The ASF licenses this file to You under the Apache License, Version 2.0
|
|
# (the "License"); you may not use this file except in compliance with
|
|
# the License. You may obtain a copy of the License at
|
|
#
|
|
# http://www.apache.org/licenses/LICENSE-2.0
|
|
#
|
|
# Unless required by applicable law or agreed to in writing, software
|
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
# See the License for the specific language governing permissions and
|
|
# limitations under the License.
|
|
|
|
import sys
|
|
import os
|
|
import re
|
|
|
|
# All patterns are raw strings so that regex escapes such as \. \w \s are
# never interpreted (or warned about) as Python string escapes.

# Captures the content of the first <a ...>...</a> element on a line.
reHREF = re.compile(r'<a.*?>(.*?)</a>', re.IGNORECASE)

# Any single markup tag (non-greedy).
reMarkup = re.compile(r'<.*?>')
# Content of a <div class="block"> element that opens and closes on one line.
reDivBlock = re.compile(r'<div class="block">(.*?)</div>', re.IGNORECASE)
# Summary-table captions; reJ8Caption captures the text before " Summary".
reCaption = re.compile(r'<caption><span>(.*?)</span>', re.IGNORECASE)
reJ8Caption = re.compile(r'<h3>(.*?) Summary</h3>')

# Item-name extractors for summary-table rows; group(1) is the item name.
# nested classes
reTDLastNested = re.compile(r'^<td class="colLast"><code><strong><a href="[^>]*\.([^>]*?)\.html" title="class in[^>]*">', re.IGNORECASE)
# methods etc.
reTDLast = re.compile(r'^<td class="colLast"><code><strong><a href="[^>]*#([^>]*?)">', re.IGNORECASE)
# ctors etc.
reColOne = re.compile(r'^<td class="colOne"><code><strong><a href="[^>]*#([^>]*?)">', re.IGNORECASE)
# java 8
reMemberNameLink = re.compile(r'^<td class="colLast"><code><span class="memberNameLink"><a href="[^>]*#([^>]*?)"', re.IGNORECASE)
# java 8, nested class
reNestedClassMemberNameLink = re.compile(r'^<td class="colLast"><code><span class="memberNameLink"><a href="[^>]*?".*?>(.*?)</a>', re.IGNORECASE)
# java 8 ctors
reMemberNameOneLink = re.compile(r'^<td class="colOne"><code><span class="memberNameLink"><a href="[^>]*#([^>]*?)"', re.IGNORECASE)

# the Method detail section at the end
reMethodDetail = re.compile(r'^<h3>Method Detail</h3>$', re.IGNORECASE)
reMethodDetailAnchor = re.compile(r'^(?:</a>)?<a name="([^>]*?)">$', re.IGNORECASE)
reMethodOverridden = re.compile(r'^<dt><strong>(Specified by:|Overrides:)</strong></dt>$', re.IGNORECASE)

# An opening or closing tag with optional attributes; group(1) is the tag
# name, prefixed with '/' for a closing tag.
reTag = re.compile(r'''(?i)<(\/?\w+)((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)\/?>''')
|
|
|
|
def verifyHTML(s):
    """Check that the tags found in ``s`` are opened and closed in nested order.

    Tags are located with ``reTag``. ``br``, ``li``, ``p`` and ``col`` tags
    (opening or closing, any case) are ignored entirely. Every other opening
    tag must be closed by a matching closing tag, compared case-insensitively.

    Raises:
        RuntimeError: on a closing tag with nothing open, a closing tag that
            does not match the most recently opened tag, or a tag left open
            at the end of ``s``.
    """
    # Each entry is (lower-cased tag name, full text of the opening tag).
    openTags = []
    pos = 0
    found = reTag.search(s, pos)
    while found is not None:
        pos = found.end(0)
        name = found.group(1)
        closing = name.startswith('/')
        bare = name[1:] if closing else name

        if bare.lower() not in ('br', 'li', 'p', 'col'):
            if not closing:
                openTags.append((name.lower(), found.group(0)))
            elif not openTags:
                raise RuntimeError('saw closing "%s" without opening <%s...>' % (found.group(0), bare))
            elif openTags[-1][0] != bare.lower():
                raise RuntimeError('closing "%s" does not match opening "%s"' % (found.group(0), openTags[-1][1]))
            else:
                openTags.pop()

        found = reTag.search(s, pos)

    if openTags:
        raise RuntimeError('"%s" was never closed' % openTags[-1][1])
|
|
|
|
def cleanHTML(s):
    """Return ``s`` as plain text: markup removed, common entities decoded.

    Every tag matched by ``reMarkup`` is deleted, then the entities
    ``&nbsp;``, ``&lt;``, ``&gt;`` and ``&amp;`` are replaced by the
    characters they stand for, and surrounding whitespace is stripped.
    """
    s = reMarkup.sub('', s)
    # '&amp;' is decoded last so that text such as '&amp;lt;' ends up as the
    # literal '&lt;' instead of being decoded twice into '<'.
    for entity, char in (('&nbsp;', ' '),
                         ('&lt;', '<'),
                         ('&gt;', '>'),
                         ('&amp;', '&')):
        s = s.replace(entity, char)
    return s.strip()
|
|
|
|
# Headings that start a line: <h3> carries the category, <h4> the item name.
reH3 = re.compile('^<h3>(.*?)</h3>', flags=re.MULTILINE | re.IGNORECASE)
reH4 = re.compile('^<h4>(.*?)</h4>', flags=re.MULTILINE | re.IGNORECASE)

# Markers delimiting the details section of a class page.
reDetailsDiv = re.compile('<div class="details">')
reEndOfClassData = re.compile('<!--.*END OF CLASS DATA.*-->')

# Opening (blockList / blockListLast) and closing of the <ul> lists.
reBlockList = re.compile('<ul class="blockList(?:Last)?">')
reCloseUl = re.compile('</ul>')
|
|
|
|
def checkClassDetails(fullPath):
    """
    Checks for invalid HTML in the full javadocs under each field/method.

    Only the lines between ``<div class="details">`` and the
    ``END OF CLASS DATA`` comment are examined. Lines seen while the
    blockList nesting depth is 3 are buffered, and each buffer is handed to
    verifyHTML whenever the next blockList opens or the section ends.
    Failures are printed, labelled with the most recent <h3>/<h4> headings.

    Returns True if any broken HTML was found, otherwise False.
    """

    # TODO: only works with java7 generated javadocs now!
    problems = []
    pending = []
    category = None
    member = None
    seenDetails = False
    depth = 0

    def verifyPending():
        # Validate the buffered lines and record any failure under the
        # headings that are current at the time of the call.
        if pending:
            try:
                verifyHTML(''.join(pending))
            except RuntimeError as err:
                problems.append((category, member, str(err)))

    with open(fullPath, encoding='UTF-8') as f:
        for line in f:
            # Skip content up until <div class="details">
            if not seenDetails:
                seenDetails = reDetailsDiv.match(line) is not None
                continue

            # Stop looking at content at closing details </div>, which is
            # just before <!-- === END OF CLASS DATA === -->
            if reEndOfClassData.match(line) is not None:
                verifyPending()
                break

            # <ul class="blockList(Last)"> is the boundary between items
            if reBlockList.match(line) is not None:
                depth += 1
                verifyPending()
                del pending[:]

            if depth == 3:
                pending.append(line)

            if reCloseUl.match(line) is not None:
                depth -= 1
                continue

            heading = reH3.search(line)
            if heading is not None:
                category = heading.group(1)
                continue

            heading = reH4.search(line)
            if heading is not None:
                member = heading.group(1)

    if not problems:
        return False

    print()
    print(fullPath)
    for cat, item, message in problems:
        print(' broken details HTML: %s: %s: %s' % (cat, item, message))
    return True
|
|
|
|
def checkClassSummaries(fullPath):
    """Check the summary tables of one javadoc class page.

    Each table row (a line containing ``<tr class="`` or ``<tr id="`` up to
    the following ``</tr>``) is expected to contain a non-empty
    ``<div class="block">`` description. Rows without one are recorded as
    missing under the most recent caption; descriptions that fail verifyHTML
    are recorded as broken. Once the "Method Detail" heading has been seen,
    a missing ``Methods`` entry is dropped again if its detail section
    contains ">Overrides:<" or ">Specified by:<".

    Prints the file path and one line per problem.

    Returns True if anything was missing or broken, otherwise False.

    Raises:
        RuntimeError: if a row without description ends before any item name
            could be extracted.
    """
    # TODO: only works with java7 generated javadocs now!
    missing = []
    broken = []
    inThing = False
    hasDesc = False
    lastCaption = None
    lastItem = None

    desc = None

    foundMethodDetail = False
    lastMethodAnchor = None
    lineCount = 0

    # Tried in order to find the item in question (method/member name).
    itemMatchers = (reTDLastNested,               # nested classes
                    reTDLast,                     # methods etc.
                    reColOne,                     # ctors etc.
                    reMemberNameLink,             # java 8
                    reNestedClassMemberNameLink,  # java 8, nested class
                    reMemberNameOneLink)          # java 8 ctors

    # The context manager closes the file even when RuntimeError is raised
    # from inside the loop.
    with open(fullPath, encoding='UTF-8') as f:
        for line in f:
            lineCount += 1
            if reMethodDetail.search(line) is not None:
                foundMethodDetail = True
                continue

            # prune methods that are just @Overrides of other interface/classes,
            # they should be specified elsewhere, if they are e.g. jdk or
            # external classes we cannot inherit their docs anyway
            if foundMethodDetail:
                m = reMethodDetailAnchor.search(line)
                if m is not None:
                    lastMethodAnchor = m.group(1)
                    continue
                isOverrides = '>Overrides:<' in line or '>Specified by:<' in line
                if isOverrides and ('Methods', lastMethodAnchor) in missing:
                    missing.remove(('Methods', lastMethodAnchor))

            m = reCaption.search(line)
            if m is not None:
                lastCaption = m.group(1)
            else:
                m = reJ8Caption.search(line)
                if m is not None:
                    lastCaption = m.group(1)
                    # Normalise to the plural form used by reCaption captions.
                    if not lastCaption.endswith('s'):
                        lastCaption += 's'

            for matcher in itemMatchers:
                m = matcher.search(line)
                if m is not None:
                    lastItem = m.group(1)
                    break

            lineLower = line.strip().lower()

            if lineLower.find('<tr class="') != -1 or lineLower.find('<tr id="') != -1:
                inThing = True
                hasDesc = False
                continue

            if not inThing:
                continue

            if lineLower.find('</tr>') != -1:
                if not hasDesc:
                    if lastItem is None:
                        raise RuntimeError('failed to locate javadoc item in %s, line %d? last line: %s' % (fullPath, lineCount, line.rstrip()))
                    missing.append((lastCaption, unEscapeURL(lastItem)))
                inThing = False
                continue

            if line.find('<div class="block">') != -1:
                desc = []
            if desc is not None:
                desc.append(line)
                if line.find('</div>') != -1:
                    desc = ''.join(desc)

                    try:
                        verifyHTML(desc)
                    except RuntimeError as e:
                        broken.append((lastCaption, lastItem, str(e)))

                    # A description counts only if text remains once the
                    # wrapping div markup is removed.
                    desc = desc.replace('<div class="block">', '')
                    desc = desc.replace('</div>', '')
                    desc = desc.strip()
                    hasDesc = len(desc) > 0

                    desc = None

    if len(missing) > 0 or len(broken) > 0:
        print()
        print(fullPath)
        for (caption, item) in missing:
            print(' missing %s: %s' % (caption, item))
        for (caption, item, why) in broken:
            print(' broken HTML: %s: %s: %s' % (caption, item, why))
        return True
    else:
        return False
|
|
|
|
def checkSummary(fullPath):
    """Check a package-summary.html / overview-summary.html page.

    Reports, by printing under the file path:
      * a missing package description (nothing but markup between the
        package heading and the first table / "see:" line),
      * table cells that are empty, labelled with the text of the most
        recently seen link,
      * lines containing the Apache license text, i.e. a license header
        that ended up as javadoc.

    Returns True if anything was reported, otherwise False.

    Raises:
        RuntimeError: if no description boundary was found and the file is
            not an overview-summary.html.
    """
    printed = False
    anyMissing = False
    sawPackage = False
    desc = []
    lastHREF = None

    # Empty summary cells. The '&nbsp;' entity forms are listed next to the
    # plain-space forms so that the check works on the raw HTML text.
    emptyCells = ('<td>&nbsp;</td>',
                  '<td> </td>',
                  '<td></td>',
                  '<td class="collast">&nbsp;</td>',
                  '<td class="collast"> </td>')

    def report(message):
        # Print the file path once, before the first problem line.
        nonlocal printed, anyMissing
        if not printed:
            print()
            print(fullPath)
            printed = True
        print(message)
        anyMissing = True

    # The context manager closes the file on every exit path.
    with open(fullPath, encoding='UTF-8') as f:
        for line in f:
            lineLower = line.strip().lower()
            if desc is not None:
                # TODO: also detect missing description in overview-summary
                if lineLower.startswith(('package ', '<h1 title="package" ')):
                    sawPackage = True
                elif sawPackage:
                    if lineLower.startswith(('<table ', '<b>see: ', '<p>see:')):
                        text = reMarkup.sub(' ', ' '.join(desc)).strip()
                        if text == '':
                            report(' no package description (missing package.html in src?)')
                        # None marks the description as located.
                        desc = None
                    else:
                        desc.append(lineLower)

            if lineLower in emptyCells:
                report(' missing: %s' % unescapeHTML(lastHREF))
            elif ('licensed to the apache software foundation' in lineLower
                  or 'copyright 2004 the apache software foundation' in lineLower):
                report(' license-is-javadoc: %s' % unescapeHTML(lastHREF))

            m = reHREF.search(line)
            if m is not None:
                lastHREF = m.group(1)

    if desc is not None and fullPath.find('/overview-summary.html') == -1:
        raise RuntimeError('BUG: failed to locate description in %s' % fullPath)
    return anyMissing
|
|
|
|
def unEscapeURL(s):
    """Return ``s`` with each ``%20`` escape turned into a space.

    Not exhaustive!! No other percent-escape is decoded.
    """
    return ' '.join(s.split('%20'))
|
|
|
|
def unescapeHTML(s):
    """Return ``s`` with ``&lt;``, ``&gt;`` and ``&amp;`` decoded.

    Only these three entities are handled.
    """
    s = s.replace('&lt;', '<')
    s = s.replace('&gt;', '>')
    # '&amp;' goes last so that '&amp;lt;' becomes the literal '&lt;' rather
    # than being decoded twice into '<'.
    s = s.replace('&amp;', '&')
    return s
|
|
|
|
def checkPackageSummaries(root, level='class'):
    """
    Walks the javadocs under root and runs the checks selected by level;
    returns True if there are problems.

    level is one of:
      'none'    -- only broken HTML in class detail sections is checked
      'package' -- additionally checks overview-summary.html
      'class'   -- additionally checks each package-summary.html
      'method'  -- additionally checks the summary tables of every class page

    Class pages are only examined in directories that contain a
    package-summary.html. Directories whose path contains '/all/' are
    skipped.

    If root is not a directory it is checked as a single class page and the
    process then exits with status 0. An unsupported level prints a message
    and exits with status 1.
    """

    if level not in ('class', 'package', 'method', 'none'):
        print('unsupported level: %s, must be "class" or "package" or "method" or "none"' % level)
        sys.exit(1)

    if not os.path.isdir(root):
        # Single-file mode: problems are printed by the checks themselves.
        checkClassSummaries(root)
        checkClassDetails(root)
        sys.exit(0)

    anyMissing = False
    for dirPath, dirNames, fileNames in os.walk(root):

        if dirPath.find('/all/') != -1:
            # These are dups (this is a bit risk, eg, root IS this /all/ directory..)
            continue

        if 'package-summary.html' in fileNames:
            if level in ('class', 'method') and checkSummary('%s/package-summary.html' % dirPath):
                anyMissing = True
            for fileName in fileNames:
                fullPath = '%s/%s' % (dirPath, fileName)
                if not fileName.startswith('package-') and fileName.endswith('.html') and os.path.isfile(fullPath):
                    # Each check is called unconditionally (never
                    # short-circuited on anyMissing) because it also prints
                    # its own report.
                    if level == 'method':
                        if checkClassSummaries(fullPath):
                            anyMissing = True
                    # always look for broken html, regardless of level supplied
                    if checkClassDetails(fullPath):
                        anyMissing = True

        if 'overview-summary.html' in fileNames:
            if level != 'none' and checkSummary('%s/overview-summary.html' % dirPath):
                anyMissing = True

    return anyMissing
|
|
|
|
# Command-line entry point: <dir> is required; the level defaults to 'class'.
# Exits with status 1 on a usage error or when problems were found.
if __name__ == '__main__':
    if len(sys.argv) not in (2, 3):
        print('usage: %s <dir> [none|package|class|method]' % sys.argv[0])
        sys.exit(1)

    level = 'class' if len(sys.argv) == 2 else sys.argv[2]

    if checkPackageSummaries(sys.argv[1], level):
        print()
        print('Missing javadocs were found!')
        sys.exit(1)
    sys.exit(0)
|