try to catch mis-matched tags in the desc

git-svn-id: https://svn.apache.org/repos/asf/lucene/dev/trunk@1378143 13f79535-47bb-0310-9956-ffa450edef68
This commit is contained in:
Michael McCandless 2012-08-28 14:08:01 +00:00
parent a8ba6a23be
commit 2684ba035e
1 changed files with 47 additions and 3 deletions

View File

@ -31,6 +31,39 @@ reMethodDetail = re.compile('^<h3>Method Detail</h3>$', re.IGNORECASE)
reMethodDetailAnchor = re.compile('^(?:</a>)?<a name="([^>]*?)">$', re.IGNORECASE)
reMethodOverridden = re.compile('^<dt><strong>(Specified by:|Overrides:)</strong></dt>$', re.IGNORECASE)
reTag = re.compile("(?i)<(\/?\w+)((\s+\w+(\s*=\s*(?:\".*?\"|'.*?'|[^'\">\s]+))?)+\s*|\s*)\/?>")
def verifyHTML(s):
stack = []
upto = 0
while True:
m = reTag.search(s, upto)
if m is None:
break
tag = m.group(1)
upto = m.end(0)
if tag[:1] == '/':
justTag = tag[1:]
else:
justTag = tag
if justTag.lower() in ('br', 'li', 'p'):
continue
if tag[:1] == '/':
if len(stack) == 0:
raise RuntimeError('saw closing "%s" without opening <%s...>' % (m.group(0), tag[1:]))
elif stack[-1][0] != tag[1:].lower():
raise RuntimeError('closing "%s" does not match opening "%s"' % (m.group(0), stack[-1][1]))
stack.pop()
else:
stack.append((tag.lower(), m.group(0)))
if len(stack) != 0:
raise RuntimeError('"%s" was never closed' % stack[-1][1])
def cleanHTML(s):
s = reMarkup.sub('', s)
s = s.replace('&nbsp;', ' ')
@ -44,6 +77,7 @@ def checkClass(fullPath):
f = open(fullPath, encoding='UTF-8')
missing = []
broken = []
inThing = False
lastCaption = None
lastItem = None
@ -113,17 +147,27 @@ def checkClass(fullPath):
desc.append(line)
if line.find('</div>') != -1:
desc = ''.join(desc)
try:
verifyHTML(desc)
except RuntimeError as e:
broken.append((lastCaption, lastItem, str(e)))
#print('FAIL: %s: %s: %s: %s' % (lastCaption, lastItem, e, desc))
desc = desc.replace('<div class="block">', '')
desc = desc.replace('</div>', '')
desc = desc.strip()
hasDesc = len(desc) > 0
desc = None
f.close()
if len(missing) > 0:
if len(missing) > 0 or len(broken) > 0:
print()
print(fullPath)
for (caption, item) in missing:
print(' missing %s: %s' % (caption, item))
for (caption, item, why) in broken:
print(' broken HTML: %s: %s: %s' % (caption, item, why))
return True
else:
return False