') reEndOfClassData = re.compile('') reBlockList = re.compile('

') reCloseUl = re.compile('') def checkClassDetails(fullPath): """ Checks for invalid HTML in the full javadocs under each field/method. """ # TODO: only works with java7 generated javadocs now! with open(fullPath, encoding='UTF-8') as f: desc = [] cat = None item = None errors = [] inDetailsDiv = False blockListDepth = 0 for line in f.readlines(): # Skip content up until

if not inDetailsDiv: if reDetailsDiv.match(line) is not None: inDetailsDiv = True continue # Stop looking at content at closing details

, which is just before if reEndOfClassData.match(line) is not None: if len(desc) != 0: try: verifyHTML(''.join(desc)) except RuntimeError as re: #print(' FAILED: %s' % re) errors.append((cat, item, str(re))) break #

') != -1: desc = [] if desc is not None: desc.append(line) if line.find('

', '') desc = desc.replace('

see: ') or lineLower.startswith('
# NOTE(review): the commented text below is the tail of a function whose
# `def` line and opening statements are not present in this copy of the file
# (string literals in it have visibly lost their contents, e.g.
# `startswith('')`).  It cannot be turned back into valid Python from what is
# visible here, so it is preserved verbatim -- only line breaks were added --
# until the function is restored from the original source.  Presumably this
# is the end of checkSummary(), which checkPackageSummaries() calls; confirm.
#
#   see:') or lineLower.startswith(''):
#   desc = ' '.join(desc) desc = reMarkup.sub(' ', desc) desc = desc.strip()
#   if desc == '': if not printed: print() print(fullPath) printed = True
#   print(' no package description (missing package.html in src?)')
#   anyMissing = True desc = None else: desc.append(lineLower)
#   if lineLower in (' ', '', ' '): if not printed: print() print(fullPath)
#   printed = True print(' missing description: %s' % unescapeHTML(lastHREF))
#   anyMissing = True
#   elif lineLower.find('licensed to the apache software foundation') != -1
#   or lineLower.find('copyright 2004 the apache software foundation') != -1:
#   if not printed: print() print(fullPath) printed = True
#   print(' license-is-javadoc: %s' % unescapeHTML(lastHREF))
#   anyMissing = True m = reHREF.search(line) if m is not None:
#   lastHREF = m.group(1)
#   if desc is not None and fullPath.find('/overview-summary.html') == -1:
#   raise RuntimeError('BUG: failed to locate description in %s' % fullPath)
#   f.close() return anyMissing


def unEscapeURL(s):
    """Return *s* with a few percent-escapes decoded.

    Not exhaustive: only ``%20`` (space), ``%5B`` (``[``) and ``%5D``
    (``]``) are handled; every other escape is left untouched.
    """
    for escaped, plain in (('%20', ' '), ('%5B', '['), ('%5D', ']')):
        s = s.replace(escaped, plain)
    return s


def unescapeHTML(s):
    """Return *s* with ``&lt;``, ``&gt;`` and ``&amp;`` decoded.

    Only these three entities are handled.
    """
    # '&amp;' must be decoded last: doing it first would turn the escaped
    # text '&amp;lt;' into '&lt;' and then, wrongly, into '<'.
    for entity, char in (('&lt;', '<'), ('&gt;', '>'), ('&amp;', '&')):
        s = s.replace(entity, char)
    return s


def checkPackageSummaries(root, level='class'):
    """Check the generated javadocs under *root*; return True on problems.

    *level* selects how much is checked and must be one of ``'class'``,
    ``'package'``, ``'method'`` or ``'none'``:

    * ``package-summary.html`` is passed to ``checkSummary`` for levels
      ``'class'`` and ``'method'``;
    * each class page is passed to ``checkClassSummaries`` for level
      ``'method'`` only;
    * each class page is passed to ``checkClassDetails`` at every level;
    * ``overview-summary.html`` is passed to ``checkSummary`` for every
      level except ``'none'``.

    An unsupported *level* prints a message and exits the process with
    status 1.  If *root* is not a directory it is treated as a single page:
    ``checkClassSummaries`` and ``checkClassDetails`` are run on it, their
    results are ignored, and the process exits with status 0.
    """
    if level not in ('class', 'package', 'method', 'none'):
        print('unsupported level: %s, must be "class" or "package" or "method" or "none"' % level)
        sys.exit(1)

    if not os.path.isdir(root):
        # Single-file mode: run the per-class checks for their printed
        # output only; the exit status is always 0 here.
        checkClassSummaries(root)
        checkClassDetails(root)
        sys.exit(0)

    anyMissing = False

    for dirPath, dirNames, fileNames in os.walk(root):

        if '/all/' in dirPath:
            # These are dups (this is a bit risky, e.g. if root IS this
            # /all/ directory..)
            continue

        if 'package-summary.html' in fileNames:
            if level in ('class', 'method') and checkSummary('%s/package-summary.html' % dirPath):
                anyMissing = True

            # NOTE(review): the collapsed source does not show whether this
            # loop sits inside the package-summary branch or beside it; it
            # is read here as nested, so class pages are only scanned in
            # directories that have a package-summary.html -- confirm.
            for fileName in fileNames:
                fullPath = '%s/%s' % (dirPath, fileName)
                isClassPage = (not fileName.startswith('package-')
                               and fileName.endswith('.html')
                               and os.path.isfile(fullPath))
                if not isClassPage:
                    continue
                if level == 'method' and checkClassSummaries(fullPath):
                    anyMissing = True
                # always look for broken html, regardless of level supplied
                if checkClassDetails(fullPath):
                    anyMissing = True

        if 'overview-summary.html' in fileNames:
            if level != 'none' and checkSummary('%s/overview-summary.html' % dirPath):
                anyMissing = True

    return anyMissing


if __name__ == '__main__':
    # One required argument (the javadocs root) and an optional level.
    if len(sys.argv) < 2 or len(sys.argv) > 3:
        print('usage: %s <dir> [none|package|class|method]' % sys.argv[0])
        sys.exit(1)

    if len(sys.argv) == 2:
        level = 'class'
    else:
        level = sys.argv[2]

    if checkPackageSummaries(sys.argv[1], level):
        print()
        print('Missing javadocs were found!')
        sys.exit(1)
    sys.exit(0)

(.*?) Summary

(.*?)

(.*?)