python-peps/pep2html.py

738 lines
25 KiB
Python
Raw Normal View History

2021-07-13 15:19:58 -04:00
#!/usr/bin/env python3.9
"""Convert PEPs to (X)HTML - courtesy of /F
Usage: %(PROGRAM)s [options] [<peps> ...]
Options:
-u, --user
python.org username
-b, --browse
After generating the HTML, direct your web browser to view it
(using the Python webbrowser module). If both -i and -b are
given, this will browse the on-line HTML; otherwise it will
browse the local HTML. If no pep arguments are given, this
will browse PEP 0.
-i, --install
After generating the HTML, install it and the plaintext source file
(.txt) on python.org. In that case the user's name is used in the scp
and ssh commands, unless "-u username" is given (in which case, it is
used instead). Without -i, -u is ignored.
-l, --local
Same as -i/--install, except install on the local machine. Use this
when logged in to the python.org machine (dinsdale).
-q, --quiet
Turn off verbose messages.
-h, --help
Print this help message and exit.
The optional arguments ``peps`` are either pep numbers, .rst or .txt files.
"""
from __future__ import print_function, unicode_literals
import sys
import os
import re
import glob
import getopt
import errno
import random
import time
from io import open
from pathlib import Path
try:
from html import escape
except ImportError:
from cgi import escape
from docutils import core, nodes, utils
from docutils.readers import standalone
from docutils.transforms import frontmatter, peps, Transform
from docutils.parsers import rst
class DataError(Exception):
    """Raised when a document cannot be processed as a PEP."""
# Minimum versions consulted by check_requirements().
REQUIRES = {'python': '2.6',
            'docutils': '0.2.7'}
PROGRAM = sys.argv[0]
# %-templates taking an integer RFC / PEP number.
RFCURL = 'http://www.faqs.org/rfcs/rfc%d.html'
PEPURL = 'pep-%04d.html'
PEPCVSURL = ('https://hg.python.org/peps/file/tip/pep-%04d.txt')
PEPDIRRUL = 'http://www.python.org/peps/'
HOST = "dinsdale.python.org" # host for update
HDIR = "/data/ftp.python.org/pub/www.python.org/peps" # target host directory
# A line equal to this marker ends the body of a plaintext PEP.
LOCALVARS = "Local Variables:"
COMMENT = """<!--
This HTML is auto-generated. DO NOT EDIT THIS FILE! If you are writing a new
PEP, see http://www.python.org/peps/pep-0001.html for instructions and links
to templates. DO NOT USE THIS HTML FILE AS YOUR TEMPLATE!
-->"""
# The generated HTML doesn't validate -- you cannot use <hr> and <h3> inside
# <pre> tags. But if I change that, the result doesn't look very nice...
DTD = ('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"\n'
       ' "http://www.w3.org/TR/REC-html40/loose.dtd">')
# Alternatives, in order: URL, PEP file name, RFC reference, PEP reference,
# and finally any single character (so every character of a line is visited).
# The dots of the optional file extension are escaped so that only a literal
# ".txt" / ".rst" is consumed as part of a PEP file name.
fixpat = re.compile(r"((https?|ftp):[-_a-zA-Z0-9/.+~:?#$=&,]+)|(pep-\d+(\.txt|\.rst)?)|"
                    r"(RFC[- ]?(?P<rfcnum>\d+))|"
                    r"(PEP\s+(?P<pepnum>\d+))|"
                    r".")
EMPTYSTRING = ''
SPACE = ' '
COMMASPACE = ', '
def usage(code, msg=''):
    """Show the module help text, then exit with status *code*.

    The help text (and *msg*, when non-empty) goes to stdout if *code* is 0
    and to stderr for any other value.
    """
    stream = sys.stderr if code != 0 else sys.stdout
    print(__doc__ % globals(), file=stream)
    if msg:
        print(msg, file=stream)
    sys.exit(code)
def fixanchor(current, match):
    """Return the HTML replacement text for a single ``fixpat`` match.

    current -- name that a ``pep-NNNN`` match is compared against; a match
        equal to it is not turned into a link.
    match -- match object produced by ``fixpat``.

    URLs, PEP file names, "PEP n" and "RFC n" references become ``<a>``
    elements; anything else is returned HTML-escaped.
    """
    text = match.group(0)
    target = None
    if text.startswith(('http:', 'https:', 'ftp:')):
        # Trailing punctuation is kept out of the href (pattern taken from
        # faqwiz); the visible link text still shows the full match.
        target = text.rstrip('''();:,.?'"<>''')
    elif text.startswith('pep-') and text != current:
        target = os.path.splitext(text)[0] + ".html"
    elif text.startswith('PEP'):
        target = PEPURL % int(match.group('pepnum'))
    elif text.startswith('RFC'):
        target = RFCURL % int(match.group('rfcnum'))
    if not target:
        return escape(text) # really slow, but it works...
    return '<a href="%s">%s</a>' % (escape(target), escape(text))
# Addresses that fixemail() hyperlinks instead of masking; fixemail() compares
# the lowercased address against these entries.
NON_MASKED_EMAILS = [
    'peps@python.org',
    'python-list@python.org',
    'python-dev@python.org',
]
def fixemail(address, pepno):
    """Return HTML for *address*: a mailto link or a masked "user at host".

    Addresses listed in NON_MASKED_EMAILS (compared case-insensitively) are
    hyperlinked via linkemail(); every other address is masked.
    """
    if address.lower() not in NON_MASKED_EMAILS:
        # masked version of the email address
        pieces = address.split('@', 1)
        return '%s&#32;&#97;t&#32;%s' % (pieces[0], pieces[1])
    # hyperlinked version of the email address
    return linkemail(address, pepno)
def linkemail(address, pepno):
    """Return a ``mailto:`` link for *address* with subject "PEP <pepno>".

    The "@" is written as a character reference in the href and as " at "
    (also via character references) in the link text.
    """
    pieces = address.split('@', 1)
    user, domain = pieces[0], pieces[1]
    template = ('<a href="mailto:%s&#64;%s?subject=PEP%%20%s">'
                '%s&#32;&#97;t&#32;%s</a>')
    return template % (user, domain, pepno, user, domain)
def fixfile(inpath, input_lines, outfile):
    """Convert a plaintext PEP to minimal (X)HTML and write it to *outfile*.

    inpath -- path of the PEP source.  Its basename selects the PEP 0
        special cases, and the file's modification time is used when the
        Last-Modified header is empty.
    input_lines -- the PEP source as an iterable of lines (endings kept).
    outfile -- text file object open for writing.

    The RFC 2822 style header is rendered as a table; the body is rendered
    as <pre> text in which lines starting in column 0 become <h3> headings.
    A line equal to LOCALVARS ends the body.
    """
    try:
        from email.Utils import parseaddr
    except ImportError:
        from email.utils import parseaddr
    basename = os.path.basename(inpath)
    infile = iter(input_lines)
    # convert plaintext pep to minimal XHTML markup
    print(DTD, file=outfile)
    print('<html>', file=outfile)
    print(COMMENT, file=outfile)
    print('<head>', file=outfile)
    # head
    header = []
    pep = ""
    title = ""
    for line in infile:
        if not line.strip():
            # blank line: end of header
            break
        if line[0].strip():
            if ":" not in line:
                break
            key, value = line.split(":", 1)
            value = value.strip()
            header.append((key, value))
        else:
            # continuation line
            key, value = header[-1]
            value = value + line
            header[-1] = key, value
        if key.lower() == "title":
            title = value
        elif key.lower() == "pep":
            pep = value
    if pep:
        title = "PEP " + pep + " -- " + title
    if title:
        print(' <title>%s</title>' % escape(title), file=outfile)
    # One of the 64 numbered banner images is picked at random.
    r = random.randrange(64)
    print((
        ' <link rel="STYLESHEET" href="style.css" type="text/css" />\n'
        '</head>\n'
        '<body bgcolor="white">\n'
        '<table class="navigation" cellpadding="0" cellspacing="0"\n'
        ' width="100%%" border="0">\n'
        '<tr><td class="navicon" width="150" height="35">\n'
        '<a href="../" title="Python Home Page">\n'
        '<img src="../pics/PyBanner%03d.gif" alt="[Python]"\n'
        ' border="0" width="150" height="35" /></a></td>\n'
        '<td class="textlinks" align="left">\n'
        '[<b><a href="../">Python Home</a></b>]' % r), file=outfile)
    if basename != 'pep-0000.txt':
        print('[<b><a href=".">PEP Index</a></b>]', file=outfile)
    if pep:
        try:
            print(('[<b><a href="pep-%04d.txt">PEP Source</a>'
                   '</b>]' % int(pep)), file=outfile)
        except ValueError as error:
            print(('ValueError (invalid PEP number): %s'
                   % error), file=sys.stderr)
    print('</td></tr></table>', file=outfile)
    print('<div class="header">\n<table border="0">', file=outfile)
    for k, v in header:
        name = k.lower()
        if name in ('author', 'pep-delegate', 'bdfl-delegate',
                    'discussions-to', 'sponsor'):
            mailtos = []
            for part in re.split(r',\s*', v):
                if '@' in part:
                    realname, addr = parseaddr(part)
                    if name == 'discussions-to':
                        m = linkemail(addr, pep)
                    else:
                        m = fixemail(addr, pep)
                    mailtos.append('%s &lt;%s&gt;' % (realname, m))
                elif part.startswith(('http:', 'https:')):
                    mailtos.append(
                        '<a href="%s">%s</a>' % (part, part))
                else:
                    mailtos.append(part)
            v = COMMASPACE.join(mailtos)
        elif name in ('replaces', 'superseded-by', 'requires'):
            otherpeps = ''
            for otherpep in re.split(r',?\s+', v):
                otherpep = int(otherpep)
                otherpeps += '<a href="pep-%04d.html">%i</a> ' % (otherpep,
                                                                  otherpep)
            v = otherpeps
        elif name in ('last-modified',):
            # os.stat(...)[8] is the file's modification time.
            date = v or time.strftime('%d-%b-%Y',
                                      time.localtime(os.stat(inpath)[8]))
            # The keyword is written as two literals so that RCS-style
            # keyword expansion never rewrites this source line.
            if date.startswith('$' 'Date: ') and date.endswith(' $'):
                # '$Date: ' is 7 characters long, ' $' is 2.
                date = date[7:-2]
            if basename == 'pep-0000.txt':
                v = date
            else:
                try:
                    url = PEPCVSURL % int(pep)
                    v = '<a href="%s">%s</a> ' % (url, escape(date))
                except ValueError:
                    v = date
        elif name in ('content-type',):
            url = PEPURL % 9
            pep_type = v or 'text/plain'
            v = '<a href="%s">%s</a> ' % (url, escape(pep_type))
        elif name == 'version':
            if v.startswith('$' 'Revision: ') and v.endswith(' $'):
                v = v[11:-2]
            v = escape(v)
        else:
            v = escape(v)
        print(' <tr><th>%s:&nbsp;</th><td>%s</td></tr>'
              % (escape(k), v), file=outfile)
    print('</table>', file=outfile)
    print('</div>', file=outfile)
    print('<hr />', file=outfile)
    print('<div class="content">', file=outfile)
    # need_pre is true while no <pre> element is open.
    need_pre = 1
    for line in infile:
        if line[0] == '\f':
            continue
        if line.strip() == LOCALVARS:
            break
        if line[0].strip():
            # Text in column 0 is a section heading.
            if not need_pre:
                print('</pre>', file=outfile)
            print('<h3>%s</h3>' % line.strip(), file=outfile)
            need_pre = 1
        elif not line.strip() and need_pre:
            continue
        else:
            # PEP 0 has some special treatment
            if basename == 'pep-0000.txt':
                parts = line.split()
                if len(parts) > 1 and re.match(r'\s*\d{1,4}', parts[1]):
                    # This is a PEP summary line, which we need to hyperlink
                    url = PEPURL % int(parts[1])
                    if need_pre:
                        print('<pre>', file=outfile)
                        need_pre = 0
                    # Literal replacement of the first occurrence; the
                    # token is plain text, not a regular expression.
                    print(line.replace(
                        parts[1],
                        '<a href="%s">%s</a>' % (url, parts[1]),
                        1), end='', file=outfile)
                    continue
                elif parts and '@' in parts[-1]:
                    # This is a pep email address line, so filter it.
                    url = fixemail(parts[-1], pep)
                    if need_pre:
                        print('<pre>', file=outfile)
                        need_pre = 0
                    # Addresses may contain regex metacharacters such as
                    # "+" or ".", so substitute them literally.
                    print(line.replace(parts[-1], url, 1),
                          end='', file=outfile)
                    continue
            line = fixpat.sub(lambda x, c=inpath: fixanchor(c, x), line)
            if need_pre:
                print('<pre>', file=outfile)
                need_pre = 0
            outfile.write(line)
    if not need_pre:
        print('</pre>', file=outfile)
    print('</div>', file=outfile)
    print('</body>', file=outfile)
    print('</html>', file=outfile)
2002-10-18 01:19:08 -04:00
# Runtime settings object used by Docutils.  Can be set by the client
# application when this module is imported.
docutils_settings = None
class PEPHeaders(Transform):

    """
    Process fields in a PEP's initial RFC-2822 header.
    """

    default_priority = 360

    # URL templates taking the integer PEP number.
    pep_url = 'pep-%04d'
    pep_cvs_url = PEPCVSURL
    # (pattern, replacement) pairs handed to utils.clean_rcs_keywords().
    rcs_keyword_substitutions = (
        (re.compile(r'\$' r'RCSfile: (.+),v \$$', re.IGNORECASE), r'\1'),
        (re.compile(r'\$[a-zA-Z]+: (.+) \$$'), r'\1'),)

    def apply(self):
        """Validate the header field list and rewrite selected field bodies.

        Raises DataError when the document is empty, does not start with an
        RFC-2822 field list, lacks a "PEP" field or a "Title" second field,
        or when a field body is not a single paragraph.
        """
        if not len(self.document):
            # @@@ replace these DataErrors with proper system messages
            raise DataError('Document tree is empty.')
        header = self.document[0]
        if not isinstance(header, nodes.field_list) or \
              'rfc2822' not in header['classes']:
            raise DataError('Document does not begin with an RFC-2822 '
                            'header; it is not a PEP.')
        pep = None
        cvs_url = None
        for field in header:
            if field[0].astext().lower() == 'pep': # should be the first field
                value = field[1].astext()
                try:
                    pep = int(value)
                    cvs_url = self.pep_cvs_url % pep
                except ValueError:
                    pep = value
                    cvs_url = None
                    msg = self.document.reporter.warning(
                        '"PEP" header must contain an integer; "%s" is an '
                        'invalid value.' % pep, base_node=field)
                    msgid = self.document.set_id(msg)
                    prb = nodes.problematic(value, value or '(none)',
                                            refid=msgid)
                    prbid = self.document.set_id(prb)
                    msg.add_backref(prbid)
                    if len(field[1]):
                        field[1][0][:] = [prb]
                    else:
                        field[1] += nodes.paragraph('', '', prb)
                break
        if pep is None:
            raise DataError('Document does not contain an RFC-2822 "PEP" '
                            'header.')
        if pep == 0:
            # Special processing for PEP 0.
            pending = nodes.pending(peps.PEPZero)
            self.document.insert(1, pending)
            self.document.note_pending(pending)
        if len(header) < 2 or header[1][0].astext().lower() != 'title':
            raise DataError('No title!')
        for field in header:
            name = field[0].astext().lower()
            body = field[1]
            if len(body) > 1:
                raise DataError('PEP header field body contains multiple '
                                'elements:\n%s' % field.pformat(level=1))
            elif len(body) == 1:
                if not isinstance(body[0], nodes.paragraph):
                    raise DataError('PEP header field body may only contain '
                                    'a single paragraph:\n%s'
                                    % field.pformat(level=1))
            elif name == 'last-modified':
                # Empty Last-Modified: fall back to the source file's
                # modification time (index 8 of the os.stat() result).
                date = time.strftime(
                    '%d-%b-%Y',
                    time.localtime(os.stat(self.document['source'])[8]))
                if cvs_url:
                    body += nodes.paragraph(
                        '', '', nodes.reference('', date, refuri=cvs_url))
                else:
                    # No usable PEP number, hence no URL to link to: emit
                    # the date as plain text so that body[0] exists below.
                    body += nodes.paragraph('', date)
            else:
                # empty
                continue
            para = body[0]
            if name in ('author', 'bdfl-delegate', 'pep-delegate', 'sponsor'):
                for node in para:
                    if isinstance(node, nodes.reference):
                        node.replace_self(peps.mask_email(node))
            elif name == 'discussions-to':
                for node in para:
                    if isinstance(node, nodes.reference):
                        node.replace_self(peps.mask_email(node, pep))
            elif name in ('replaces', 'superseded-by', 'requires'):
                newbody = []
                space = nodes.Text(' ')
                for refpep in re.split(r',?\s+', body.astext()):
                    pepno = int(refpep)
                    newbody.append(nodes.reference(
                        refpep, refpep,
                        refuri=(self.document.settings.pep_base_url
                                + self.pep_url % pepno)))
                    newbody.append(space)
                para[:] = newbody[:-1] # drop trailing space
            elif name == 'last-modified':
                utils.clean_rcs_keywords(para, self.rcs_keyword_substitutions)
                if cvs_url:
                    date = para.astext()
                    para[:] = [nodes.reference('', date, refuri=cvs_url)]
            elif name == 'content-type':
                pep_type = para.astext()
                uri = self.document.settings.pep_base_url + self.pep_url % 12
                para[:] = [nodes.reference('', pep_type, refuri=uri)]
            elif name == 'version' and len(body):
                utils.clean_rcs_keywords(para, self.rcs_keyword_substitutions)
class PEPFooter(Transform):

    """Remove the References section if it is empty when rendered."""

    # Run late so reference targets are not removed while other transforms
    # still need them.
    default_priority = 999

    def apply(self):
        """Drop a trailing "References" section that has nothing to show.

        Only documents whose source path matches ``pep-*`` are touched.
        """
        if not Path(self.document['source']).match('pep-*'):
            return  # not a PEP file, exit early

        def is_placeholder(node):
            # Title and link-target nodes produce no displayed footnotes.
            return isinstance(node, (nodes.title, nodes.target))

        # Walk the top-level nodes from the end of the document; the first
        # section found whose title contains "references" ends the search.
        for candidate in reversed(self.document):
            if not isinstance(candidate, nodes.section):
                continue
            if 'references' not in candidate[0].astext().lower().split():
                continue
            if all(is_placeholder(child) for child in candidate):
                candidate.parent.remove(candidate)
            break
class PEPReader(standalone.Reader):

    """Standalone reader with PEP-specific front matter transforms."""

    # Contexts this reader supports.
    supported = ('pep',)

    settings_spec = (
        'PEP Reader Option Defaults',
        'The --pep-references and --rfc-references options (for the '
        'reStructuredText parser) are on by default.',
        ())

    config_section = 'pep reader'
    config_section_dependencies = ('readers', 'standalone reader')

    settings_default_overrides = {'pep_references': 1, 'rfc_references': 1}

    inliner_class = rst.states.Inliner

    def __init__(self, parser=None, parser_name=None):
        """`parser` should be ``None``."""
        if parser is None:
            parser = rst.Parser(rfc2822=True, inliner=self.inliner_class())
        standalone.Reader.__init__(self, parser, '')

    def get_transforms(self):
        """Return the standalone transforms with PEP front matter handling."""
        pipeline = standalone.Reader.get_transforms(self)
        # We have PEP-specific frontmatter handling.
        for generic in (frontmatter.DocTitle,
                        frontmatter.SectionSubTitle,
                        frontmatter.DocInfo):
            pipeline.remove(generic)
        pipeline.extend([PEPHeaders, peps.Contents, PEPFooter])
        return pipeline
def fix_rst_pep(inpath, input_lines, outfile):
    """Render a reStructuredText PEP with Docutils and write it to *outfile*.

    inpath -- path of the PEP source, passed to Docutils as the source path.
    input_lines -- the PEP source as a sequence of lines (endings kept).
    outfile -- text file object open for writing; its ``name`` is passed to
        Docutils as the destination path.
    """
    output = core.publish_string(
        source=''.join(input_lines),
        source_path=inpath,
        destination_path=outfile.name,
        reader=PEPReader(),
        parser_name='restructuredtext',
        writer_name='pep_html',
        settings=docutils_settings,
        # Allow Docutils traceback if there's an exception:
        settings_overrides={'traceback': 1, 'halt_level': 2})
    # The published result is decoded as UTF-8 before being written to the
    # text-mode output file.
    outfile.write(output.decode('utf-8'))
def get_pep_type(input_lines):
    """
    Return the Content-Type named in the header of *input_lines*.

    Only lines up to the first blank line are examined.  "text/plain" is
    returned when a "PEP:" field is present without an explicit
    "Content-Type:" field; ``None`` means the input is not a PEP.
    """
    detected = None
    for raw in input_lines:
        header_line = raw.rstrip().lower()
        if not header_line:
            # End of the RFC 2822 header (first blank line).
            return detected
        if header_line.startswith('content-type: '):
            return header_line.split()[1] or 'text/plain'
        if header_line.startswith('pep: '):
            # Default PEP type, used if no explicit content-type specified:
            detected = 'text/plain'
    return detected
def get_input_lines(inpath):
    """Return the lines of the UTF-8 text file *inpath*, line endings kept.

    A missing file is reported on stderr and ``None`` is returned; any other
    IOError propagates to the caller.
    """
    try:
        infile = open(inpath, encoding='utf-8')
    except IOError as e:
        if e.errno != errno.ENOENT:
            raise
        print('Error: Skipping missing PEP file:', e.filename, file=sys.stderr)
        sys.stderr.flush()
        return None
    # The with-block closes the file even when reading or decoding fails.
    with infile:
        return infile.read().splitlines(True) # handles x-platform line endings
def find_pep(pep_str):
    """Find the .rst or .txt file indicated by a cmd line argument"""
    if not os.path.exists(pep_str):
        # Not an existing path, so the argument must be a PEP number; the
        # .rst source is preferred when it exists.
        number = int(pep_str)
        rst_name = "pep-%04d.rst" % number
        return rst_name if os.path.exists(rst_name) else "pep-%04d.txt" % number
    return pep_str
def make_html(inpath, verbose=0):
    """Convert the PEP at *inpath* to HTML next to it.

    Returns the path of the generated ``.html`` file, or ``None`` when the
    input is missing, is not a PEP, or has a content type that is unknown
    or cannot be processed (a message is printed to stderr in those cases).
    With a true *verbose*, the conversion is announced on stdout.
    """
    input_lines = get_input_lines(inpath)
    if input_lines is None:
        return None
    pep_type = get_pep_type(input_lines)
    if pep_type is None:
        print('Error: Input file %s is not a PEP.' % inpath, file=sys.stderr)
        sys.stderr.flush()
        return None
    elif pep_type not in PEP_TYPE_DISPATCH:
        print(('Error: Unknown PEP type for input file %s: %s'
               % (inpath, pep_type)), file=sys.stderr)
        sys.stderr.flush()
        return None
    elif PEP_TYPE_DISPATCH[pep_type] is None:
        # The converter was disabled by check_requirements().
        pep_type_error(inpath, pep_type)
        return None
    outpath = os.path.splitext(inpath)[0] + ".html"
    if verbose:
        print(inpath, "(%s)" % pep_type, "->", outpath)
    sys.stdout.flush()
    # The with-block closes the output file even if the converter raises.
    with open(outpath, "w", encoding='utf-8') as outfile:
        PEP_TYPE_DISPATCH[pep_type](inpath, input_lines, outfile)
    os.chmod(outpath, 0o664)
    return outpath
def push_pep(htmlfiles, txtfiles, username, verbose, local=0):
    """Copy generated HTML, PEP sources and style sheets to the PEP directory.

    With a true *local* the files are copied with ``cp`` into HDIR;
    otherwise ``scp`` copies them to HDIR on HOST, logging in as *username*
    when one is given.  Exits the process with the command's status if the
    copy command reports failure.
    """
    if local:
        flag = "-v" if verbose else ""
        copy_cmd, target = "cp", HDIR
        chmod_cmd = "chmod"
    else:
        flag = "" if verbose else "-q"
        if username:
            username = username + "@"
        copy_cmd, target = "scp", username + HOST + ":" + HDIR
        # Only referenced by the disabled chmod step below.
        chmod_cmd = "ssh %s%s chmod" % (username, HOST)
    payload = htmlfiles[:]
    payload += txtfiles
    payload += ["style.css", "pep.css"]
    command = "%s %s %s %s" % (copy_cmd, flag, SPACE.join(payload), target)
    status = os.system(command)
    if status:
        sys.exit(status)
##    rc = os.system("%s 664 %s/*" % (chmod_cmd, HDIR))
##    if rc:
##        sys.exit(rc)
# Maps a PEP content type to the converter called by make_html() as
# converter(inpath, input_lines, outfile).  check_requirements() replaces an
# entry with None when that content type cannot be processed.
PEP_TYPE_DISPATCH = {'text/plain': fixfile,
                     'text/x-rst': fix_rst_pep}
# Maps a disabled content type to the %-template that pep_type_error() prints;
# filled in by check_requirements().
PEP_TYPE_MESSAGES = {}
def _numeric_version(version):
    """Return the leading numeric components of *version* as a list of ints.

    Parsing stops at the first component that is not purely numeric, so
    '0.18b1' gives [0, 18] and '0.21.post1' gives [0, 21].
    """
    numbers = []
    for part in version.split('.'):
        digits = re.match(r'\d+', part)
        if digits is None:
            break
        numbers.append(int(digits.group()))
        if digits.end() != len(part):
            # Trailing pre/post-release text: nothing numeric follows.
            break
    return numbers


def check_requirements():
    """Disable converters whose prerequisites are not met.

    For each unusable content type the PEP_TYPE_DISPATCH entry is set to
    None and an explanatory %-template is stored in PEP_TYPE_MESSAGES.
    """
    # Check Python:
    # This is pretty much covered by the __future__ imports...
    if sys.version_info < (2, 6, 0):
        PEP_TYPE_DISPATCH['text/plain'] = None
        PEP_TYPE_MESSAGES['text/plain'] = (
            'Python %s or better required for "%%(pep_type)s" PEP '
            'processing; %s present (%%(inpath)s).'
            % (REQUIRES['python'], sys.version.split()[0]))
    # Check Docutils:
    try:
        import docutils
    except ImportError:
        PEP_TYPE_DISPATCH['text/x-rst'] = None
        PEP_TYPE_MESSAGES['text/x-rst'] = (
            'Docutils not present for "%(pep_type)s" PEP file %(inpath)s. '
            'See README.rst for installation.')
    else:
        # Version strings may carry non-numeric suffixes ("0.18b1"), which a
        # plain int() conversion of every component would reject.
        installed = _numeric_version(docutils.__version__)
        required = _numeric_version(REQUIRES['docutils'])
        if installed < required:
            PEP_TYPE_DISPATCH['text/x-rst'] = None
            PEP_TYPE_MESSAGES['text/x-rst'] = (
                'Docutils must be reinstalled for "%%(pep_type)s" PEP '
                'processing (%%(inpath)s). Version %s or better required; '
                '%s present. See README.rst for installation.'
                % (REQUIRES['docutils'], docutils.__version__))
def pep_type_error(inpath, pep_type):
    """Print the stored explanation for an unusable *pep_type* to stderr.

    The PEP_TYPE_MESSAGES template is filled from this function's local
    names, so it may refer to ``%(inpath)s`` and ``%(pep_type)s``.
    """
    print('Error: ' + PEP_TYPE_MESSAGES[pep_type] % locals(), file=sys.stderr)
    # Flush the stream that was just written to.
    sys.stderr.flush()
def browse_file(pep):
    """Open the locally generated HTML for *pep* in the web browser."""
    import webbrowser
    source = find_pep(pep)
    if source.startswith('pep-') and source.endswith((".txt", '.rst')):
        # Swap the three-letter source extension for "html".
        source = source[:-3] + "html"
    webbrowser.open("file:" + os.path.abspath(source))
def browse_remote(pep):
    """Open the on-line HTML for *pep* (under PEPDIRRUL) in the web browser."""
    import webbrowser
    page = find_pep(pep)
    if page.startswith('pep-') and page.endswith((".txt", '.rst')):
        # Swap the three-letter source extension for "html".
        page = page[:-3] + "html"
    webbrowser.open(PEPDIRRUL + page)
def main(argv=None):
    """Command line entry point; see the module docstring for the options.

    argv -- argument list without the program name; defaults to
        ``sys.argv[1:]``.
    """
    # defaults
    update = local = browse = 0
    verbose = 1
    username = ''
    check_requirements()
    if argv is None:
        argv = sys.argv[1:]
    try:
        opts, args = getopt.getopt(
            argv, 'bilhqu:',
            ['browse', 'install', 'local', 'help', 'quiet', 'user='])
    except getopt.error as msg:
        usage(1, msg)
    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-i', '--install'):
            update = 1
        elif opt in ('-l', '--local'):
            update = 1
            local = 1
        elif opt in ('-u', '--user'):
            username = arg
        elif opt in ('-q', '--quiet'):
            verbose = 0
        elif opt in ('-b', '--browse'):
            browse = 1

    # Local browsing only happens when nothing is being installed.
    browse_locally = browse and not update
    html = []
    if args:
        pep_list = []
        for pep in args:
            source = find_pep(pep)
            pep_list.append(source)
            rendered = make_html(source, verbose=verbose)
            if rendered:
                html.append(rendered)
            if browse_locally:
                browse_file(pep)
    else:
        # do them all
        pep_list = sorted(glob.glob("pep-*.txt") + glob.glob("pep-*.rst"))
        for source in pep_list:
            rendered = make_html(source, verbose=verbose)
            if rendered:
                html.append(rendered)
        if browse_locally:
            browse_file("0")

    if not update:
        return
    push_pep(html, pep_list, username, verbose, local=local)
    if not browse:
        return
    # After an install, browse the on-line pages (PEP 0 when no peps given).
    for pep in (args or ["0"]):
        browse_remote(pep)
# Run the converter only when this file is executed as a script.
if __name__ == "__main__":
    main()