python-peps/pep2html.py

400 lines
12 KiB
Python
Executable File
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python
"""
convert PEP's to (X)HTML - courtesy of /F
Usage: %(PROGRAM)s [options] [peps]
Options:
-u/--user
Username for the scp and ssh login on www.python.org (only used with -i)
-b/--browse
After generating the HTML, direct your web browser to view it
(using the Python webbrowser module). If both -i and -b are
given, this will browse the on-line HTML; otherwise it will
browse the local HTML. If no pep arguments are given, this
will browse PEP 0.
-i/--install
After generating the HTML, install it and the plain text source file
(.txt) on www.python.org. In that case the user's name is used in the scp
and ssh commands, unless -u username is given (in which case, it is
used instead). Without -i, -u is ignored.
-q/--quiet
Turn off verbose messages.
-h/--help
Print this help message and exit.
The optional argument `peps' is a list of either pep numbers or .txt files.
"""
# Requires Python 2.2
import sys
import os
import re
import cgi
import glob
import getopt
import errno
import random
import time
from email.Utils import parseaddr
# Name this script was invoked as; interpolated into the usage text.
PROGRAM = sys.argv[0]

# URL templates, filled in with an RFC or PEP number.
RFCURL = 'http://www.faqs.org/rfcs/rfc%d.html'
PEPURL = 'pep-%04d.html'
PEPCVSURL = 'http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/python/python/nondist/peps/pep-%04d.txt'
# Base URL of the installed PEPs.  (The name is misspelled, but other code
# in this file refers to it by this spelling, so it is kept.)
PEPDIRRUL = 'http://www.python.org/peps/'

HOST = "www.python.org" # host for update
HDIR = "/ftp/ftp.python.org/pub/www.python.org/peps" # target host directory

# A line consisting of this marker ends the PEP body.
LOCALVARS = "Local Variables:"

# The generated HTML doesn't validate -- you cannot use <hr> and <h3> inside
# <pre> tags. But if I change that, the result doesn't look very nice...
DTD = ('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"\n'
       ' "http://www.w3.org/TR/REC-html40/loose.dtd">')

# Tokenizer for PEP body text.  Alternatives, in order: an http/ftp URL, a
# "pep-NNNN" or "pep-NNNN.txt" file reference, an RFC reference, a
# "PEP NNNN" reference, or any single character (so that every character of
# the line passes through the substitution callback and gets HTML-escaped).
# The dot in "\.txt" is escaped so that only a literal ".txt" suffix is
# accepted; raw strings keep the backslash escapes intact.
fixpat = re.compile(r"((http|ftp):[-_a-zA-Z0-9/.+~:?#$=&,]+)|(pep-\d+(\.txt)?)|"
                    r"(RFC[- ]?(?P<rfcnum>\d+))|"
                    r"(PEP\s+(?P<pepnum>\d+))|"
                    r".")

# Join strings, spelled out for readability.
EMPTYSTRING = ''
SPACE = ' '
COMMASPACE = ', '
def usage(code, msg=''):
    """Write the module usage text to stderr and exit.

    code -- exit status handed to sys.exit().
    msg  -- optional extra message, written on its own line after the usage
            text when it is true.
    """
    err = sys.stderr
    # The module docstring carries a %(PROGRAM)s placeholder that is filled
    # in from the module globals.
    err.write("%s\n" % (__doc__ % globals()))
    if msg:
        err.write("%s\n" % (msg,))
    sys.exit(code)
def fixanchor(current, match):
    """Return the HTML replacement for one `fixpat` match.

    current -- name (possibly with a directory part) of the PEP file being
               converted; a "pep-NNNN.txt" reference to that same file is
               left unlinked.
    match   -- match object produced by `fixpat`.

    URLs, references to other PEP files, "PEP n" and "RFC n" mentions are
    wrapped in an <a> element; everything else is returned HTML-escaped.
    """
    text = match.group(0)
    escaped = cgi.escape(text)  # really slow, but it works...
    link = None
    if text.startswith('http:') or text.startswith('ftp:'):
        # Strip off trailing punctuation. Pattern taken from faqwiz.
        # Only the href is trimmed; the visible text keeps the punctuation.
        end = len(text)
        while end and text[end - 1] in '();:,.?\'"<>':
            end -= 1
        link = text[:end]
    elif text.startswith('pep-') and text != os.path.basename(current):
        # Compare against the basename: `current` may carry a directory
        # prefix, which made the old full-path comparison always unequal and
        # produced self-links.
        link = os.path.splitext(text)[0] + ".html"
    elif text.startswith('PEP'):
        pepnum = int(match.group('pepnum'))
        link = PEPURL % pepnum
    elif text.startswith('RFC'):
        rfcnum = int(match.group('rfcnum'))
        link = RFCURL % rfcnum
    if link:
        return '<a href="%s">%s</a>' % (link, escaped)
    return escaped
# Addresses that are published as clickable mailto: links instead of being
# masked.  Compared against the lower-cased address.
NON_MASKED_EMAILS = [
    'peps@python.org',
    'python-list@python.org',
    'python-dev@python.org',
    ]


def fixemail(address, pepno):
    """Return the HTML form of an email address for the PEP numbered pepno.

    Addresses listed in NON_MASKED_EMAILS are turned into a mailto: link by
    linkemail(); every other address is masked by replacing the "@" with an
    entity-encoded " at ".  A string containing no "@" cannot be masked and
    is returned unchanged (previously this raised IndexError).
    """
    if address.lower() in NON_MASKED_EMAILS:
        # return hyperlinked version of email address
        return linkemail(address, pepno)
    # return masked version of email address
    parts = address.split('@', 1)
    if len(parts) != 2:
        return address
    return '%s&#32;&#97;t&#32;%s' % (parts[0], parts[1])
def linkemail(address, pepno):
    """Return a mailto: hyperlink for address.

    The link subject is "PEP <pepno>".  The "@" is entity-encoded in the
    href, and the visible text shows the address with an entity-encoded
    " at " in place of the "@".  A string containing no "@" cannot be
    linked and is returned unchanged (previously this raised IndexError).
    """
    parts = address.split('@', 1)
    if len(parts) != 2:
        return address
    user, host = parts
    return ('<a href="mailto:%s&#64;%s?subject=PEP%%20%s">'
            '%s&#32;&#97;t&#32;%s</a>'
            % (user, host, pepno, user, host))
def fixfile(infile, outfile):
basename = os.path.basename(infile)
# convert plain text pep to minimal XHTML markup
try:
fi = open(infile)
except IOError, e:
if e.errno <> errno.ENOENT: raise
print >> sys.stderr, 'Error: Skipping missing PEP file:', e.filename
return
fo = open(outfile, "w")
print >> fo, DTD
print >> fo, '<html>'
print >> fo, '<head>'
# head
header = []
pep = ""
title = ""
while 1:
line = fi.readline()
if not line.strip():
break
if line[0].strip():
if ":" not in line:
break
key, value = line.split(":", 1)
value = value.strip()
header.append((key, value))
else:
# continuation line
key, value = header[-1]
value = value + line
header[-1] = key, value
if key.lower() == "title":
title = value
elif key.lower() == "pep":
pep = value
if pep:
title = "PEP " + pep + " -- " + title
if title:
print >> fo, ' <title>%s</title>' % cgi.escape(title)
print >> fo, ' <link rel="STYLESHEET" href="style.css" type="text/css">'
print >> fo, '</head>'
# body
print >> fo, '<body bgcolor="white" marginwidth="0" marginheight="0">'
print >> fo, '<table class="navigation" cellpadding="0" cellspacing="0"'
print >> fo, ' width="100%" border="0">'
print >> fo, '<tr><td class="navicon" width="150" height="35">'
r = random.choice(range(64))
print >> fo, '<a href="../" title="Python Home Page">'
print >> fo, '<img src="../pics/PyBanner%03d.gif" alt="[Python]"' % r
print >> fo, ' border="0" width="150" height="35" /></a></td>'
print >> fo, '<td class="textlinks" align="left">'
print >> fo, '[<b><a href="../">Python Home</a></b>]'
if basename <> 'pep-0000.txt':
print >> fo, '[<b><a href=".">PEP Index</a></b>]'
if pep:
print >> fo, '[<b><a href="pep-%04d.txt">PEP Source</a></b>]' \
% int(pep)
print >> fo, '</td></tr></table>'
print >> fo, '<div class="header">\n<table border="0">'
for k, v in header:
if k.lower() in ('author', 'discussions-to'):
mailtos = []
for part in re.split(',\s*', v):
print 'part:', part
if '@' in part:
realname, addr = parseaddr(part)
if k.lower() == 'discussions-to':
m = linkemail(addr, pep)
else:
m = fixemail(addr, pep)
mailtos.append('%s &lt;%s&gt;' % (realname, m))
elif part.startswith('http:'):
mailtos.append(
'<a href="%s">%s</a>' % (part, part))
else:
mailtos.append(part)
print 'mailtos:', mailtos
v = COMMASPACE.join(mailtos)
elif k.lower() in ('replaces', 'replaced-by'):
otherpeps = ''
for otherpep in v.split():
otherpep = int(otherpep)
otherpeps += '<a href="pep-%04d.html">%i</a> ' % (otherpep,
otherpep)
v = otherpeps
elif k.lower() in ('last-modified',):
url = PEPCVSURL % int(pep)
date = v or time.strftime('%d-%b-%Y',
time.localtime(os.stat(infile)[8]))
v = '<a href="%s">%s</a> ' % (url, cgi.escape(date))
else:
v = cgi.escape(v)
print >> fo, ' <tr><th>%s:&nbsp;</th><td>%s</td></tr>' \
% (cgi.escape(k), v)
print >> fo, '</table>'
print >> fo, '</div>'
print >> fo, '<hr />'
print >> fo, '<div class="content">'
need_pre = 1
while 1:
line = fi.readline()
if not line:
break
if line[0] == '\f':
continue
if line.strip() == LOCALVARS:
break
if line[0].strip():
if line.strip() == LOCALVARS:
break
if not need_pre:
print >> fo, '</pre>'
print >> fo, '<h3>%s</h3>' % line.strip()
need_pre = 1
elif not line.strip() and need_pre:
continue
else:
# PEP 0 has some special treatment
if basename == 'pep-0000.txt':
parts = line.split()
if len(parts) > 1 and re.match(r'\s*\d{1,4}', parts[1]):
# This is a PEP summary line, which we need to hyperlink
url = PEPURL % int(parts[1])
if need_pre:
print >> fo, '<pre>'
need_pre = 0
print >> fo, re.sub(
parts[1],
'<a href="%s">%s</a>' % (url, parts[1]),
line, 1),
continue
elif parts and '@' in parts[-1]:
# This is a pep email address line, so filter it.
url = fixemail(parts[-1], pep)
if need_pre:
print >> fo, '<pre>'
need_pre = 0
print >> fo, re.sub(
parts[-1], url, line, 1),
continue
line = fixpat.sub(lambda x, c=infile: fixanchor(c, x), line)
if need_pre:
print >> fo, '<pre>'
need_pre = 0
fo.write(line)
if not need_pre:
print >> fo, '</pre>'
print >> fo, '</div>'
print >> fo, '</body>'
print >> fo, '</html>'
fo.close()
os.chmod(outfile, 0664)
def find_pep(pep_str):
    """Map a command line argument to the name of a PEP .txt file.

    An argument that names an existing path is returned as is; anything
    else is converted with int() and formatted as "pep-NNNN.txt".
    """
    if not os.path.exists(pep_str):
        return "pep-%04d.txt" % int(pep_str)
    return pep_str
def make_html(file, verbose=0):
    """Convert one PEP text file and return the name of the HTML file.

    The output name is the input name with its extension replaced by
    ".html".  When verbose is true, "<input> -> <output>" is written to
    stdout before the conversion.
    """
    stem = os.path.splitext(file)[0]
    target = stem + ".html"
    if verbose:
        sys.stdout.write("%s -> %s\n" % (file, target))
    fixfile(file, target)
    return target
def _run_or_exit(command):
    """Run command through the shell; exit the program if it fails.

    On Unix os.system() returns a wait()-style status in which the exit
    code sits in the high byte.  Passing that straight to sys.exit() turned
    a failure such as status 256 (exit code 1) into a process exit status
    of 0, so the status is decoded first.  Where the os module has no
    WIFEXITED (where os.system() is expected to return the exit code
    itself) the value is used as is.
    """
    rc = os.system(command)
    if not rc:
        return
    code = rc
    if hasattr(os, "WIFEXITED"):
        if os.WIFEXITED(rc):
            code = os.WEXITSTATUS(rc)
        else:
            # Terminated by a signal: report a generic failure.
            code = 1
    sys.exit(code or 1)


def push_pep(htmlfiles, txtfiles, username, verbose):
    """Copy the HTML files, text files and style.css to HOST:HDIR with scp.

    htmlfiles, txtfiles -- lists of file names (shell patterns are allowed;
                           the command is run through the shell, which
                           expands them).
    username            -- remote login name, or "" to let scp/ssh choose.
    verbose             -- when false, scp is run with -q.

    After copying, the files under HDIR are made group-writable via ssh.
    The program exits with a non-zero status as soon as either command
    fails.
    """
    if verbose:
        quiet = ""
    else:
        quiet = "-q"
    if username:
        username = username + "@"
    target = username + HOST + ":" + HDIR
    files = htmlfiles[:]
    files.extend(txtfiles)
    files.append("style.css")
    filelist = SPACE.join(files)
    _run_or_exit("scp %s %s %s" % (quiet, filelist, target))
    _run_or_exit("ssh %s%s chmod 664 %s/*" % (username, HOST, HDIR))
def browse_file(pep):
    """Open the locally generated HTML for pep in a web browser.

    pep is a command line argument as accepted by find_pep(); a ".txt" name
    is mapped to the matching ".html" name and opened as a file: URL built
    from its absolute path.
    """
    import webbrowser
    path = find_pep(pep)
    if path.endswith(".txt"):
        # Swap the "txt" extension for "html", keeping the dot.
        path = path[:-len("txt")] + "html"
    webbrowser.open("file:" + os.path.abspath(path))
def browse_remote(pep):
    """Open the installed (on-line) HTML for pep in a web browser.

    pep is a command line argument as accepted by find_pep(); a ".txt" name
    is mapped to the matching ".html" name and appended to PEPDIRRUL.
    """
    import webbrowser
    name = find_pep(pep)
    if name.endswith(".txt"):
        # Swap the "txt" extension for "html", keeping the dot.
        name = name[:-len("txt")] + "html"
    webbrowser.open(PEPDIRRUL + name)
def main():
# defaults
update = 0
username = ''
verbose = 1
browse = 0
try:
opts, args = getopt.getopt(
sys.argv[1:], 'bihqu:',
['browse', 'install', 'help', 'quiet', 'user='])
except getopt.error, msg:
usage(1, msg)
for opt, arg in opts:
if opt in ('-h', '--help'):
usage(0)
elif opt in ('-i', '--install'):
update = 1
elif opt in ('-u', '--user'):
username = arg
elif opt in ('-q', '--quiet'):
verbose = 0
elif opt in ('-b', '--browse'):
browse = 1
if args:
peptxt = []
html = []
for pep in args:
file = find_pep(pep)
peptxt.append(file)
newfile = make_html(file, verbose=verbose)
html.append(newfile)
if browse and not update:
browse_file(pep)
else:
# do them all
peptxt = []
files = glob.glob("pep-*.txt")
files.sort()
for file in files:
peptxt.append(file)
make_html(file, verbose=verbose)
html = ["pep-*.html"]
if browse and not update:
browse_file("0")
if update:
push_pep(html, peptxt, username, verbose)
if browse:
if args:
for pep in args:
browse_remote(pep)
else:
browse_remote("0")
if __name__ == "__main__":
main()