#!/usr/bin/env python
"""
convert PEP's to (X)HTML - courtesy of /F

Usage: %(PROGRAM)s [options] [peps]

Options:

    -u/--user
        SF username

    -b/--browse
        After generating the HTML, direct your web browser to view it
        (using the Python webbrowser module).  If both -i and -b are
        given, this will browse the on-line HTML; otherwise it will
        browse the local HTML.  If no pep arguments are given, this
        will browse PEP 0.

    -i/--install
        After generating the HTML, install it and the plain text source file
        (.txt) SourceForge.  In that case the user's name is used in the scp
        and ssh commands, unless -u sf_username is given (in which case, it is
        used instead).  Without -i, -u is ignored.

    -q/--quiet
        Turn off verbose messages.

    -h/--help
        Print this help message and exit.

The optional argument `peps' is a list of either pep numbers or .txt files.
"""

import sys
import os
import re
import cgi
import glob
import getopt
import errno
import time

# Name the script was invoked as; interpolated into the usage text above.
PROGRAM = sys.argv[0]
# URL templates; each takes the integer RFC/PEP number.
RFCURL = 'http://www.faqs.org/rfcs/rfc%d.html'
PEPURL = 'pep-%04d.html'
PEPCVSURL = 'http://cvs.sourceforge.net/cgi-bin/viewcvs.cgi/python/python/nondist/peps/pep-%04d.txt'
# Base URL that the generated file name is appended to for on-line browsing.
PEPDIRRUL = 'http://www.python.org/peps/'


HOST = "shell.sourceforge.net"                    # host for update
HDIR = "/home/groups/p/py/python/htdocs/peps"     # target host directory

# A line consisting of this marker ends the part of a PEP that is converted.
LOCALVARS = "Local Variables:"

# The generated HTML doesn't validate -- you cannot use [tag] and [tag] inside
# [tag] tags.  But if I change that, the result doesn't look very nice...
# NOTE(review): the three tag names in the sentence above, and the contents of
# the DTD literal below, were blank in the copy this header was restored from.
# Fill them in from a known-good revision.
DTD = ('')

# Tokenizer for body text.  Every match is exactly one of:
#   - an http:/ftp: URL,
#   - a PEP file reference such as "pep-0001" or "pep-0001.txt",
#   - "RFC 822" / "RFC-822" / "RFC822"  (number captured as group 'rfcnum'),
#   - "PEP 8"                            (number captured as group 'pepnum'),
#   - any other single character.
# The trailing "." alternative means sub() visits every character of the
# line, so the replacement callback sees (and can escape) all of the text.
# The group names are required: fixanchor() reads them by name.
fixpat = re.compile(
    r"((http|ftp):[-_a-zA-Z0-9/.+~:?#$=&,]+)|(pep-\d+(\.txt)?)|"
    r"(RFC[- ]?(?P<rfcnum>\d+))|"
    r"(PEP\s+(?P<pepnum>\d+))|"
    r".")

# Separator strings, used in the form EMPTYSTRING.join(...) / SPACE.join(...).
EMPTYSTRING = ''
SPACE = ' '



def usage(code, msg=''):
    """Write the usage text (and *msg*, if given) to stderr, then exit.

    code -- exit status handed to sys.exit().
    msg  -- optional extra message, written on its own line after the usage
            text.  Any object is accepted; it is formatted with %s.

    Never returns: sys.exit() raises SystemExit.
    """
    # The usage text is the module docstring with %(NAME)s fields filled in
    # from the module globals.  __doc__ is None when docstrings are stripped
    # (python -OO); fall back to an empty template so that we still exit with
    # the requested status instead of dying on "None % dict".
    template = __doc__ or ''
    # write() rather than the "print >> stream" statement: it produces the
    # same output and works unchanged under both Python 2 and Python 3.
    stream = sys.stderr
    stream.write('%s\n' % (template % globals(),))
    if msg:
        stream.write('%s\n' % (msg,))
    sys.exit(code)



def fixanchor(current, match):
    """Return the HTML replacement for a single fixpat match.

    current -- file name of the PEP being converted; a "pep-NNNN" reference
               whose text equals it is left unlinked (no self-links).
    match   -- match object produced by fixpat.

    URLs, PEP file references, "PEP n" and "RFC n" become anchors; anything
    else (normally a single character) is returned HTML-escaped.  This is
    invoked once per match, i.e. for almost every character of the input --
    really slow, but it works.
    """
    text = match.group(0)
    link = None
    if text.startswith('http:') or text.startswith('ftp:'):
        # Strip off trailing punctuation.  Pattern taken from faqwiz.
        # Only the link target is trimmed; the visible text keeps it.
        link = text.rstrip('();:,.?\'"<>')
    elif text.startswith('pep-') and text != current:
        link = os.path.splitext(text)[0] + ".html"
    elif text.startswith('PEP'):
        link = PEPURL % int(match.group('pepnum'))
    elif text.startswith('RFC'):
        link = RFCURL % int(match.group('rfcnum'))
    if link:
        # The format needs one placeholder per argument (target and text).
        # The target is escaped as well because URLs may contain "&".
        return '<a href="%s">%s</a>' % (cgi.escape(link, 1), cgi.escape(text))
    return cgi.escape(text)



def _read_header(fi):
    """Consume the leading "Key: value" header lines of the open file *fi*.

    Reading stops at the first blank line (or end of file), or at the first
    unindented line that contains no colon; that line is consumed and
    discarded.  Lines starting with whitespace are continuations and are
    appended, unmodified, to the value of the preceding field.

    Returns (header, pep, title): the ordered list of (key, value) pairs,
    plus the values of the "PEP" and "Title" fields ("" when absent).
    """
    header = []
    pep = ""
    title = ""
    while 1:
        line = fi.readline()
        if not line.strip():
            break
        if line[0].strip():
            if ":" not in line:
                break
            key, value = line.split(":", 1)
            value = value.strip()
            header.append((key, value))
        elif header:
            # continuation line
            key, value = header[-1]
            value = value + line
            header[-1] = key, value
        else:
            # An indented line with no field before it cannot continue
            # anything; treat it as the end of the header instead of
            # failing on header[-1].
            break
        lowered = key.lower()
        if lowered == "title":
            title = value
        elif lowered == "pep":
            pep = value
    return header, pep, title


def _header_cell(key, value, pep, infile):
    """Return the HTML for the value cell of one header-table row.

    key, value -- one header field as returned by _read_header().
    pep        -- the PEP number string taken from the header.
    infile     -- path of the source file; its modification time is the
                  fallback date for an empty Last-Modified field.

    Raises ValueError when a Replaces/Replaced-By entry, or the PEP number
    needed for the Last-Modified link, is not an integer.
    """
    kind = key.lower()
    if kind in ('author', 'discussions-to'):
        cells = []
        for addr in value.split():
            if '@' in addr:
                cells.append(
                    '<a href="mailto:%s?subject=PEP%%20%s">%s</a>' %
                    (cgi.escape(addr, 1), cgi.escape(pep, 1),
                     cgi.escape(addr)))
            elif addr.startswith('http:'):
                cells.append(
                    '<a href="%s">%s</a>' %
                    (cgi.escape(addr, 1), cgi.escape(addr)))
            else:
                cells.append(cgi.escape(addr))
        return SPACE.join(cells)
    if kind in ('replaces', 'replaced-by'):
        links = []
        for other in value.split():
            num = int(other)
            # each link is followed by a single space
            links.append('<a href="%s">%i</a> ' % (PEPURL % num, num))
        return EMPTYSTRING.join(links)
    if kind in ('last-modified',):
        url = PEPCVSURL % int(pep)
        date = value or time.strftime(
            '%d-%b-%Y', time.localtime(os.stat(infile).st_mtime))
        return '<a href="%s">%s</a> ' % (url, cgi.escape(date))
    return cgi.escape(value)


def _write_body(fi, fo, basename):
    """Copy the rest of *fi* (everything after the header) to *fo* as HTML.

    Unindented lines are section headings; indented and blank lines are
    preformatted text that is passed through fixpat/fixanchor.  Lines
    starting with a form feed are dropped, and a line consisting of the
    LOCALVARS marker ends the conversion.
    """
    is_index = basename == 'pep-0000.txt'
    while 1:
        line = fi.readline()
        if not line:
            break
        if line[0] == '\f':
            continue
        if line.strip() == LOCALVARS:
            break
        if line[0].strip():
            # Section heading: close the preformatted block around it.
            fo.write('</pre>\n')
            fo.write('<h3>%s</h3>\n' % cgi.escape(line.strip()))
            # no newline, so the next line follows the tag directly
            fo.write('<pre>')
            continue
        # PEP 0 has some special treatment
        if is_index:
            parts = line.split()
            if len(parts) > 1 and re.match(r'\d{1,4}$', parts[1]):
                # This is a PEP summary line, which we need to hyperlink.
                # The whole token must be digits, otherwise int() fails.
                num = parts[1]
                anchor = '<a href="%s">%s</a>' % (PEPURL % int(num), num)
                # Literal replace of the first occurrence: the token is
                # text, not a regular expression.
                fo.write(cgi.escape(line).replace(num, anchor, 1))
                continue
            if parts and '@' in parts[-1]:
                # This is a pep email address line, so hyperlink it
                addr = cgi.escape(parts[-1])
                anchor = '<a href="mailto:%s">%s</a>' % (
                    cgi.escape(parts[-1], 1), addr)
                fo.write(cgi.escape(line).replace(addr, anchor, 1))
                continue
        # fixanchor() compares PEP references against the bare file name,
        # so hand it the basename rather than the full path.
        fo.write(fixpat.sub(lambda m: fixanchor(basename, m), line))


def fixfile(infile, outfile):
    """Convert the plain text PEP *infile* to minimal XHTML in *outfile*.

    A missing *infile* is reported on stderr and skipped (nothing is
    written); any other I/O error propagates.  On success *outfile* is given
    mode 0664.  Both files are closed even if the conversion fails.

    NOTE(review): in the copy this function was restored from, every markup
    literal had lost its tags -- only the %-placeholders and the number of
    print statements survived.  The tags below are a minimal reconstruction
    that matches the placeholder/argument counts and the surrounding logic
    (title, style.css link, header table, heading inside a preformatted
    body).  Confirm them against a known-good revision.
    """
    basename = os.path.basename(infile)
    try:
        fi = open(infile)
    except IOError as e:
        if e.errno != errno.ENOENT:
            raise
        sys.stderr.write(
            'Error: Skipping missing PEP file: %s\n' % (e.filename,))
        return
    try:
        fo = open(outfile, "w")
        try:
            fo.write('%s\n' % (DTD,))
            fo.write('<html>\n')
            fo.write('<head>\n')
            # head
            header, pep, title = _read_header(fi)
            if pep:
                title = "PEP " + pep + " -- " + title
            if title:
                fo.write('  <title>%s</title>\n' % cgi.escape(title))
                fo.write('  <link rel="STYLESHEET" href="style.css"'
                         ' type="text/css" />\n')
            fo.write('</head>\n')
            # body
            fo.write('<body bgcolor="white">\n')
            # NOTE(review): the source has a second tag-only print here whose
            # text could not be recovered; it is kept as a blank line.
            fo.write('\n')
            fo.write('<div class="header">\n<table border="0">\n')
            for k, v in header:
                fo.write('  <tr><th>%s:</th><td>%s</td></tr>\n' % (
                    cgi.escape(k), _header_cell(k, v, pep, infile)))
            fo.write('</table>\n')
            fo.write('</div>\n')
            fo.write('<hr />\n')
            fo.write('<pre>\n')
            _write_body(fi, fo, basename)
            fo.write('</pre>\n')
            fo.write('</body>\n')
            fo.write('</html>\n')
        finally:
            fo.close()
    finally:
        fi.close()
    os.chmod(outfile, 0o664)


def find_pep(pep_str):
    """Find the .txt file indicated by a cmd line argument.

    An argument that names an existing path is returned unchanged; anything
    else must be a PEP number and is mapped to "pep-NNNN.txt".  Raises
    ValueError when the argument is neither.
    """
    if os.path.exists(pep_str):
        return pep_str
    num = int(pep_str)
    return "pep-%04d.txt" % num


def make_html(file, verbose=0):
    """Convert one PEP text file and return the name of the HTML file.

    The output name is *file* with its extension replaced by ".html".  When
    *verbose* is true the "source -> target" pair is printed on stdout.
    """
    newfile = os.path.splitext(file)[0] + ".html"
    if verbose:
        sys.stdout.write('%s -> %s\n' % (file, newfile))
    fixfile(file, newfile)
    return newfile


def push_pep(htmlfiles, txtfiles, username, verbose):
    """Copy the given files plus style.css to HOST:HDIR and fix their modes.

    htmlfiles, txtfiles -- lists of file names or shell glob patterns.
    username            -- remote login name, or '' to let scp/ssh pick it.
    verbose             -- when false, scp is run with -q.

    Exits the process with the command's status if scp or ssh fails.
    """
    if verbose:
        quiet = ""
    else:
        quiet = "-q"
    if username:
        username = username + "@"
    target = username + HOST + ":" + HDIR
    files = htmlfiles[:]
    files.extend(txtfiles)
    files.append("style.css")
    filelist = SPACE.join(files)
    # The command goes through the shell on purpose: main() passes the
    # pattern "pep-*.html" and relies on the shell to expand it.  File names
    # are not quoted, so names containing shell metacharacters are unsafe.
    rc = os.system("scp %s %s %s" % (quiet, filelist, target))
    if rc:
        sys.exit(rc)
    rc = os.system("ssh %s%s chmod 664 %s/*" % (username, HOST, HDIR))
    if rc:
        sys.exit(rc)


def browse_file(pep):
    """Open the locally generated HTML for *pep* in the web browser."""
    import webbrowser
    file = find_pep(pep)
    if file.endswith(".txt"):
        file = file[:-3] + "html"
    file = os.path.abspath(file)
    url = "file:" + file
    webbrowser.open(url)


def browse_remote(pep):
    """Open the on-line HTML for *pep* (under PEPDIRRUL) in the web browser."""
    import webbrowser
    file = find_pep(pep)
    if file.endswith(".txt"):
        file = file[:-3] + "html"
    url = PEPDIRRUL + file
    webbrowser.open(url)


def main():
    """Command line entry point; see the module docstring for the options."""
    # defaults
    update = 0
    username = ''
    verbose = 1
    browse = 0

    try:
        opts, args = getopt.getopt(
            sys.argv[1:], 'bihqu:',
            ['browse', 'install', 'help', 'quiet', 'user='])
    except getopt.error as msg:
        usage(1, msg)

    for opt, arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        elif opt in ('-i', '--install'):
            update = 1
        elif opt in ('-u', '--user'):
            username = arg
        elif opt in ('-q', '--quiet'):
            verbose = 0
        elif opt in ('-b', '--browse'):
            browse = 1

    if args:
        peptxt = []
        html = []
        for pep in args:
            file = find_pep(pep)
            peptxt.append(file)
            newfile = make_html(file, verbose=verbose)
            html.append(newfile)
            if browse and not update:
                browse_file(pep)
    else:
        # do them all
        peptxt = []
        files = glob.glob("pep-*.txt")
        files.sort()
        for file in files:
            peptxt.append(file)
            make_html(file, verbose=verbose)
        # left as a pattern; push_pep()'s shell command expands it
        html = ["pep-*.html"]
        if browse and not update:
            browse_file("0")

    if update:
        push_pep(html, peptxt, username, verbose)
        # With both -i and -b the on-line copy is browsed, not the local one.
        if browse:
            if args:
                for pep in args:
                    browse_remote(pep)
            else:
                browse_remote("0")


if __name__ == "__main__":
    main()