From 1fbe18ef4578cfbe7996b53da9417738e15c4e66 Mon Sep 17 00:00:00 2001
From: Benjamin Peterson <benjamin@python.org>
Date: Tue, 30 Dec 2008 17:44:55 +0000
Subject: [PATCH] add the supporting library for generating PEP 0

---
 pep0/__init__.py  |   1 +
 pep0/constants.py |  40 +++++++
 pep0/output.py    | 183 +++++++++++++++++++++++++++++
 pep0/pep.py       | 287 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 511 insertions(+)
 create mode 100644 pep0/__init__.py
 create mode 100644 pep0/constants.py
 create mode 100644 pep0/output.py
 create mode 100644 pep0/pep.py

diff --git a/pep0/__init__.py b/pep0/__init__.py
new file mode 100644
index 000000000..b7db25411
--- /dev/null
+++ b/pep0/__init__.py
@@ -0,0 +1 @@
+# Empty
diff --git a/pep0/constants.py b/pep0/constants.py
new file mode 100644
index 000000000..d4f7a322c
--- /dev/null
+++ b/pep0/constants.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+title_length = 55  # Width of the title column; PEP.title_abbr shortens to it.
+column_format = (u' %(type)1s%(status)1s %(number)4s  %(title)-' +
+                    unicode(title_length) + u's %(authors)-s')
+# RFC 822-style headers of PEP 0; the first thing write_pep0() prints.
+header = u"""PEP: 0
+Title: Index of Python Enhancement Proposals (PEPs)
+Version: $Revision$
+Last-Modified: $Date$
+Author: David Goodger <goodger@python.org>,
+        Barry A. Warsaw <barry@python.org>
+Status: Active
+Type: Informational
+Created: 13-Jul-2000
+"""
+# Body text printed under the "Introduction" heading.
+intro = u"""
+    The PEP contains the index of all Python Enhancement Proposals,
+    known as PEPs.  PEP numbers are assigned by the PEP Editor, and
+    once assigned are never changed.  The SVN history[1] of the PEP
+    texts represent their historical record.
+
+    The BDFL maintains his own Pronouncements page[2] at
+    http://www.python.org/doc/essays/pepparade.html which contains his
+    musings on the various outstanding PEPs.
+"""
+# Body text printed under the "References" heading (only [1] is listed).
+references = u"""
+    [1] View PEP history online
+        http://svn.python.org/projects/peps/trunk/
+"""
+# Emacs file-local variables block; the last thing write_pep0() prints.
+footer = u"""
+Local Variables:
+mode: indented-text
+indent-tabs-mode: nil
+sentence-end-double-space: t
+fill-column: 70
+coding: utf-8
+End:"""
diff --git a/pep0/output.py b/pep0/output.py
new file mode 100644
index 000000000..2053bd7a1
--- /dev/null
+++ b/pep0/output.py
@@ -0,0 +1,183 @@
+"""Code to handle the output of PEP 0."""
+import sys
+import unicodedata
+
+from operator import attrgetter
+
+from . import constants
+from .pep import PEP, PEPError
+
+
+indent = u' '  # One-space indent string; not referenced by the code visible in this patch.
+
+def write_column_headers(output):
+    """Write the index column titles and a dashed underline row to output."""
+    titles = {'status': u'', 'type': u'', 'number': u'num',
+              'title': u'title', 'authors': u'owner'}
+    print>>output, constants.column_format % titles
+    # Each underline is exactly as wide as its title; empty titles get none.
+    dashes = dict((field, unicode(len(text) * '-'))
+                  for field, text in titles.items())
+    print>>output, constants.column_format % dashes
+
+
+def sort_peps(peps):
+    """Split PEPs into six lists by type and status.
+
+    Returns (meta, info, accepted, open, finished, dead); raises PEPError
+    for a PEP whose type/status pair matches none of the categories."""
+    dead_statuses = ('Rejected', 'Withdrawn', 'Deferred',
+                     'Incomplete', 'Replaced')
+    meta, info, accepted = [], [], []
+    open_, finished, dead = [], [], []
+    for pep in peps:
+        # The first matching test wins, so the order below is significant:
+        # the Process type beats every status, and the Draft and dead
+        # statuses beat the Informational type.
+        if pep.type_ == 'Process':
+            bucket = meta
+        elif pep.status == 'Draft':
+            bucket = open_
+        elif pep.status in dead_statuses:
+            bucket = dead
+        elif pep.type_ == 'Informational':
+            bucket = info
+        elif pep.status in ('Accepted', 'Active'):
+            bucket = accepted
+        elif pep.status == 'Final':
+            bucket = finished
+        else:
+            raise PEPError("unsorted (%s/%s)" % (pep.type_, pep.status),
+                           pep.filename, pep.number)
+        bucket.append(pep)
+    return meta, info, accepted, open_, finished, dead
+
+
+def verify_email_addresses(peps):
+    """Check that every author is listed with at most one email address.
+
+    Returns a dict mapping each author to that single address (possibly the
+    empty string); raises ValueError naming every author with several."""
+    emails_by_author = {}
+    for pep in peps:
+        for author in pep.authors:
+            if author not in emails_by_author:
+                # First sighting: start the list, even with an empty email.
+                emails_by_author[author] = [author.email]
+                continue
+            known = emails_by_author[author]
+            if not known[0]:
+                # Only an empty placeholder so far; the new value replaces it.
+                emails_by_author[author] = [author.email]
+            elif author.email and author.email not in known:
+                # A second, different non-empty address; reported below.
+                known.append(author.email)
+
+    # Split the authors into those with one address and those with several.
+    single_email = {}
+    conflicts = []
+    for author, emails in emails_by_author.items():
+        if len(emails) > 1:
+            conflicts.append("    %s: %r" % (author.first_last, emails))
+        else:
+            single_email[author] = emails[0]
+    if conflicts:
+        # Report all offenders at once instead of stopping at the first.
+        raise ValueError("some authors have more than one email address "
+                         "listed:\n" + '\n'.join(conflicts))
+
+    return single_email
+
+
+def sort_authors(authors_dict):
+    """Return the dict's authors as a list ordered by their sort_by value."""
+    by_sort_name = attrgetter('sort_by')
+    return sorted(authors_dict.keys(), key=by_sort_name)
+
+def normalized_last_first(name):  # Key function: length of the NFC-normalized last_first.
+    return len(unicodedata.normalize('NFC', name.last_first))
+
+
+def write_pep0(peps, output=sys.stdout):
+    """Write the full text of PEP 0 to output.
+
+    The document consists of the header and introduction from the constants
+    module, the index by category, the numerical index, the key of type and
+    status names, the owners table and the references.
+
+    peps is iterated several times and appears in the numerical index in
+    the order given; the gap detection there presumably expects it to be
+    sorted by PEP number.  output is the target of the print>> statements.
+
+    Raises PEPError if a PEP fits no category, and ValueError if an author
+    has several email addresses or if peps provides no authors at all.
+    """
+    print>>output, constants.header
+    print>>output
+    print>>output, u"Introduction"
+    print>>output, constants.intro
+    print>>output
+    print>>output, u"Index by Category"
+    print>>output
+    write_column_headers(output)
+    meta, info, accepted, open_, finished, dead = sort_peps(peps)
+    # Section headings paired with their PEPs, in output order.
+    categories = (
+        (u" Meta-PEPs (PEPs about PEPs or Processes)", meta),
+        (u" Other Informational PEPs", info),
+        (u" Accepted PEPs (accepted; may not be implemented yet)", accepted),
+        (u" Open PEPs (under consideration)", open_),
+        (u" Finished PEPs (done, implemented in code repository)", finished),
+        (u" Deferred, Abandoned, Withdrawn, and Rejected PEPs", dead),
+    )
+    for heading, category_peps in categories:
+        print>>output
+        print>>output, heading
+        print>>output
+        for pep in category_peps:
+            print>>output, unicode(pep)
+    print>>output
+    print>>output
+    print>>output, u" Numerical Index"
+    print>>output
+    write_column_headers(output)
+    prev_pep = 0
+    for pep in peps:
+        # A blank line marks every gap in the PEP numbering.
+        if pep.number - prev_pep > 1:
+            print>>output
+        print>>output, unicode(pep)
+        prev_pep = pep.number
+    print>>output
+    print>>output
+    print>>output, u"Key"
+    print>>output
+    for type_ in PEP.type_values:
+        print>>output, u"    %s - %s PEP" % (type_[0], type_)
+    print>>output
+    for status in PEP.status_values:
+        print>>output, u"    %s - %s proposal" % (status[0], status)
+
+    print>>output
+    print>>output
+    print>>output, u"Owners"
+    print>>output
+    authors_dict = verify_email_addresses(peps)
+    # Pad every name to the longest one so the email column lines up.
+    max_name = max(authors_dict.keys(), key=normalized_last_first)
+    max_name_len = len(max_name.last_first)
+    print>>output, u"    %s  %s" % ('name'.ljust(max_name_len), 'email address')
+    print>>output, u"    %s  %s" % ((len('name')*'-').ljust(max_name_len),
+                                    len('email address')*'-')
+    sorted_authors = sort_authors(authors_dict)
+    for author in sorted_authors:
+        # Use the email from authors_dict instead of the one from 'author' as
+        # the author instance may have an empty email.
+        print>>output, (u"    %s  %s" %
+                (author.last_first.ljust(max_name_len), authors_dict[author]))
+    print>>output
+    print>>output
+    print>>output, u"References"
+    print>>output
+    print>>output, constants.references
+    print>>output, constants.footer
diff --git a/pep0/pep.py b/pep0/pep.py
new file mode 100644
index 000000000..cb9f4716b
--- /dev/null
+++ b/pep0/pep.py
@@ -0,0 +1,287 @@
+"""Code for handling object representation of a PEP."""
+import re
+import textwrap
+
+from email.parser import HeaderParser
+
+from . import constants
+
+
+class PEPError(Exception):
+    """Error in a PEP; records the file and, when known, the PEP number."""
+
+    def __init__(self, error, pep_file, pep_number=None):
+        super(PEPError, self).__init__(error)
+        self.filename, self.number = pep_file, pep_number
+
+    def __str__(self):
+        """Return the repr of the message prefixed by PEP number or file."""
+        message = super(PEPError, self).__str__()
+        if self.number is None:
+            return "(%s): %r" % (self.filename, message)
+        return "PEP %d: %r" % (self.number, message)
+
+
+class Author(object):
+
+    """Represent PEP authors.
+
+    Attributes:
+
+        + first_last : str
+            The author's full name.
+
+        + last_first : str
+            Output the author's name in Last, First, Suffix order.
+
+        + first : str
+            The author's first name.  A middle initial may be included.
+
+        + last : str
+            The author's last name.
+
+        + suffix : str
+            A person's suffix (can be the empty string).
+
+        + nick : str
+            Name shown in index rows: the last name, or 'GvR'/'JvR'.
+
+        + sort_by : str
+            Modification of the author's last name that should be used for
+            sorting.
+
+        + email : str
+            The author's email address, lower-cased (can be empty).
+    """
+
+    def __init__(self, author_and_email_tuple):
+        """Parse an author's (name, email) pair; ValueError on a bad name."""
+        name, email = author_and_email_tuple
+        self.first_last = name.strip()
+        self.email = email.lower()
+        self.last, self.suffix = self._last_name(name)
+        # Search from the right, and only before any comma, so a first name
+        # or suffix that repeats the last name is not mistaken for it.
+        name_sep = name.partition(u',')[0].rindex(self.last)
+        self.first = name[:name_sep].strip()
+        if not self.first:
+            self.last_first = self.last
+        else:
+            self.last_first = u', '.join([self.last, self.first])
+            if self.suffix:
+                self.last_first += u', ' + self.suffix
+        if self.last == "van Rossum":
+            # Special case for our beloved BDFL. :)
+            if self.first == "Guido":
+                self.nick = "GvR"
+            elif self.first == "Just":
+                self.nick = "JvR"
+            else:
+                raise ValueError("unknown van Rossum %r!" % self.first_last)
+        else:
+            self.nick = self.last
+
+    # Identity is the full name alone; the email address plays no part.
+    def __hash__(self):
+        return hash(self.first_last)
+
+    def __eq__(self, other):
+        return self.first_last == other.first_last
+
+    def __ne__(self, other):
+        # Python 2 does not derive != from __eq__.
+        return not self == other
+
+    @property
+    def sort_by(self):
+        """Return the last name from its first capitalized word onward."""
+        if u' ' not in self.last:
+            return self.last
+        name_parts = self.last.split()
+        for index, part in enumerate(name_parts):
+            if part[0].isupper():
+                return u' '.join(name_parts[index:])
+        raise ValueError("last name missing a capital letter")
+
+    def _last_name(self, full_name):
+        """Return (last_name, suffix) for a full name.
+
+        With no last name (e.g., 'Aahz') the only name is returned.  A
+        lowercase word just before the final one (e.g., 'van' or 'von') is
+        kept as part of the last name.  suffix is the stripped text after the
+        first comma (e.g., 'Jr.'), or the empty string.
+        """
+        no_suffix, _, suffix = full_name.partition(u',')
+        name_parts = no_suffix.split()
+        if not name_parts:
+            raise ValueError("author name is empty: %r" % (full_name,))
+        if len(name_parts) > 2 and name_parts[-2].islower():
+            return u' '.join(name_parts[-2:]), suffix.strip()
+        return name_parts[-1], suffix.strip()
+
+
+class PEP(object):
+
+    """Representation of PEPs.
+
+    Attributes:
+
+        + number : int
+            PEP number.
+
+        + title : str
+            PEP title.
+
+        + type_ : str
+            The type of PEP.  Can only be one of the values from
+            PEP.type_values.
+
+        + status : str
+            The PEP's status.  Value must be found in PEP.status_values.
+
+        + authors : Sequence(Author)
+            A list of the authors.
+
+        + filename : file
+            The open file object the PEP was read from (not its name).
+    """
+
+    # The various RFC 822 headers that are supported.
+    # The second item in the nested tuples represents if the header is
+    # required or not.
+    headers = (('PEP', True), ('Title', True), ('Version', True),
+                ('Last-Modified', True), ('Author', True),
+                ('Discussions-To', False), ('Status', True), ('Type', True),
+                ('Content-Type', False), ('Requires', False),
+                ('Created', True), ('Python-Version', False),
+                ('Post-History', False), ('Replaces', False),
+                ('Replaced-By', False))
+    # Valid values for the Type header.
+    type_values = (u"Standards Track", u"Informational", u"Process")
+    # Valid values for the Status header.
+    # Active PEPs can only be for Informational or Process PEPs.
+    status_values = (u"Accepted", u"Rejected", u"Withdrawn", u"Deferred", u"Final",
+                     u"Active", u"Draft", u"Replaced")
+
+    def __init__(self, pep_file):
+        """Init object from an open PEP file object.
+
+        Raises PEPError if a header is missing, out of order or invalid."""
+        # Parse the headers.
+        self.filename = pep_file
+        pep_parser = HeaderParser()
+        metadata = pep_parser.parse(pep_file)
+        header_order = iter(self.headers)
+        try:
+            for header_name in metadata.keys():
+                current_header, required = header_order.next()
+                while header_name != current_header and not required:
+                    current_header, required = header_order.next()
+                if header_name != current_header:
+                    raise PEPError("did not deal with "
+                                   "%r before having to handle %r" %
+                                   (header_name, current_header),
+                                   pep_file.name)
+        except StopIteration:
+            raise PEPError("headers missing or out of order", pep_file.name)
+        required = False
+        try:
+            while not required:
+                current_header, required = header_order.next()
+            else:
+                raise PEPError("PEP is missing its %r" % (current_header,),
+                               pep_file.name)
+        except StopIteration:
+            pass
+        # 'PEP'.
+        try:
+            self.number = int(metadata['PEP'])
+        except ValueError:
+            raise PEPError("PEP number isn't an integer", pep_file.name)
+        # 'Title'.
+        self.title = metadata['Title']
+        # 'Type'.
+        type_ = metadata['Type']
+        if type_ not in self.type_values:
+            raise PEPError('%r is not a valid Type value' % (type_,),
+                           pep_file.name, self.number)
+        self.type_ = type_
+        # 'Status'.
+        status = metadata['Status']
+        if status not in self.status_values:
+            raise PEPError("%r is not a valid Status value" %
+                           (status,), pep_file.name, self.number)
+        # Special case for Active PEPs.
+        if (status == u"Active" and
+                self.type_ not in ("Process", "Informational")):
+            raise PEPError("Only Process and Informational PEPs may "
+                           "have an Active status", pep_file.name, self.number)
+        self.status = status
+        # 'Author'.
+        authors_and_emails = self._parse_author(metadata['Author'])
+        if len(authors_and_emails) < 1:
+            raise PEPError("no authors found", pep_file.name, self.number)
+        self.authors = map(Author, authors_and_emails)
+
+    def _parse_author(self, data):
+        """Return a list of (author name, email) tuples parsed from data."""
+        # XXX Consider using email.utils.parseaddr (doesn't work with names
+        # lacking an email address).
+        angled = ur'(?P<author>.+?) <(?P<email>.+?)>'
+        paren = ur'(?P<email>.+?) \((?P<author>.+?)\)'
+        simple = ur'(?P<author>[^,]+)'
+        author_list = []
+        for regex in (angled, paren, simple):
+            # Watch out for commas separating multiple names.
+            regex += u'(,\s*)?'
+            for match in re.finditer(regex, data):
+                # Watch out for suffixes like 'Jr.' when they are comma-separated
+                # from the name and thus cause issues when *all* names are only
+                # separated by commas.
+                match_dict = match.groupdict()
+                author = match_dict['author']
+                if (author_list and not author.partition(' ')[1]
+                        and author.endswith('.')):
+                    # Entries are (name, email) tuples; rejoin with the name.
+                    prev_author = author_list.pop()[0]
+                    author = ', '.join([prev_author, author])
+                email = match_dict.get(u'email', '')
+                author_list.append((author, email))
+            # If authors were found then stop searching as only expect one
+            # style of author citation.
+            if author_list:
+                break
+        return author_list
+
+    @property
+    def type_abbr(self):
+        """Return how the type is to be represented in the index."""
+        return self.type_[0].upper()
+
+    @property
+    def status_abbr(self):
+        """Return how the status should be represented in the index."""
+        if self.status in ('Draft', 'Active'):
+            return u' '
+        else:
+            return self.status[0].upper()
+
+    @property
+    def author_abbr(self):
+        """Return the author list as a comma-separated with only last names."""
+        return u', '.join(x.nick for x in self.authors)
+
+    @property
+    def title_abbr(self):
+        """Shorten the title to be no longer than the max title length."""
+        if len(self.title) <= constants.title_length:
+            return self.title
+        wrapped_title = textwrap.wrap(self.title, constants.title_length - 4)
+        return wrapped_title[0] + u' ...'
+
+    def __unicode__(self):
+        """Return the line entry for the PEP."""
+        pep_info = {'type': self.type_abbr, 'number': str(self.number),
+                'title': self.title_abbr, 'status': self.status_abbr,
+                'authors': self.author_abbr}
+        return constants.column_format % pep_info