From 1fbe18ef4578cfbe7996b53da9417738e15c4e66 Mon Sep 17 00:00:00 2001
From: Benjamin Peterson
Date: Tue, 30 Dec 2008 17:44:55 +0000
Subject: [PATCH] add the supporting library for generating PEP 0

---
 pep0/__init__.py  |   1 +
 pep0/constants.py |  49 +++++++
 pep0/output.py    | 190 +++++++++++++++++++++++++++
 pep0/pep.py       | 323 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 563 insertions(+)
 create mode 100644 pep0/__init__.py
 create mode 100644 pep0/constants.py
 create mode 100644 pep0/output.py
 create mode 100644 pep0/pep.py

diff --git a/pep0/__init__.py b/pep0/__init__.py
new file mode 100644
index 000000000..b7db25411
--- /dev/null
+++ b/pep0/__init__.py
@@ -0,0 +1 @@
+# Empty
diff --git a/pep0/constants.py b/pep0/constants.py
new file mode 100644
index 000000000..d4f7a322c
--- /dev/null
+++ b/pep0/constants.py
@@ -0,0 +1,49 @@
+# -*- coding: utf-8 -*-
+"""Constant text and format templates used to generate PEP 0."""
+
+# Width of the title column in the index tables.
+title_length = 55
+# Template for one index row; formatted with a dict holding the keys
+# 'type', 'status', 'number', 'title' and 'authors'.
+column_format = (u' %(type)1s%(status)1s %(number)4s %(title)-' +
+                 unicode(title_length) + u's %(authors)-s')
+
+# RFC 822 style header block of the generated PEP 0.
+header = u"""PEP: 0
+Title: Index of Python Enhancement Proposals (PEPs)
+Version: $Revision$
+Last-Modified: $Date$
+Author: David Goodger <goodger@python.org>,
+ Barry A. Warsaw <barry@python.org>
+Status: Active
+Type: Informational
+Created: 13-Jul-2000
+"""
+
+# Text printed under the "Introduction" heading.
+intro = u"""
+ This PEP contains the index of all Python Enhancement Proposals,
+ known as PEPs. PEP numbers are assigned by the PEP Editor, and
+ once assigned are never changed. The SVN history[1] of the PEP
+ texts represent their historical record.
+
+ The BDFL maintains his own Pronouncements page[2] at
+ http://www.python.org/doc/essays/pepparade.html which contains his
+ musings on the various outstanding PEPs.
+"""
+
+# Text printed under the "References" heading.
+references = u"""
+ [1] View PEP history online
+ http://svn.python.org/projects/peps/trunk/
+"""
+
+# Emacs file-local variables block printed at the very end.
+footer = u"""
+Local Variables:
+mode: indented-text
+indent-tabs-mode: nil
+sentence-end-double-space: t
+fill-column: 70
+coding: utf-8
+End:"""
diff --git a/pep0/output.py b/pep0/output.py
new file mode 100644
index 000000000..2053bd7a1
--- /dev/null
+++ b/pep0/output.py
@@ -0,0 +1,190 @@
+"""Code to handle the output of PEP 0."""
+import sys
+import unicodedata
+
+from operator import attrgetter
+
+from . import constants
+from .pep import PEP, PEPError
+
+
+indent = u' '
+
+
+def write_column_headers(output):
+    """Output the column headers for the PEP indices.
+
+    Two rows are written to 'output': the column titles, then a row of
+    dashes underlining each title.
+    """
+    column_headers = {'status': u'', 'type': u'', 'number': u'num',
+                      'title': u'title', 'authors': u'owner'}
+    print>>output, constants.column_format % column_headers
+    # One dash per character of the header being underlined.
+    underline_headers = dict((key, u'-' * len(value))
+                             for key, value in column_headers.items())
+    print>>output, constants.column_format % underline_headers
+
+
+def sort_peps(peps):
+    """Sort PEPs into meta, informational, accepted, open, finished,
+    and essentially dead.
+
+    Returns a tuple of six lists, in that order.  Raises PEPError for a
+    PEP whose type and status fit none of the categories.
+    """
+    meta = []
+    info = []
+    accepted = []
+    open_ = []
+    finished = []
+    dead = []
+    for pep in peps:
+        # Order of 'if' statement important.  Key Status values take precedence
+        # over Type value, and vice-versa.
+        if pep.type_ == 'Process':
+            meta.append(pep)
+        elif pep.status == 'Draft':
+            open_.append(pep)
+        elif pep.status in ('Rejected', 'Withdrawn', 'Deferred',
+                            'Incomplete', 'Replaced'):
+            dead.append(pep)
+        elif pep.type_ == 'Informational':
+            info.append(pep)
+        elif pep.status in ('Accepted', 'Active'):
+            accepted.append(pep)
+        elif pep.status == 'Final':
+            finished.append(pep)
+        else:
+            raise PEPError("unsorted (%s/%s)" %
+                           (pep.type_, pep.status),
+                           pep.filename, pep.number)
+    return meta, info, accepted, open_, finished, dead
+
+
+def verify_email_addresses(peps):
+    """Map each author of 'peps' to the one email address listed for him.
+
+    Returns a dict keyed by Author; the value is the author's email address,
+    or the empty string if no PEP ever listed one.  Raises ValueError, naming
+    the offenders, if an author is listed with more than one address.
+    """
+    authors_dict = {}
+    for pep in peps:
+        for author in pep.authors:
+            # If this is the first time we have come across an author, add him.
+            if author not in authors_dict:
+                authors_dict[author] = [author.email]
+            else:
+                found_emails = authors_dict[author]
+                # If no email exists for the author, use the new value.
+                if not found_emails[0]:
+                    authors_dict[author] = [author.email]
+                # If the new email is an empty string, move on.
+                elif not author.email:
+                    continue
+                # If the email has not been seen, add it to the list.
+                elif author.email not in found_emails:
+                    authors_dict[author].append(author.email)
+
+    valid_authors_dict = {}
+    too_many_emails = []
+    for author, emails in authors_dict.items():
+        if len(emails) > 1:
+            too_many_emails.append((author.first_last, emails))
+        else:
+            valid_authors_dict[author] = emails[0]
+    if too_many_emails:
+        err_output = []
+        for author, emails in too_many_emails:
+            err_output.append(" %s: %r" % (author, emails))
+        raise ValueError("some authors have more than one email address "
+                         "listed:\n" + '\n'.join(err_output))
+
+    return valid_authors_dict
+
+
+def sort_authors(authors_dict):
+    """Return the authors of 'authors_dict' as a list ordered by 'sort_by'."""
+    return sorted(authors_dict, key=attrgetter('sort_by'))
+
+
+def normalized_last_first(name):
+    """Return the length of the NFC-normalized 'last_first' of an author."""
+    return len(unicodedata.normalize('NFC', name.last_first))
+
+
+def write_pep0(peps, output=sys.stdout):
+    """Write the text of PEP 0 describing 'peps' to 'output'.
+
+    The numerical index lists 'peps' in the order given (presumably already
+    sorted by number; confirm against the caller).  PEPError or ValueError
+    raised by sort_peps() or verify_email_addresses() is not caught.
+    """
+    print>>output, constants.header
+    print>>output
+    print>>output, u"Introduction"
+    print>>output, constants.intro
+    print>>output
+    print>>output, u"Index by Category"
+    print>>output
+    write_column_headers(output)
+    meta, info, accepted, open_, finished, dead = sort_peps(peps)
+    categories = (
+        (u" Meta-PEPs (PEPs about PEPs or Processes)", meta),
+        (u" Other Informational PEPs", info),
+        (u" Accepted PEPs (accepted; may not be implemented yet)", accepted),
+        (u" Open PEPs (under consideration)", open_),
+        (u" Finished PEPs (done, implemented in code repository)", finished),
+        (u" Deferred, Abandoned, Withdrawn, and Rejected PEPs", dead),
+    )
+    for heading, category_peps in categories:
+        print>>output
+        print>>output, heading
+        print>>output
+        for pep in category_peps:
+            print>>output, unicode(pep)
+    print>>output
+    print>>output
+    print>>output, u" Numerical Index"
+    print>>output
+    write_column_headers(output)
+    prev_pep = 0
+    for pep in peps:
+        # Leave a blank line wherever the PEP numbers are not consecutive.
+        if pep.number - prev_pep > 1:
+            print>>output
+        print>>output, unicode(pep)
+        prev_pep = pep.number
+    print>>output
+    print>>output
+    print>>output, u"Key"
+    print>>output
+    for type_ in PEP.type_values:
+        print>>output, u" %s - %s PEP" % (type_[0], type_)
+    print>>output
+    for status in PEP.status_values:
+        print>>output, u" %s - %s proposal" % (status[0], status)
+
+    print>>output
+    print>>output
+    print>>output, u"Owners"
+    print>>output
+    authors_dict = verify_email_addresses(peps)
+    max_name = max(authors_dict.keys(), key=normalized_last_first)
+    max_name_len = len(max_name.last_first)
+    print>>output, u" %s %s" % ('name'.ljust(max_name_len), 'email address')
+    print>>output, u" %s %s" % ((len('name')*'-').ljust(max_name_len),
+                                len('email address')*'-')
+    sorted_authors = sort_authors(authors_dict)
+    for author in sorted_authors:
+        # Use the email from authors_dict instead of the one from 'author' as
+        # the author instance may have an empty email.
+        print>>output, (u" %s %s" %
+                (author.last_first.ljust(max_name_len), authors_dict[author]))
+    print>>output
+    print>>output
+    print>>output, u"References"
+    print>>output
+    print>>output, constants.references
+    print>>output, constants.footer
diff --git a/pep0/pep.py b/pep0/pep.py
new file mode 100644
index 000000000..cb9f4716b
--- /dev/null
+++ b/pep0/pep.py
@@ -0,0 +1,323 @@
+"""Code for handling object representation of a PEP."""
+import re
+import textwrap
+
+from email.parser import HeaderParser
+
+from . import constants
+
+
+class PEPError(Exception):
+
+    """Error found while parsing or categorizing a PEP.
+
+    Attributes:
+
+        + filename
+            Whatever the raiser passed in to identify the offending PEP.
+
+        + number : int or None
+            The PEP number, when it was known to the raiser.
+    """
+
+    def __init__(self, error, pep_file, pep_number=None):
+        super(PEPError, self).__init__(error)
+        self.filename = pep_file
+        self.number = pep_number
+
+    def __str__(self):
+        """Prefix the message with the PEP number or, failing that, the file."""
+        error_msg = super(PEPError, self).__str__()
+        if self.number is not None:
+            return "PEP %d: %r" % (self.number, error_msg)
+        else:
+            return "(%s): %r" % (self.filename, error_msg)
+
+
+class Author(object):
+
+    """Represent PEP authors.
+
+    Attributes:
+
+        + first_last : str
+            The author's full name.
+
+        + last_first : str
+            Output the author's name in Last, First, Suffix order.
+
+        + first : str
+            The author's first name.  A middle initial may be included.
+
+        + last : str
+            The author's last name.
+
+        + suffix : str
+            A person's suffix (can be the empty string).
+
+        + sort_by : str
+            Modification of the author's last name that should be used for
+            sorting.
+
+        + email : str
+            The author's email address.
+
+        + nick : str
+            Name shown in the index: the last name, except for the
+            van Rossums who get initials.
+    """
+
+    def __init__(self, author_and_email_tuple):
+        """Parse the name and email address of an author."""
+        name, email = author_and_email_tuple
+        self.first_last = name.strip()
+        self.email = email.lower()
+        last_name_fragment, suffix = self._last_name(name)
+        name_sep = name.index(last_name_fragment)
+        self.first = name[:name_sep].rstrip()
+        self.last = last_name_fragment
+        self.suffix = suffix
+        if not self.first:
+            self.last_first = self.last
+        else:
+            self.last_first = u', '.join([self.last, self.first])
+            if self.suffix:
+                self.last_first += u', ' + self.suffix
+        if self.last == "van Rossum":
+            # Special case for our beloved BDFL. :)
+            if self.first == "Guido":
+                self.nick = "GvR"
+            elif self.first == "Just":
+                self.nick = "JvR"
+            else:
+                raise ValueError("unknown van Rossum %r!" %
+                                 (self.first_last,))
+        else:
+            self.nick = self.last
+
+    def __hash__(self):
+        # Hash the attribute __eq__ compares so equal authors hash alike.
+        return hash(self.first_last)
+
+    def __eq__(self, other):
+        """Authors are equal when their full names are equal."""
+        return self.first_last == other.first_last
+
+    def __ne__(self, other):
+        # Python 2 does not derive '!=' from __eq__; keep the two consistent.
+        return not self.__eq__(other)
+
+    @property
+    def sort_by(self):
+        """Last name without any leading lowercase parts such as 'van'."""
+        if u' ' not in self.last:
+            return self.last
+        name_parts = self.last.split()
+        for index, part in enumerate(name_parts):
+            if part[0].isupper():
+                break
+        else:
+            raise ValueError("last name missing a capital letter")
+        return u' '.join(name_parts[index:])
+
+    def _last_name(self, full_name):
+        """Find the last name (or nickname) of a full name.
+
+        If no last name (e.g., 'Aahz') then return the full name.  If there is a
+        leading, lowercase portion to the last name (e.g., 'van' or 'von') then
+        include it.  If there is a suffix (e.g., 'Jr.') that is appended through a
+        comma, then drop the suffix.
+
+        Returns a (last name, suffix) tuple; raises ValueError if the name
+        holds no words at all.
+
+        """
+        name_partition = full_name.partition(u',')
+        no_suffix = name_partition[0].strip()
+        suffix = name_partition[2].strip()
+        name_parts = no_suffix.split()
+        if not name_parts:
+            raise ValueError("no name found in %r" % (full_name,))
+        if len(name_parts) > 2 and name_parts[-2].islower():
+            return u' '.join(name_parts[-2:]), suffix
+        return name_parts[-1], suffix
+
+
+class PEP(object):
+
+    """Representation of PEPs.
+
+    Attributes:
+
+        + number : int
+            PEP number.
+
+        + title : str
+            PEP title.
+
+        + type_ : str
+            The type of PEP.  Can only be one of the values from
+            PEP.type_values.
+
+        + status : str
+            The PEP's status.  Value must be found in PEP.status_values.
+
+        + authors : Sequence(Author)
+            A list of the authors.
+    """
+
+    # The various RFC 822 headers that are supported.
+    # The second item in the nested tuples represents if the header is
+    # required or not.
+    headers = (('PEP', True), ('Title', True), ('Version', True),
+               ('Last-Modified', True), ('Author', True),
+               ('Discussions-To', False), ('Status', True), ('Type', True),
+               ('Content-Type', False), ('Requires', False),
+               ('Created', True), ('Python-Version', False),
+               ('Post-History', False), ('Replaces', False),
+               ('Replaced-By', False))
+    # Valid values for the Type header.
+    type_values = (u"Standards Track", u"Informational", u"Process")
+    # Valid values for the Status header.
+    # Active PEPs can only be for Informational or Process PEPs.
+    status_values = (u"Accepted", u"Rejected", u"Withdrawn", u"Deferred", u"Final",
+                     u"Active", u"Draft", u"Replaced")
+
+    def __init__(self, pep_file):
+        """Init object from an open PEP file object.
+
+        Raises PEPError when a required header is missing, the headers are
+        out of order, or a header value fails validation.
+        """
+        # Parse the headers.
+        self.filename = pep_file
+        pep_parser = HeaderParser()
+        metadata = pep_parser.parse(pep_file)
+        # Step through the expected headers alongside those present; an
+        # optional header may be skipped over, a required one may not.
+        header_order = iter(self.headers)
+        try:
+            for header_name in metadata.keys():
+                current_header, required = header_order.next()
+                while header_name != current_header and not required:
+                    current_header, required = header_order.next()
+                if header_name != current_header:
+                    raise PEPError("did not deal with "
+                                   "%r before having to handle %r" %
+                                   (header_name, current_header),
+                                   pep_file.name)
+        except StopIteration:
+            raise PEPError("headers missing or out of order",
+                           pep_file.name)
+        # A required header left unconsumed means the PEP lacks it.
+        required = False
+        try:
+            while not required:
+                current_header, required = header_order.next()
+            else:
+                raise PEPError("PEP is missing its %r" % (current_header,),
+                               pep_file.name)
+        except StopIteration:
+            pass
+        # 'PEP'.
+        try:
+            self.number = int(metadata['PEP'])
+        except ValueError:
+            raise PEPError("PEP number isn't an integer", pep_file.name)
+        # 'Title'.
+        self.title = metadata['Title']
+        # 'Type'.
+        type_ = metadata['Type']
+        if type_ not in self.type_values:
+            raise PEPError('%r is not a valid Type value' % (type_,),
+                           pep_file.name, self.number)
+        self.type_ = type_
+        # 'Status'.
+        status = metadata['Status']
+        if status not in self.status_values:
+            raise PEPError("%r is not a valid Status value" %
+                           (status,), pep_file.name, self.number)
+        # Special case for Active PEPs.
+        if (status == u"Active" and
+                self.type_ not in ("Process", "Informational")):
+            raise PEPError("Only Process and Informational PEPs may "
+                           "have an Active status", pep_file.name,
+                           self.number)
+        self.status = status
+        # 'Author'.
+        authors_and_emails = self._parse_author(metadata['Author'])
+        if len(authors_and_emails) < 1:
+            raise PEPError("no authors found", pep_file.name,
+                           self.number)
+        self.authors = map(Author, authors_and_emails)
+
+    def _parse_author(self, data):
+        """Return a list of author names and emails.
+
+        Each item is a (name, email) tuple; the email is the empty string
+        for an author listed by name only.
+        """
+        # XXX Consider using email.utils.parseaddr (doesn't work with names
+        # lacking an email address.
+        angled = ur'(?P<author>.+?) <(?P<email>.+?)>'
+        paren = ur'(?P<email>.+?) \((?P<author>.+?)\)'
+        simple = ur'(?P<author>[^,]+)'
+        author_list = []
+        for regex in (angled, paren, simple):
+            # Watch out for commas separating multiple names.
+            regex += ur'(,\s*)?'
+            for match in re.finditer(regex, data):
+                # Watch out for suffixes like 'Jr.' when they are comma-separated
+                # from the name and thus cause issues when *all* names are only
+                # separated by commas.
+                match_dict = match.groupdict()
+                author = match_dict['author']
+                if (author_list and not author.partition(' ')[1] and
+                        author.endswith('.')):
+                    # pop() yields a (name, email) tuple; rejoin the name only.
+                    prev_author = author_list.pop()[0]
+                    author = ', '.join([prev_author, author])
+                if u'email' not in match_dict:
+                    email = ''
+                else:
+                    email = match_dict['email']
+                author_list.append((author, email))
+            else:
+                # If authors were found then stop searching as only expect one
+                # style of author citation.
+                if author_list:
+                    break
+        return author_list
+
+    @property
+    def type_abbr(self):
+        """Return how the type is to be represented in the index."""
+        return self.type_[0].upper()
+
+    @property
+    def status_abbr(self):
+        """Return how the status should be represented in the index."""
+        if self.status in ('Draft', 'Active'):
+            return u' '
+        else:
+            return self.status[0].upper()
+
+    @property
+    def author_abbr(self):
+        """Return the author list as a comma-separated with only last names."""
+        return u', '.join(x.nick for x in self.authors)
+
+    @property
+    def title_abbr(self):
+        """Shorten the title to be no longer than the max title length."""
+        if len(self.title) <= constants.title_length:
+            return self.title
+        wrapped_title = textwrap.wrap(self.title, constants.title_length - 4)
+        return wrapped_title[0] + u' ...'
+
+    def __unicode__(self):
+        """Return the line entry for the PEP."""
+        pep_info = {'type': self.type_abbr, 'number': str(self.number),
+                    'title': self.title_abbr, 'status': self.status_abbr,
+                    'authors': self.author_abbr}
+        return constants.column_format % pep_info