python-peps/pep0/pep.py

# -*- coding: utf-8 -*-
"""Code for handling object representation of a PEP."""
import re
import textwrap
import unicodedata

from email.parser import HeaderParser

from . import constants


class PEPError(Exception):

    def __init__(self, error, pep_file, pep_number=None):
        super(PEPError, self).__init__(error)
        self.filename = pep_file
        self.number = pep_number

    def __str__(self):
        error_msg = super(PEPError, self).__str__()
        if self.number is not None:
            return "PEP %d: %r" % (self.number, error_msg)
        else:
            return "(%s): %r" % (self.filename, error_msg)


class Author(object):

    """Represent PEP authors.

    Attributes:

        + first_last : str
            The author's full name.

        + last_first : str
            Output the author's name in Last, First, Suffix order.

        + first : str
            The author's first name.  A middle initial may be included.

        + last : str
            The author's last name.

        + suffix : str
            A person's suffix (can be the empty string).

        + sort_by : str
            Modification of the author's last name that should be used for
            sorting.

        + email : str
            The author's email address.
    """

    def __init__(self, author_and_email_tuple):
        """Parse the name and email address of an author."""
        name, email = author_and_email_tuple
        self.first_last = name.strip()
        self.email = email.lower()
        last_name_fragment, suffix = self._last_name(name)
        name_sep = name.index(last_name_fragment)
        self.first = name[:name_sep].rstrip()
        self.last = last_name_fragment
        self.suffix = suffix
        if not self.first:
            self.last_first = self.last
        else:
            self.last_first = u', '.join([self.last, self.first])
            if self.suffix:
                self.last_first += u', ' + self.suffix
        if self.last == "van Rossum":
            # Special case for our beloved BDFL. :)
            if self.first == "Guido":
                self.nick = "GvR"
            elif self.first == "Just":
                self.nick = "JvR"
            else:
                raise ValueError("unkown van Rossum %r!" % self)
            self.last_first += " (%s)" % (self.nick,)
        else:
            self.nick = self.last

    def __hash__(self):
        return hash(self.first_last)

    def __eq__(self, other):
        return self.first_last == other.first_last

    @property
    def sort_by(self):
        name_parts = self.last.split()
        for index, part in enumerate(name_parts):
            if part[0].isupper():
                break
        else:
            raise ValueError("last name missing a capital letter")
        base = u' '.join(name_parts[index:]).lower()
        return unicodedata.normalize('NFKD', base).encode('ASCII', 'ignore')

    def _last_name(self, full_name):
        """Find the last name (or nickname) of a full name.

        If no last name (e.g, 'Aahz') then return the full name.  If there is a
        leading, lowercase portion to the last name (e.g., 'van' or 'von') then
        include it.  If there is a suffix (e.g., 'Jr.') that is appended through a
        comma, then drop the suffix.

        """
        name_partition = full_name.partition(u',')
        no_suffix = name_partition[0].strip()
        suffix = name_partition[2].strip()
        name_parts = no_suffix.split()
        part_count = len(name_parts)
        if part_count == 1 or part_count == 2:
            return name_parts[-1], suffix
        else:
            assert part_count > 2
            if name_parts[-2].islower():
                return u' '.join(name_parts[-2:]), suffix
            else:
                return name_parts[-1], suffix


class PEP(object):

    """Representation of PEPs.

    Attributes:

        + number : int
            PEP number.

        + title : str
            PEP title.

        + type_ : str
            The type of PEP.  Can only be one of the values from
            PEP.type_values.

        + status : str
            The PEP's status.  Value must be found in PEP.status_values.

        + authors : Sequence(Author)
            A list of the authors.
    """

    # The various RFC 822 headers that are supported.
    # The second item in the nested tuples represents if the header is
    # required or not.
    headers = (('PEP', True), ('Title', True), ('Version', True),
                ('Last-Modified', True), ('Author', True),
                ('Discussions-To', False), ('Status', True), ('Type', True),
                ('Content-Type', False), ('Requires', False),
                ('Created', True), ('Python-Version', False),
                ('Post-History', False), ('Replaces', False),
                ('Replaced-By', False))
    # Valid values for the Type header.
    type_values = (u"Standards Track", u"Informational", u"Process")
    # Valid values for the Status header.
    # Active PEPs can only be for Informational or Process PEPs.
    status_values = (u"Accepted", u"Rejected", u"Withdrawn", u"Deferred", u"Final",
                     u"Active", u"Draft", u"Replaced")

    def __init__(self, pep_file):
        """Init object from an open PEP file object."""
        # Parse the headers.
        self.filename = pep_file
        pep_parser = HeaderParser()
        metadata = pep_parser.parse(pep_file)
        header_order = iter(self.headers)
        try:
            for header_name in metadata.keys():
                current_header, required = header_order.next()
                while header_name != current_header and not required:
                    current_header, required = header_order.next()
                if header_name != current_header:
                    raise PEPError("did not deal with "
                                   "%r before having to handle %r" %
                                   (header_name, current_header),
                                   pep_file.name)
        except StopIteration:
            raise PEPError("headers missing or out of order",
                                pep_file.name)
        required = False
        try:
            while not required:
                current_header, required = header_order.next()
            else:
                raise PEPError("PEP is missing its %r" % (current_header,),
                               pep_file.name)
        except StopIteration:
            pass
        # 'PEP'.
        try:
            self.number = int(metadata['PEP'])
        except ValueError:
            raise PEPParseError("PEP number isn't an integer", pep_file.name)
        # 'Title'.
        self.title = metadata['Title']
        # 'Type'.
        type_ = metadata['Type']
        if type_ not in self.type_values:
            raise PEPError('%r is not a valid Type value' % (type_,),
                           pep_file.name, self.number)
        self.type_ = type_
        # 'Status'.
        status = metadata['Status']
        if status not in self.status_values:
            raise PEPError("%r is not a valid Status value" %
                           (status,), pep_file.name, self.number)
        # Special case for Active PEPs.
        if (status == u"Active" and
                self.type_ not in ("Process", "Informational")):
            raise PEPError("Only Process and Informational PEPs may "
                           "have an Active status", pep_file.name,
                           self.number)
        self.status = status
        # 'Author'.
        authors_and_emails = self._parse_author(metadata['Author'])
        if len(authors_and_emails) < 1:
            raise PEPError("no authors found", pep_file.name,
                           self.number)
        self.authors = map(Author, authors_and_emails)

    def _parse_author(self, data):
        """Return a list of author names and emails."""
        # XXX Consider using email.utils.parseaddr (doesn't work with names
        # lacking an email address.
        angled = ur'(?P<author>.+?) <(?P<email>.+?)>'
        paren = ur'(?P<email>.+?) \((?P<author>.+?)\)'
        simple = ur'(?P<author>[^,]+)'
        author_list = []
        for regex in (angled, paren, simple):
            # Watch out for commas separating multiple names.
            regex += u'(,\s*)?'
            for match in re.finditer(regex, data):
                # Watch out for suffixes like 'Jr.' when they are comma-separated
                # from the name and thus cause issues when *all* names are only
                # separated by commas.
                match_dict = match.groupdict()
                author = match_dict['author']
                if not author.partition(' ')[1] and author.endswith('.'):
                    prev_author = author_list.pop()
                    author = ', '.join([prev_author, author])
                if u'email' not in match_dict:
                    email = ''
                else:
                    email = match_dict['email']
                author_list.append((author, email))
            else:
                # If authors were found then stop searching as only expect one
                # style of author citation.
                if author_list:
                    break
        return author_list

    @property
    def type_abbr(self):
        """Return the how the type is to be represented in the index."""
        return self.type_[0].upper()

    @property
    def status_abbr(self):
        """Return how the status should be represented in the index."""
        if self.status in ('Draft', 'Active'):
            return u' '
        else:
            return self.status[0].upper()

    @property
    def author_abbr(self):
        """Return the author list as a comma-separated with only last names."""
        return u', '.join(x.nick for x in self.authors)

    @property
    def title_abbr(self):
        """Shorten the title to be no longer than the max title length."""
        if len(self.title) <= constants.title_length:
            return self.title
        wrapped_title = textwrap.wrap(self.title, constants.title_length - 4)
        return wrapped_title[0] + u' ...'

    def __unicode__(self):
        """Return the line entry for the PEP."""
        pep_info = {'type': self.type_abbr, 'number': str(self.number),
                'title': self.title_abbr, 'status': self.status_abbr,
                'authors': self.author_abbr}
        return constants.column_format % pep_info