PEP 0: Use authors' full names over surnames (#3295)

Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com>
Co-authored-by: Adam Turner <9087854+aa-turner@users.noreply.github.com>
2023-09-01 10:11:33 -05:00 · 2023-09-01 10:11:33 -05:00 · 32a92bd50b
parent 3c5f6973cf
commit 32a92bd50b
8 changed files with 50 additions and 225 deletions
--- a/AUTHOR_OVERRIDES.csv
+++ b/AUTHOR_OVERRIDES.csv
@ -1,13 +0,0 @@
 Overridden Name,Surname First,Name Reference
 The Python core team and community,"The Python core team and community",python-dev
 Erik De Bonte,"De Bonte, Erik",De Bonte
 Greg Ewing,"Ewing, Gregory",Ewing
 Guido van Rossum,"van Rossum, Guido (GvR)",GvR
 Inada Naoki,"Inada, Naoki",Inada
 Jim Jewett,"Jewett, Jim J.",Jewett
 Just van Rossum,"van Rossum, Just (JvR)",JvR
 Martin v. Löwis,"von Löwis, Martin",von Löwis
 Nathaniel Smith,"Smith, Nathaniel J.",Smith
 P.J. Eby,"Eby, Phillip J.",Eby
 Germán Méndez Bravo,"Méndez Bravo, Germán",Méndez Bravo
 Amethyst Reese,"Reese, Amethyst",Amethyst
--- a/pep_sphinx_extensions/pep_zero_generator/author.py
+++ b/pep_sphinx_extensions/pep_zero_generator/author.py
@ -1,89 +0,0 @@
 from __future__ import annotations
 from typing import NamedTuple
 class _Name(NamedTuple):
    mononym: str = None
    forename: str = None
    surname: str = None
    suffix: str = None
 class Author(NamedTuple):
    """A single PEP author: display names plus email address."""

    # Name written in "Surname, Forename, Suffix" order.
    last_first: str
    # Nickname used in PEP tables; the surname unless something else is given.
    nick: str
    # Email address of the author.
    email: str
 def parse_author_email(author_email_tuple: tuple[str, str], authors_overrides: dict[str, dict[str, str]]) -> Author:
    """Build an ``Author`` from a ``(name, email)`` pair.

    The name is stripped of surrounding whitespace and the email is
    lower-cased. If the stripped name is a key of *authors_overrides*, the
    entry's "Surname First" and "Name Reference" values are used verbatim.
    Otherwise the name is split with ``_parse_name``: a mononym is used for
    both display fields, and any other name is rendered surname-first with
    the surname as the nickname.
    """
    raw_name, raw_email = author_email_tuple
    full_name = raw_name.strip()
    address = raw_email.lower()

    # Explicit overrides win over any automatic parsing.
    if full_name in authors_overrides:
        override = authors_overrides[full_name]
        return Author(override["Surname First"], override["Name Reference"], address)

    parts = _parse_name(full_name)
    if parts.mononym is not None:
        return Author(parts.mononym, parts.mononym, address)

    # The suffix, when present, is appended as a third comma-separated item.
    tail = f", {parts.suffix}" if parts.suffix else ""
    return Author(f"{parts.surname}, {parts.forename}{tail}", parts.surname, address)
 def _parse_name(full_name: str) -> _Name:
    """Decompose a full name into parts.

    If a mononym (e.g. 'Aahz') then return the full name. If there are
    suffixes in the name (e.g. ', Jr.' or 'II'), then find and extract
    them. If there is a middle initial followed by a full stop, then
    combine the following words into a surname (e.g. N. Vander Weele). If
    there is a leading, lowercase portion to the last name (e.g. 'van' or
    'von') then include it in the surname.

    Words are separated on any run of whitespace, so repeated spaces never
    produce empty name parts. The text after the first comma is always
    treated as the suffix.

    Raises:
        ValueError: if there is no name before the first comma.
    """
    possible_suffixes = {"Jr", "Jr.", "II", "III"}

    pre_suffix, _, raw_suffix = full_name.partition(",")
    # split() with no argument collapses whitespace runs; split(" ") would
    # leave empty strings behind and leak stray spaces into the surname.
    name_parts = pre_suffix.split()
    suffix = raw_suffix.strip()

    if not name_parts:
        raise ValueError("Name is empty!")
    if len(name_parts) == 1:
        return _Name(mononym=name_parts[0], suffix=suffix)
    if len(name_parts) == 2:
        return _Name(forename=name_parts[0], surname=name_parts[1], suffix=suffix)

    # handles rogue uncaught suffixes
    if name_parts[-1] in possible_suffixes:
        suffix = f"{name_parts.pop()} {suffix}".strip()

    if name_parts[-2].islower():
        # handles von, van, v. etc.
        split_position = len(name_parts) - 2
    else:
        # handles double surnames after a middle initial (e.g. N. Vander
        # Weele). The final word is excluded from the search: a trailing
        # dotted word would otherwise leave nothing for the surname.
        initials = [i for i, part in enumerate(name_parts[:-1]) if part.endswith(".")]
        # default to using the last item as the surname
        split_position = initials[-1] + 1 if initials else len(name_parts) - 1

    forename = " ".join(name_parts[:split_position])
    surname = " ".join(name_parts[split_position:])
    return _Name(forename=forename, surname=surname, suffix=suffix)
--- a/pep_sphinx_extensions/pep_zero_generator/parser.py
+++ b/pep_sphinx_extensions/pep_zero_generator/parser.py
@ -2,13 +2,10 @@
 from __future__ import annotations
 import csv
 from email.parser import HeaderParser
 from pathlib import Path
 import re
 from typing import TYPE_CHECKING
 from pep_sphinx_extensions.pep_zero_generator.author import parse_author_email
 from pep_sphinx_extensions.pep_zero_generator.constants import ACTIVE_ALLOWED
 from pep_sphinx_extensions.pep_zero_generator.constants import HIDE_STATUS
 from pep_sphinx_extensions.pep_zero_generator.constants import SPECIAL_STATUSES
@ -19,17 +16,6 @@ from pep_sphinx_extensions.pep_zero_generator.constants import TYPE_STANDARDS
 from pep_sphinx_extensions.pep_zero_generator.constants import TYPE_VALUES
 from pep_sphinx_extensions.pep_zero_generator.errors import PEPError
 if TYPE_CHECKING:
    from pep_sphinx_extensions.pep_zero_generator.author import Author
 # AUTHOR_OVERRIDES.csv is an exception file for PEP 0 name parsing.
 # It is opened by relative path, so it is looked up in the current working
 # directory when this module is imported.
 # Maps each "Overridden Name" value to the remaining columns of its CSV row.
 AUTHOR_OVERRIDES: dict[str, dict[str, str]] = {}
 with open("AUTHOR_OVERRIDES.csv", "r", encoding="utf-8") as f:
    for line in csv.DictReader(f):
        # pop() removes the key column so only the override fields remain
        full_name = line.pop("Overridden Name")
        AUTHOR_OVERRIDES[full_name] = line
 class PEP:
    """Representation of PEPs.
@ -97,7 +83,7 @@ class PEP:
        self.status: str = status
        # Parse PEP authors
-        self.authors: list[Author] = _parse_authors(self, metadata["Author"], AUTHOR_OVERRIDES)
+        self.authors: dict[str, str] = _parse_authors(self, metadata["Author"])
        # Topic (for sub-indices)
        _topic = metadata.get("Topic", "").lower().split(",")
@ -144,7 +130,7 @@ class PEP:
            # a tooltip representing the type and status
            "shorthand": self.shorthand,
            # the authors' full names as a comma-separated list
-            "authors": ", ".join(author.nick for author in self.authors),
+            "authors": ", ".join(self.authors),
        }
    @property
@ -153,7 +139,7 @@ class PEP:
        return {
            "number": self.number,
            "title": self.title,
-            "authors": ", ".join(author.nick for author in self.authors),
+            "authors": ", ".join(self.authors),
            "discussions_to": self.discussions_to,
            "status": self.status,
            "type": self.pep_type,
@ -175,12 +161,12 @@ def _raise_pep_error(pep: PEP, msg: str, pep_num: bool = False) -> None:
    raise PEPError(msg, pep.filename)
-def _parse_authors(pep: PEP, author_header: str, authors_overrides: dict) -> list[Author]:
+def _parse_authors(pep: PEP, author_header: str) -> dict[str, str]:
    """Parse Author header line"""
-    authors_and_emails = _parse_author(author_header)
+    authors_to_emails = _parse_author(author_header)
-    if not authors_and_emails:
+    if not authors_to_emails:
        raise _raise_pep_error(pep, "no authors found", pep_num=True)
-    return [parse_author_email(author_tuple, authors_overrides) for author_tuple in authors_and_emails]
+    return authors_to_emails
 author_angled = re.compile(r"(?P<author>.+?) <(?P<email>.+?)>(,\s*)?")
@ -188,10 +174,10 @@ author_paren = re.compile(r"(?P<email>.+?) \((?P<author>.+?)\)(,\s*)?")
 author_simple = re.compile(r"(?P<author>[^,]+)(,\s*)?")
-def _parse_author(data: str) -> list[tuple[str, str]]:
+def _parse_author(data: str) -> dict[str, str]:
-    """Return a list of author names and emails."""
+    """Return a mapping of author names to emails."""
-    author_list = []
+    author_items = []
    for regex in (author_angled, author_paren, author_simple):
        for match in regex.finditer(data):
            # Watch out for suffixes like 'Jr.' when they are comma-separated
@ -200,16 +186,21 @@ def _parse_author(data: str) -> list[tuple[str, str]]:
            match_dict = match.groupdict()
            author = match_dict["author"]
            if not author.partition(" ")[1] and author.endswith("."):
-                prev_author = author_list.pop()
+                prev_author = author_items.pop()
                author = ", ".join([prev_author, author])
            if "email" not in match_dict:
                email = ""
            else:
                email = match_dict["email"]
-            author_list.append((author, email))
+
            author = author.strip()
            if not author:
                raise ValueError("Name is empty!")
            author_items.append((author, email.lower().strip()))
        # If authors were found then stop searching as only expect one
        # style of author citation.
-        if author_list:
+        if author_items:
            break
-    return author_list
+    return dict(author_items)
--- a/pep_sphinx_extensions/pep_zero_generator/writer.py
+++ b/pep_sphinx_extensions/pep_zero_generator/writer.py
@ -295,24 +295,24 @@ def _classify_peps(peps: list[PEP]) -> tuple[list[PEP], ...]:
 def _verify_email_addresses(peps: list[PEP]) -> dict[str, str]:
    authors_dict: dict[str, set[str]] = {}
    for pep in peps:
-        for author in pep.authors:
+        for author, email in pep.authors.items():
            # If this is the first time we have come across an author, add them.
-            if author.last_first not in authors_dict:
+            if author not in authors_dict:
-                authors_dict[author.last_first] = set()
+                authors_dict[author] = set()
            # If the new email is an empty string, move on.
-            if not author.email:
+            if not email:
                continue
            # If the email has not been seen, add it to the list.
-            authors_dict[author.last_first].add(author.email)
+            authors_dict[author].add(email)
    valid_authors_dict: dict[str, str] = {}
    too_many_emails: list[tuple[str, set[str]]] = []
-    for last_first, emails in authors_dict.items():
+    for name, emails in authors_dict.items():
        if len(emails) > 1:
-            too_many_emails.append((last_first, emails))
+            too_many_emails.append((name, emails))
        else:
-            valid_authors_dict[last_first] = next(iter(emails), "")
+            valid_authors_dict[name] = next(iter(emails), "")
    if too_many_emails:
        err_output = []
        for author, emails in too_many_emails:
--- a/pep_sphinx_extensions/tests/pep_zero_generator/test_author.py
+++ b/pep_sphinx_extensions/tests/pep_zero_generator/test_author.py
@ -1,69 +0,0 @@
 import pytest
 from pep_sphinx_extensions.pep_zero_generator import author
 from pep_sphinx_extensions.tests.utils import AUTHORS_OVERRIDES
@pytest.mark.parametrize(
    "test_input, expected",
    [
        (
            ("First Last", "first@example.com"),
            author.Author(
                last_first="Last, First", nick="Last", email="first@example.com"
            ),
        ),
        (
            ("Guido van Rossum", "guido@example.com"),
            author.Author(
                last_first="van Rossum, Guido (GvR)",
                nick="GvR",
                email="guido@example.com",
            ),
        ),
        (
            ("Hugo van Kemenade", "hugo@example.com"),
            author.Author(
                last_first="van Kemenade, Hugo",
                nick="van Kemenade",
                email="hugo@example.com",
            ),
        ),
        (
            ("Eric N. Vander Weele", "eric@example.com"),
            author.Author(
                last_first="Vander Weele, Eric N.",
                nick="Vander Weele",
                email="eric@example.com",
            ),
        ),
        (
            ("Mariatta", "mariatta@example.com"),
            author.Author(
                last_first="Mariatta", nick="Mariatta", email="mariatta@example.com"
            ),
        ),
        (
            ("First Last Jr.", "first@example.com"),
            author.Author(
                last_first="Last, First, Jr.", nick="Last", email="first@example.com"
            ),
        ),
        pytest.param(
            ("First Last", "first at example.com"),
            author.Author(
                last_first="Last, First", nick="Last", email="first@example.com"
            ),
            marks=pytest.mark.xfail,
        ),
    ],
 )
 def test_parse_author_email(test_input, expected):
    out = author.parse_author_email(test_input, AUTHORS_OVERRIDES)
    assert out == expected
 def test_parse_author_email_empty_name():
    """An empty author name must be rejected with ``ValueError``."""
    nameless = ("", "user@example.com")
    with pytest.raises(ValueError, match="Name is empty!"):
        author.parse_author_email(nameless, AUTHORS_OVERRIDES)
--- a/pep_sphinx_extensions/tests/pep_zero_generator/test_parser.py
+++ b/pep_sphinx_extensions/tests/pep_zero_generator/test_parser.py
@ -3,7 +3,6 @@ from pathlib import Path
 import pytest
 from pep_sphinx_extensions.pep_zero_generator import parser
 from pep_sphinx_extensions.pep_zero_generator.author import Author
 from pep_sphinx_extensions.pep_zero_generator.constants import (
    STATUS_ACCEPTED,
    STATUS_ACTIVE,
@ -19,7 +18,6 @@ from pep_sphinx_extensions.pep_zero_generator.constants import (
    TYPE_STANDARDS,
 )
 from pep_sphinx_extensions.pep_zero_generator.errors import PEPError
 from pep_sphinx_extensions.tests.utils import AUTHORS_OVERRIDES
 def test_pep_repr():
@ -46,7 +44,7 @@ def test_pep_details(monkeypatch):
    pep8 = parser.PEP(Path("pep-0008.txt"))
    assert pep8.details == {
-        "authors": "GvR, Warsaw, Coghlan",
+        "authors": "Guido van Rossum, Barry Warsaw, Nick Coghlan",
        "number": 8,
        "shorthand": ":abbr:`PA (Process, Active)`",
        "title": "Style Guide for Python Code",
@ -58,21 +56,34 @@ def test_pep_details(monkeypatch):
    [
        (
            "First Last <user@example.com>",
-            [Author(last_first="Last, First", nick="Last", email="user@example.com")],
+            {"First Last": "user@example.com"},
        ),
        (
            "First Last <   user@example.com  >",
            {"First Last": "user@example.com"},
        ),
        (
            "First Last",
-            [Author(last_first="Last, First", nick="Last", email="")],
+            {"First Last": ""},
        ),
        (
            "user@example.com (First Last)",
-            [Author(last_first="Last, First", nick="Last", email="user@example.com")],
+            {"First Last": "user@example.com"},
        ),
        (
            "user@example.com (  First Last  )",
            {"First Last": "user@example.com"},
        ),
        pytest.param(
            "First Last <user at example.com>",
-            [Author(last_first="Last, First", nick="Last", email="user@example.com")],
+            {"First Last": "user@example.com"},
            marks=pytest.mark.xfail,
        ),
        pytest.param(
            " , First Last,",
            {"First Last": ""},
            marks=pytest.mark.xfail(raises=ValueError),
        ),
    ],
 )
 def test_parse_authors(test_input, expected):
@ -80,7 +91,7 @@ def test_parse_authors(test_input, expected):
    dummy_object = parser.PEP(Path("pep-0160.txt"))
    # Act
-    out = parser._parse_authors(dummy_object, test_input, AUTHORS_OVERRIDES)
+    out = parser._parse_authors(dummy_object, test_input)
    # Assert
    assert out == expected
@ -90,7 +101,7 @@ def test_parse_authors_invalid():
    pep = parser.PEP(Path("pep-0008.txt"))
    with pytest.raises(PEPError, match="no authors found"):
-        parser._parse_authors(pep, "", AUTHORS_OVERRIDES)
+        parser._parse_authors(pep, "")
@pytest.mark.parametrize(
--- a/pep_sphinx_extensions/tests/pep_zero_generator/test_writer.py
+++ b/pep_sphinx_extensions/tests/pep_zero_generator/test_writer.py
@ -35,13 +35,13 @@ def test_pep_zero_writer_emit_title():
        (
            "pep-9000.rst",
            {
-                "Fussyreverend, Francis": "one@example.com",
+                "Francis Fussyreverend": "one@example.com",
-                "Soulfulcommodore, Javier": "two@example.com",
+                "Javier Soulfulcommodore": "two@example.com",
            },
        ),
        (
            "pep-9001.rst",
-            {"Fussyreverend, Francis": "", "Soulfulcommodore, Javier": ""},
+            {"Francis Fussyreverend": "", "Javier Soulfulcommodore": ""},
        ),
    ],
 )
--- a/pep_sphinx_extensions/tests/utils.py
+++ b/pep_sphinx_extensions/tests/utils.py
@ -1,6 +0,0 @@
 # Author-name overrides shared by the tests: maps a full name to its
 # "Surname First" and "Name Reference" replacements.
 AUTHORS_OVERRIDES = {
    "Guido van Rossum": {"Surname First": "van Rossum, Guido (GvR)", "Name Reference": "GvR"},
 }