PEP 0: Use authors' full names over surnames (#3295)

Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com>
Co-authored-by: Adam Turner <9087854+aa-turner@users.noreply.github.com>
2023-09-01 10:11:33 -05:00 · 2023-09-01 10:11:33 -05:00 · 32a92bd50b
parent 3c5f6973cf
commit 32a92bd50b
8 changed files with 50 additions and 225 deletions
--- a/AUTHOR_OVERRIDES.csv
+++ b/AUTHOR_OVERRIDES.csv
@@ -1,13 +0,0 @@
-Overridden Name,Surname First,Name Reference
-The Python core team and community,"The Python core team and community",python-dev
-Erik De Bonte,"De Bonte, Erik",De Bonte
-Greg Ewing,"Ewing, Gregory",Ewing
-Guido van Rossum,"van Rossum, Guido (GvR)",GvR
-Inada Naoki,"Inada, Naoki",Inada
-Jim Jewett,"Jewett, Jim J.",Jewett
-Just van Rossum,"van Rossum, Just (JvR)",JvR
-Martin v. Löwis,"von Löwis, Martin",von Löwis
-Nathaniel Smith,"Smith, Nathaniel J.",Smith
-P.J. Eby,"Eby, Phillip J.",Eby
-Germán Méndez Bravo,"Méndez Bravo, Germán",Méndez Bravo
-Amethyst Reese,"Reese, Amethyst",Amethyst
--- a/pep_sphinx_extensions/pep_zero_generator/author.py
+++ b/pep_sphinx_extensions/pep_zero_generator/author.py
@@ -1,89 +0,0 @@
-from __future__ import annotations
-
-from typing import NamedTuple
-
-
-class _Name(NamedTuple):
-    mononym: str = None
-    forename: str = None
-    surname: str = None
-    suffix: str = None
-
-
-class Author(NamedTuple):
-    """Represent PEP authors."""
-    last_first: str  # The author's name in Surname, Forename, Suffix order.
-    nick: str  # Author's nickname for PEP tables. Defaults to surname.
-    email: str  # The author's email address.
-
-
-def parse_author_email(author_email_tuple: tuple[str, str], authors_overrides: dict[str, dict[str, str]]) -> Author:
-    """Parse the name and email address of an author."""
-    name, email = author_email_tuple
-    _first_last = name.strip()
-    email = email.lower()
-
-    if _first_last in authors_overrides:
-        name_dict = authors_overrides[_first_last]
-        last_first = name_dict["Surname First"]
-        nick = name_dict["Name Reference"]
-        return Author(last_first, nick, email)
-
-    name_parts = _parse_name(_first_last)
-    if name_parts.mononym is not None:
-        return Author(name_parts.mononym, name_parts.mononym, email)
-
-    if name_parts.suffix:
-        last_first = f"{name_parts.surname}, {name_parts.forename}, {name_parts.suffix}"
-        return Author(last_first, name_parts.surname, email)
-
-    last_first = f"{name_parts.surname}, {name_parts.forename}"
-    return Author(last_first, name_parts.surname, email)
-
-
-def _parse_name(full_name: str) -> _Name:
-    """Decompose a full name into parts.
-
-    If a mononym (e.g, 'Aahz') then return the full name. If there are
-    suffixes in the name (e.g. ', Jr.' or 'II'), then find and extract
-    them. If there is a middle initial followed by a full stop, then
-    combine the following words into a surname (e.g. N. Vander Weele). If
-    there is a leading, lowercase portion to the last name (e.g. 'van' or
-    'von') then include it in the surname.
-
-    """
-    possible_suffixes = {"Jr", "Jr.", "II", "III"}
-
-    pre_suffix, _, raw_suffix = full_name.partition(",")
-    name_parts = pre_suffix.strip().split(" ")
-    num_parts = len(name_parts)
-    suffix = raw_suffix.strip()
-
-    if name_parts == [""]:
-        raise ValueError("Name is empty!")
-    elif num_parts == 1:
-        return _Name(mononym=name_parts[0], suffix=suffix)
-    elif num_parts == 2:
-        return _Name(forename=name_parts[0].strip(), surname=name_parts[1], suffix=suffix)
-
-    # handles rogue uncaught suffixes
-    if name_parts[-1] in possible_suffixes:
-        suffix = f"{name_parts.pop(-1)} {suffix}".strip()
-
-    # handles von, van, v. etc.
-    if name_parts[-2].islower():
-        forename = " ".join(name_parts[:-2]).strip()
-        surname = " ".join(name_parts[-2:])
-        return _Name(forename=forename, surname=surname, suffix=suffix)
-
-    # handles double surnames after a middle initial (e.g. N. Vander Weele)
-    elif any(s.endswith(".") for s in name_parts):
-        split_position = [i for i, x in enumerate(name_parts) if x.endswith(".")][-1] + 1
-        forename = " ".join(name_parts[:split_position]).strip()
-        surname = " ".join(name_parts[split_position:])
-        return _Name(forename=forename, surname=surname, suffix=suffix)
-
-    # default to using the last item as the surname
-    else:
-        forename = " ".join(name_parts[:-1]).strip()
-        return _Name(forename=forename, surname=name_parts[-1], suffix=suffix)
--- a/pep_sphinx_extensions/pep_zero_generator/parser.py
+++ b/pep_sphinx_extensions/pep_zero_generator/parser.py
@@ -2,13 +2,10 @@

 from __future__ import annotations

-import csv
 from email.parser import HeaderParser
 from pathlib import Path
 import re
-from typing import TYPE_CHECKING

-from pep_sphinx_extensions.pep_zero_generator.author import parse_author_email
 from pep_sphinx_extensions.pep_zero_generator.constants import ACTIVE_ALLOWED
 from pep_sphinx_extensions.pep_zero_generator.constants import HIDE_STATUS
 from pep_sphinx_extensions.pep_zero_generator.constants import SPECIAL_STATUSES
@@ -19,17 +16,6 @@ from pep_sphinx_extensions.pep_zero_generator.constants import TYPE_STANDARDS
 from pep_sphinx_extensions.pep_zero_generator.constants import TYPE_VALUES
 from pep_sphinx_extensions.pep_zero_generator.errors import PEPError

-if TYPE_CHECKING:
-    from pep_sphinx_extensions.pep_zero_generator.author import Author
-
-
-# AUTHOR_OVERRIDES.csv is an exception file for PEP 0 name parsing
-AUTHOR_OVERRIDES: dict[str, dict[str, str]] = {}
-with open("AUTHOR_OVERRIDES.csv", "r", encoding="utf-8") as f:
-    for line in csv.DictReader(f):
-        full_name = line.pop("Overridden Name")
-        AUTHOR_OVERRIDES[full_name] = line
-

 class PEP:
    """Representation of PEPs.
@@ -97,7 +83,7 @@ class PEP:
        self.status: str = status

        # Parse PEP authors
-        self.authors: list[Author] = _parse_authors(self, metadata["Author"], AUTHOR_OVERRIDES)
+        self.authors: dict[str, str] = _parse_authors(self, metadata["Author"])

        # Topic (for sub-indices)
        _topic = metadata.get("Topic", "").lower().split(",")
@@ -144,7 +130,7 @@ class PEP:
            # a tooltip representing the type and status
            "shorthand": self.shorthand,
            # the author list as a comma-separated with only last names
-            "authors": ", ".join(author.nick for author in self.authors),
+            "authors": ", ".join(self.authors),
        }

    @property
@@ -153,7 +139,7 @@ class PEP:
        return {
            "number": self.number,
            "title": self.title,
-            "authors": ", ".join(author.nick for author in self.authors),
+            "authors": ", ".join(self.authors),
            "discussions_to": self.discussions_to,
            "status": self.status,
            "type": self.pep_type,
@@ -175,12 +161,12 @@ def _raise_pep_error(pep: PEP, msg: str, pep_num: bool = False) -> None:
    raise PEPError(msg, pep.filename)


-def _parse_authors(pep: PEP, author_header: str, authors_overrides: dict) -> list[Author]:
+def _parse_authors(pep: PEP, author_header: str) -> dict[str, str]:
    """Parse Author header line"""
-    authors_and_emails = _parse_author(author_header)
-    if not authors_and_emails:
+    authors_to_emails = _parse_author(author_header)
+    if not authors_to_emails:
        raise _raise_pep_error(pep, "no authors found", pep_num=True)
-    return [parse_author_email(author_tuple, authors_overrides) for author_tuple in authors_and_emails]
+    return authors_to_emails


 author_angled = re.compile(r"(?P<author>.+?) <(?P<email>.+?)>(,\s*)?")
@@ -188,10 +174,10 @@ author_paren = re.compile(r"(?P<email>.+?) \((?P<author>.+?)\)(,\s*)?")
 author_simple = re.compile(r"(?P<author>[^,]+)(,\s*)?")


-def _parse_author(data: str) -> list[tuple[str, str]]:
-    """Return a list of author names and emails."""
+def _parse_author(data: str) -> dict[str, str]:
+    """Return a mapping of author names to emails."""

-    author_list = []
+    author_items = []
    for regex in (author_angled, author_paren, author_simple):
        for match in regex.finditer(data):
            # Watch out for suffixes like 'Jr.' when they are comma-separated
@@ -200,16 +186,21 @@ def _parse_author(data: str) -> list[tuple[str, str]]:
            match_dict = match.groupdict()
            author = match_dict["author"]
            if not author.partition(" ")[1] and author.endswith("."):
-                prev_author = author_list.pop()
+                prev_author = author_items.pop()
                author = ", ".join([prev_author, author])
            if "email" not in match_dict:
                email = ""
            else:
                email = match_dict["email"]
-            author_list.append((author, email))
+
+            author = author.strip()
+            if not author:
+                raise ValueError("Name is empty!")
+
+            author_items.append((author, email.lower().strip()))

        # If authors were found then stop searching as only expect one
        # style of author citation.
-        if author_list:
+        if author_items:
            break
-    return author_list
+    return dict(author_items)
--- a/pep_sphinx_extensions/pep_zero_generator/writer.py
+++ b/pep_sphinx_extensions/pep_zero_generator/writer.py
@@ -295,24 +295,24 @@ def _classify_peps(peps: list[PEP]) -> tuple[list[PEP], ...]:
 def _verify_email_addresses(peps: list[PEP]) -> dict[str, str]:
    authors_dict: dict[str, set[str]] = {}
    for pep in peps:
-        for author in pep.authors:
+        for author, email in pep.authors.items():
            # If this is the first time we have come across an author, add them.
-            if author.last_first not in authors_dict:
-                authors_dict[author.last_first] = set()
+            if author not in authors_dict:
+                authors_dict[author] = set()

            # If the new email is an empty string, move on.
-            if not author.email:
+            if not email:
                continue
            # If the email has not been seen, add it to the list.
-            authors_dict[author.last_first].add(author.email)
+            authors_dict[author].add(email)

    valid_authors_dict: dict[str, str] = {}
    too_many_emails: list[tuple[str, set[str]]] = []
-    for last_first, emails in authors_dict.items():
+    for name, emails in authors_dict.items():
        if len(emails) > 1:
-            too_many_emails.append((last_first, emails))
+            too_many_emails.append((name, emails))
        else:
-            valid_authors_dict[last_first] = next(iter(emails), "")
+            valid_authors_dict[name] = next(iter(emails), "")
    if too_many_emails:
        err_output = []
        for author, emails in too_many_emails:
--- a/pep_sphinx_extensions/tests/pep_zero_generator/test_author.py
+++ b/pep_sphinx_extensions/tests/pep_zero_generator/test_author.py
@@ -1,69 +0,0 @@
-import pytest
-
-from pep_sphinx_extensions.pep_zero_generator import author
-from pep_sphinx_extensions.tests.utils import AUTHORS_OVERRIDES
-
-
-@pytest.mark.parametrize(
-    "test_input, expected",
-    [
-        (
-            ("First Last", "first@example.com"),
-            author.Author(
-                last_first="Last, First", nick="Last", email="first@example.com"
-            ),
-        ),
-        (
-            ("Guido van Rossum", "guido@example.com"),
-            author.Author(
-                last_first="van Rossum, Guido (GvR)",
-                nick="GvR",
-                email="guido@example.com",
-            ),
-        ),
-        (
-            ("Hugo van Kemenade", "hugo@example.com"),
-            author.Author(
-                last_first="van Kemenade, Hugo",
-                nick="van Kemenade",
-                email="hugo@example.com",
-            ),
-        ),
-        (
-            ("Eric N. Vander Weele", "eric@example.com"),
-            author.Author(
-                last_first="Vander Weele, Eric N.",
-                nick="Vander Weele",
-                email="eric@example.com",
-            ),
-        ),
-        (
-            ("Mariatta", "mariatta@example.com"),
-            author.Author(
-                last_first="Mariatta", nick="Mariatta", email="mariatta@example.com"
-            ),
-        ),
-        (
-            ("First Last Jr.", "first@example.com"),
-            author.Author(
-                last_first="Last, First, Jr.", nick="Last", email="first@example.com"
-            ),
-        ),
-        pytest.param(
-            ("First Last", "first at example.com"),
-            author.Author(
-                last_first="Last, First", nick="Last", email="first@example.com"
-            ),
-            marks=pytest.mark.xfail,
-        ),
-    ],
-)
-def test_parse_author_email(test_input, expected):
-    out = author.parse_author_email(test_input, AUTHORS_OVERRIDES)
-
-    assert out == expected
-
-
-def test_parse_author_email_empty_name():
-    with pytest.raises(ValueError, match="Name is empty!"):
-        author.parse_author_email(("", "user@example.com"), AUTHORS_OVERRIDES)
--- a/pep_sphinx_extensions/tests/pep_zero_generator/test_parser.py
+++ b/pep_sphinx_extensions/tests/pep_zero_generator/test_parser.py
@@ -3,7 +3,6 @@ from pathlib import Path
 import pytest

 from pep_sphinx_extensions.pep_zero_generator import parser
-from pep_sphinx_extensions.pep_zero_generator.author import Author
 from pep_sphinx_extensions.pep_zero_generator.constants import (
    STATUS_ACCEPTED,
    STATUS_ACTIVE,
@@ -19,7 +18,6 @@ from pep_sphinx_extensions.pep_zero_generator.constants import (
    TYPE_STANDARDS,
 )
 from pep_sphinx_extensions.pep_zero_generator.errors import PEPError
-from pep_sphinx_extensions.tests.utils import AUTHORS_OVERRIDES


 def test_pep_repr():
@@ -46,7 +44,7 @@ def test_pep_details(monkeypatch):
    pep8 = parser.PEP(Path("pep-0008.txt"))

    assert pep8.details == {
-        "authors": "GvR, Warsaw, Coghlan",
+        "authors": "Guido van Rossum, Barry Warsaw, Nick Coghlan",
        "number": 8,
        "shorthand": ":abbr:`PA (Process, Active)`",
        "title": "Style Guide for Python Code",
@@ -58,21 +56,34 @@ def test_pep_details(monkeypatch):
    [
        (
            "First Last <user@example.com>",
-            [Author(last_first="Last, First", nick="Last", email="user@example.com")],
+            {"First Last": "user@example.com"},
+        ),
+        (
+            "First Last <   user@example.com  >",
+            {"First Last": "user@example.com"},
        ),
        (
            "First Last",
-            [Author(last_first="Last, First", nick="Last", email="")],
+            {"First Last": ""},
        ),
        (
            "user@example.com (First Last)",
-            [Author(last_first="Last, First", nick="Last", email="user@example.com")],
+            {"First Last": "user@example.com"},
+        ),
+        (
+            "user@example.com (  First Last  )",
+            {"First Last": "user@example.com"},
        ),
        pytest.param(
            "First Last <user at example.com>",
-            [Author(last_first="Last, First", nick="Last", email="user@example.com")],
+            {"First Last": "user@example.com"},
            marks=pytest.mark.xfail,
        ),
+        pytest.param(
+            " , First Last,",
+            {"First Last": ""},
+            marks=pytest.mark.xfail(raises=ValueError),
+        ),
    ],
 )
 def test_parse_authors(test_input, expected):
@@ -80,7 +91,7 @@ def test_parse_authors(test_input, expected):
    dummy_object = parser.PEP(Path("pep-0160.txt"))

    # Act
-    out = parser._parse_authors(dummy_object, test_input, AUTHORS_OVERRIDES)
+    out = parser._parse_authors(dummy_object, test_input)

    # Assert
    assert out == expected
@@ -90,7 +101,7 @@ def test_parse_authors_invalid():
    pep = parser.PEP(Path("pep-0008.txt"))

    with pytest.raises(PEPError, match="no authors found"):
-        parser._parse_authors(pep, "", AUTHORS_OVERRIDES)
+        parser._parse_authors(pep, "")


@pytest.mark.parametrize(
--- a/pep_sphinx_extensions/tests/pep_zero_generator/test_writer.py
+++ b/pep_sphinx_extensions/tests/pep_zero_generator/test_writer.py
@@ -35,13 +35,13 @@ def test_pep_zero_writer_emit_title():
        (
            "pep-9000.rst",
            {
-                "Fussyreverend, Francis": "one@example.com",
-                "Soulfulcommodore, Javier": "two@example.com",
+                "Francis Fussyreverend": "one@example.com",
+                "Javier Soulfulcommodore": "two@example.com",
            },
        ),
        (
            "pep-9001.rst",
-            {"Fussyreverend, Francis": "", "Soulfulcommodore, Javier": ""},
+            {"Francis Fussyreverend": "", "Javier Soulfulcommodore": ""},
        ),
    ],
 )
--- a/pep_sphinx_extensions/tests/utils.py
+++ b/pep_sphinx_extensions/tests/utils.py
@@ -1,6 +0,0 @@
-AUTHORS_OVERRIDES = {
-    "Guido van Rossum": {
-        "Surname First": "van Rossum, Guido (GvR)",
-        "Name Reference": "GvR",
-    },
-}