PEP 0: Use authors' full names over surnames (#3295)

Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com>
Co-authored-by: Adam Turner <9087854+aa-turner@users.noreply.github.com>
This commit is contained in:
Josh Cannon 2023-09-01 10:11:33 -05:00 committed by GitHub
parent 3c5f6973cf
commit 32a92bd50b
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
8 changed files with 50 additions and 225 deletions

View File

@ -1,13 +0,0 @@
Overridden Name,Surname First,Name Reference
The Python core team and community,"The Python core team and community",python-dev
Erik De Bonte,"De Bonte, Erik",De Bonte
Greg Ewing,"Ewing, Gregory",Ewing
Guido van Rossum,"van Rossum, Guido (GvR)",GvR
Inada Naoki,"Inada, Naoki",Inada
Jim Jewett,"Jewett, Jim J.",Jewett
Just van Rossum,"van Rossum, Just (JvR)",JvR
Martin v. Löwis,"von Löwis, Martin",von Löwis
Nathaniel Smith,"Smith, Nathaniel J.",Smith
P.J. Eby,"Eby, Phillip J.",Eby
Germán Méndez Bravo,"Méndez Bravo, Germán",Méndez Bravo
Amethyst Reese,"Reese, Amethyst",Amethyst
1 Overridden Name Surname First Name Reference
2 The Python core team and community The Python core team and community python-dev
3 Erik De Bonte De Bonte, Erik De Bonte
4 Greg Ewing Ewing, Gregory Ewing
5 Guido van Rossum van Rossum, Guido (GvR) GvR
6 Inada Naoki Inada, Naoki Inada
7 Jim Jewett Jewett, Jim J. Jewett
8 Just van Rossum van Rossum, Just (JvR) JvR
9 Martin v. Löwis von Löwis, Martin von Löwis
10 Nathaniel Smith Smith, Nathaniel J. Smith
11 P.J. Eby Eby, Phillip J. Eby
12 Germán Méndez Bravo Méndez Bravo, Germán Méndez Bravo
13 Amethyst Reese Reese, Amethyst Amethyst

View File

@ -1,89 +0,0 @@
from __future__ import annotations
from typing import NamedTuple
class _Name(NamedTuple):
mononym: str = None
forename: str = None
surname: str = None
suffix: str = None
class Author(NamedTuple):
    """Immutable record describing one PEP author."""

    # Name written in "Surname, Forename, Suffix" order.
    last_first: str
    # Short name used in the PEP tables; the surname unless stated otherwise.
    nick: str
    # Email address of the author.
    email: str
def parse_author_email(
    author_email_tuple: tuple[str, str],
    authors_overrides: dict[str, dict[str, str]],
) -> Author:
    """Turn a ``(name, email)`` pair into an ``Author`` record.

    The name is stripped and the email lower-cased. If the stripped name is
    a key of *authors_overrides*, that entry's "Surname First" and
    "Name Reference" values are used verbatim. Otherwise the name is
    decomposed with ``_parse_name``: a mononym is used as both the full and
    the short name; any other name is rendered as "Surname, Forename" with
    the suffix appended when there is one, and the surname is the short name.
    """
    raw_name, raw_email = author_email_tuple
    full_name = raw_name.strip()
    address = raw_email.lower()

    # An explicit override always wins over automatic name parsing.
    if full_name in authors_overrides:
        override = authors_overrides[full_name]
        surname_first = override["Surname First"]
        reference = override["Name Reference"]
        return Author(surname_first, reference, address)

    parsed = _parse_name(full_name)
    if parsed.mononym is not None:
        return Author(parsed.mononym, parsed.mononym, address)

    surname_first = f"{parsed.surname}, {parsed.forename}"
    if parsed.suffix:
        surname_first = f"{surname_first}, {parsed.suffix}"
    return Author(surname_first, parsed.surname, address)
def _parse_name(full_name: str) -> _Name:
    """Decompose a full name into parts.

    If a mononym (e.g. 'Aahz') then return the full name. If there are
    suffixes in the name (e.g. ', Jr.' or 'II'), then find and extract
    them. If there is a middle initial followed by a full stop, then
    combine the following words into a surname (e.g. N. Vander Weele). If
    there is a leading, lowercase portion to the last name (e.g. 'van' or
    'von') then include it in the surname.

    Words are separated on any run of whitespace, so doubled spaces never
    produce empty name parts. The final word is always part of the surname,
    even when it ends with a full stop.

    Raises:
        ValueError: if there is no name before the first comma.
    """
    possible_suffixes = {"Jr", "Jr.", "II", "III"}

    pre_suffix, _, raw_suffix = full_name.partition(",")
    # split() without an argument collapses whitespace runs and drops
    # leading/trailing whitespace, so no part can be an empty string.
    name_parts = pre_suffix.split()
    suffix = raw_suffix.strip()

    if not name_parts:
        raise ValueError("Name is empty!")
    if len(name_parts) == 1:
        return _Name(mononym=name_parts[0], suffix=suffix)
    if len(name_parts) == 2:
        return _Name(forename=name_parts[0], surname=name_parts[1], suffix=suffix)

    # handles rogue uncaught suffixes
    if name_parts[-1] in possible_suffixes:
        suffix = f"{name_parts.pop()} {suffix}".strip()
        # With the suffix gone only "Forename Surname" may remain; treat it
        # like any other two-word name so neither part ends up empty.
        if len(name_parts) == 2:
            return _Name(forename=name_parts[0], surname=name_parts[1], suffix=suffix)

    if name_parts[-2].islower():
        # handles von, van, v. etc.
        split_position = len(name_parts) - 2
    else:
        # handles double surnames after a middle initial (e.g. N. Vander
        # Weele). The last word is excluded from the search so the surname
        # can never be empty.
        initial_positions = [
            i for i, part in enumerate(name_parts[:-1]) if part.endswith(".")
        ]
        if initial_positions:
            split_position = initial_positions[-1] + 1
        else:
            # default to using the last item as the surname
            split_position = len(name_parts) - 1

    forename = " ".join(name_parts[:split_position])
    surname = " ".join(name_parts[split_position:])
    return _Name(forename=forename, surname=surname, suffix=suffix)

View File

@ -2,13 +2,10 @@
from __future__ import annotations
import csv
from email.parser import HeaderParser
from pathlib import Path
import re
from typing import TYPE_CHECKING
from pep_sphinx_extensions.pep_zero_generator.author import parse_author_email
from pep_sphinx_extensions.pep_zero_generator.constants import ACTIVE_ALLOWED
from pep_sphinx_extensions.pep_zero_generator.constants import HIDE_STATUS
from pep_sphinx_extensions.pep_zero_generator.constants import SPECIAL_STATUSES
@ -19,17 +16,6 @@ from pep_sphinx_extensions.pep_zero_generator.constants import TYPE_STANDARDS
from pep_sphinx_extensions.pep_zero_generator.constants import TYPE_VALUES
from pep_sphinx_extensions.pep_zero_generator.errors import PEPError
if TYPE_CHECKING:
from pep_sphinx_extensions.pep_zero_generator.author import Author
# AUTHOR_OVERRIDES.csv is an exception file for PEP 0 name parsing
AUTHOR_OVERRIDES: dict[str, dict[str, str]] = {}
with open("AUTHOR_OVERRIDES.csv", "r", encoding="utf-8") as f:
for line in csv.DictReader(f):
full_name = line.pop("Overridden Name")
AUTHOR_OVERRIDES[full_name] = line
class PEP:
"""Representation of PEPs.
@ -97,7 +83,7 @@ class PEP:
self.status: str = status
# Parse PEP authors
self.authors: list[Author] = _parse_authors(self, metadata["Author"], AUTHOR_OVERRIDES)
self.authors: dict[str, str] = _parse_authors(self, metadata["Author"])
# Topic (for sub-indices)
_topic = metadata.get("Topic", "").lower().split(",")
@ -144,7 +130,7 @@ class PEP:
# a tooltip representing the type and status
"shorthand": self.shorthand,
# the authors' names as a single comma-separated string
"authors": ", ".join(author.nick for author in self.authors),
"authors": ", ".join(self.authors),
}
@property
@ -153,7 +139,7 @@ class PEP:
return {
"number": self.number,
"title": self.title,
"authors": ", ".join(author.nick for author in self.authors),
"authors": ", ".join(self.authors),
"discussions_to": self.discussions_to,
"status": self.status,
"type": self.pep_type,
@ -175,12 +161,12 @@ def _raise_pep_error(pep: PEP, msg: str, pep_num: bool = False) -> None:
raise PEPError(msg, pep.filename)
def _parse_authors(pep: PEP, author_header: str, authors_overrides: dict) -> list[Author]:
def _parse_authors(pep: PEP, author_header: str) -> dict[str, str]:
"""Parse Author header line"""
authors_and_emails = _parse_author(author_header)
if not authors_and_emails:
authors_to_emails = _parse_author(author_header)
if not authors_to_emails:
raise _raise_pep_error(pep, "no authors found", pep_num=True)
return [parse_author_email(author_tuple, authors_overrides) for author_tuple in authors_and_emails]
return authors_to_emails
author_angled = re.compile(r"(?P<author>.+?) <(?P<email>.+?)>(,\s*)?")
@ -188,10 +174,10 @@ author_paren = re.compile(r"(?P<email>.+?) \((?P<author>.+?)\)(,\s*)?")
author_simple = re.compile(r"(?P<author>[^,]+)(,\s*)?")
def _parse_author(data: str) -> list[tuple[str, str]]:
"""Return a list of author names and emails."""
def _parse_author(data: str) -> dict[str, str]:
"""Return a mapping of author names to emails."""
author_list = []
author_items = []
for regex in (author_angled, author_paren, author_simple):
for match in regex.finditer(data):
# Watch out for suffixes like 'Jr.' when they are comma-separated
@ -200,16 +186,21 @@ def _parse_author(data: str) -> list[tuple[str, str]]:
match_dict = match.groupdict()
author = match_dict["author"]
if not author.partition(" ")[1] and author.endswith("."):
prev_author = author_list.pop()
prev_author = author_items.pop()
author = ", ".join([prev_author, author])
if "email" not in match_dict:
email = ""
else:
email = match_dict["email"]
author_list.append((author, email))
author = author.strip()
if not author:
raise ValueError("Name is empty!")
author_items.append((author, email.lower().strip()))
# If authors were found then stop searching as only expect one
# style of author citation.
if author_list:
if author_items:
break
return author_list
return dict(author_items)

View File

@ -295,24 +295,24 @@ def _classify_peps(peps: list[PEP]) -> tuple[list[PEP], ...]:
def _verify_email_addresses(peps: list[PEP]) -> dict[str, str]:
authors_dict: dict[str, set[str]] = {}
for pep in peps:
for author in pep.authors:
for author, email in pep.authors.items():
# If this is the first time we have come across an author, add them.
if author.last_first not in authors_dict:
authors_dict[author.last_first] = set()
if author not in authors_dict:
authors_dict[author] = set()
# If the new email is an empty string, move on.
if not author.email:
if not email:
continue
# If the email has not been seen, add it to the list.
authors_dict[author.last_first].add(author.email)
authors_dict[author].add(email)
valid_authors_dict: dict[str, str] = {}
too_many_emails: list[tuple[str, set[str]]] = []
for last_first, emails in authors_dict.items():
for name, emails in authors_dict.items():
if len(emails) > 1:
too_many_emails.append((last_first, emails))
too_many_emails.append((name, emails))
else:
valid_authors_dict[last_first] = next(iter(emails), "")
valid_authors_dict[name] = next(iter(emails), "")
if too_many_emails:
err_output = []
for author, emails in too_many_emails:

View File

@ -1,69 +0,0 @@
import pytest
from pep_sphinx_extensions.pep_zero_generator import author
from pep_sphinx_extensions.tests.utils import AUTHORS_OVERRIDES
@pytest.mark.parametrize(
    ("test_input", "expected"),
    [
        # Plain "Forename Surname".
        (
            ("First Last", "first@example.com"),
            author.Author("Last, First", "Last", "first@example.com"),
        ),
        # Name present in the overrides table.
        (
            ("Guido van Rossum", "guido@example.com"),
            author.Author("van Rossum, Guido (GvR)", "GvR", "guido@example.com"),
        ),
        # Lowercase particle is kept with the surname.
        (
            ("Hugo van Kemenade", "hugo@example.com"),
            author.Author("van Kemenade, Hugo", "van Kemenade", "hugo@example.com"),
        ),
        # Double surname following a middle initial.
        (
            ("Eric N. Vander Weele", "eric@example.com"),
            author.Author("Vander Weele, Eric N.", "Vander Weele", "eric@example.com"),
        ),
        # Mononym.
        (
            ("Mariatta", "mariatta@example.com"),
            author.Author("Mariatta", "Mariatta", "mariatta@example.com"),
        ),
        # Suffix that is not separated by a comma.
        (
            ("First Last Jr.", "first@example.com"),
            author.Author("Last, First, Jr.", "Last", "first@example.com"),
        ),
        # Obfuscated " at " addresses are expected to fail.
        pytest.param(
            ("First Last", "first at example.com"),
            author.Author("Last, First", "Last", "first@example.com"),
            marks=pytest.mark.xfail,
        ),
    ],
)
def test_parse_author_email(test_input, expected):
    """Each ``(name, email)`` pair must parse to the expected ``Author``."""
    parsed = author.parse_author_email(test_input, AUTHORS_OVERRIDES)
    assert parsed == expected
def test_parse_author_email_empty_name():
    """An empty author name must be rejected with ``ValueError``."""
    nameless = ("", "user@example.com")
    with pytest.raises(ValueError, match="Name is empty!"):
        author.parse_author_email(nameless, AUTHORS_OVERRIDES)

View File

@ -3,7 +3,6 @@ from pathlib import Path
import pytest
from pep_sphinx_extensions.pep_zero_generator import parser
from pep_sphinx_extensions.pep_zero_generator.author import Author
from pep_sphinx_extensions.pep_zero_generator.constants import (
STATUS_ACCEPTED,
STATUS_ACTIVE,
@ -19,7 +18,6 @@ from pep_sphinx_extensions.pep_zero_generator.constants import (
TYPE_STANDARDS,
)
from pep_sphinx_extensions.pep_zero_generator.errors import PEPError
from pep_sphinx_extensions.tests.utils import AUTHORS_OVERRIDES
def test_pep_repr():
@ -46,7 +44,7 @@ def test_pep_details(monkeypatch):
pep8 = parser.PEP(Path("pep-0008.txt"))
assert pep8.details == {
"authors": "GvR, Warsaw, Coghlan",
"authors": "Guido van Rossum, Barry Warsaw, Nick Coghlan",
"number": 8,
"shorthand": ":abbr:`PA (Process, Active)`",
"title": "Style Guide for Python Code",
@ -58,21 +56,34 @@ def test_pep_details(monkeypatch):
[
(
"First Last <user@example.com>",
[Author(last_first="Last, First", nick="Last", email="user@example.com")],
{"First Last": "user@example.com"},
),
(
"First Last < user@example.com >",
{"First Last": "user@example.com"},
),
(
"First Last",
[Author(last_first="Last, First", nick="Last", email="")],
{"First Last": ""},
),
(
"user@example.com (First Last)",
[Author(last_first="Last, First", nick="Last", email="user@example.com")],
{"First Last": "user@example.com"},
),
(
"user@example.com ( First Last )",
{"First Last": "user@example.com"},
),
pytest.param(
"First Last <user at example.com>",
[Author(last_first="Last, First", nick="Last", email="user@example.com")],
{"First Last": "user@example.com"},
marks=pytest.mark.xfail,
),
pytest.param(
" , First Last,",
{"First Last": ""},
marks=pytest.mark.xfail(raises=ValueError),
),
],
)
def test_parse_authors(test_input, expected):
@ -80,7 +91,7 @@ def test_parse_authors(test_input, expected):
dummy_object = parser.PEP(Path("pep-0160.txt"))
# Act
out = parser._parse_authors(dummy_object, test_input, AUTHORS_OVERRIDES)
out = parser._parse_authors(dummy_object, test_input)
# Assert
assert out == expected
@ -90,7 +101,7 @@ def test_parse_authors_invalid():
pep = parser.PEP(Path("pep-0008.txt"))
with pytest.raises(PEPError, match="no authors found"):
parser._parse_authors(pep, "", AUTHORS_OVERRIDES)
parser._parse_authors(pep, "")
@pytest.mark.parametrize(

View File

@ -35,13 +35,13 @@ def test_pep_zero_writer_emit_title():
(
"pep-9000.rst",
{
"Fussyreverend, Francis": "one@example.com",
"Soulfulcommodore, Javier": "two@example.com",
"Francis Fussyreverend": "one@example.com",
"Javier Soulfulcommodore": "two@example.com",
},
),
(
"pep-9001.rst",
{"Fussyreverend, Francis": "", "Soulfulcommodore, Javier": ""},
{"Francis Fussyreverend": "", "Javier Soulfulcommodore": ""},
),
],
)

View File

@ -1,6 +0,0 @@
# Author-name overrides used by the tests: full name -> replacement fields.
AUTHORS_OVERRIDES = {
    "Guido van Rossum": {"Surname First": "van Rossum, Guido (GvR)", "Name Reference": "GvR"},
}