From 3550731898aa878bb0016258cdd4ccc3f1da51f7 Mon Sep 17 00:00:00 2001
From: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
Date: Fri, 1 Sep 2023 16:19:19 +0100
Subject: [PATCH] PEP 0: Refactoring (#3340)

---
 .../pep_zero_generator/parser.py              | 73 ++++++++-----------
 .../pep_zero_generator/writer.py              | 24 +++---
 .../tests/pep_zero_generator/test_parser.py   | 31 ++------
 3 files changed, 50 insertions(+), 78 deletions(-)
diff --git a/pep_sphinx_extensions/pep_zero_generator/parser.py b/pep_sphinx_extensions/pep_zero_generator/parser.py
index 2ce802a13..96b4e9aba 100644
--- a/pep_sphinx_extensions/pep_zero_generator/parser.py
+++ b/pep_sphinx_extensions/pep_zero_generator/parser.py
@@ -2,6 +2,7 @@
 
 from __future__ import annotations
 
+import dataclasses
 from email.parser import HeaderParser
 from pathlib import Path
 import re
@@ -17,6 +18,13 @@ from pep_sphinx_extensions.pep_zero_generator.constants import TYPE_VALUES
 from pep_sphinx_extensions.pep_zero_generator.errors import PEPError
 
 
+@dataclasses.dataclass(order=True, frozen=True)
+class _Author:
+    """An immutable PEP author record; instances order by (full_name, email)."""
+    full_name: str  # The author's name, as parsed from the Author header.
+    email: str  # The author's email address ("" when the header gives none).
+
+
 class PEP:
     """Representation of PEPs.
 
@@ -83,7 +91,9 @@ class PEP:
         self.status: str = status
 
         # Parse PEP authors
-        self.authors: dict[str, str] = _parse_authors(self, metadata["Author"])
+        self.authors: list[_Author] = _parse_author(metadata["Author"])
+        if not self.authors:
+            raise _raise_pep_error(self, "no authors found", pep_num=True)
 
         # Topic (for sub-indices)
         _topic = metadata.get("Topic", "").lower().split(",")
@@ -130,7 +140,7 @@ class PEP:
             # a tooltip representing the type and status
             "shorthand": self.shorthand,
             # the author list as a comma-separated with only last names
-            "authors": ", ".join(self.authors),
+            "authors": ", ".join(author.full_name for author in self.authors),
         }
 
     @property
@@ -139,7 +149,7 @@ class PEP:
         return {
             "number": self.number,
             "title": self.title,
-            "authors": ", ".join(self.authors),
+            "authors": ", ".join(author.full_name for author in self.authors),
             "discussions_to": self.discussions_to,
             "status": self.status,
             "type": self.pep_type,
@@ -161,46 +171,27 @@ def _raise_pep_error(pep: PEP, msg: str, pep_num: bool = False) -> None:
     raise PEPError(msg, pep.filename)
 
 
-def _parse_authors(pep: PEP, author_header: str) -> dict[str, str]:
-    """Parse Author header line"""
-    authors_to_emails = _parse_author(author_header)
-    if not authors_to_emails:
-        raise _raise_pep_error(pep, "no authors found", pep_num=True)
-    return authors_to_emails
+jr_placeholder = ",Jr"
 
 
-author_angled = re.compile(r"(?P<author>.+?) <(?P<email>.+?)>(,\s*)?")
-author_paren = re.compile(r"(?P<email>.+?) \((?P<author>.+?)\)(,\s*)?")
-author_simple = re.compile(r"(?P<author>[^,]+)(,\s*)?")
+def _parse_author(data: str) -> list[_Author]:
+    """Parse an Author header into _Author records; ValueError on an empty name."""
 
+    author_list = []
+    data = (data.replace("\n", " ")
+                .replace(", Jr", jr_placeholder)  # shield "Name, Jr" from the split
+                .rstrip().removesuffix(","))
+    for author_email in data.split(", "):
+        if " <" in author_email:
+            author, email = author_email.removesuffix(">").split(" <")
+        else:
+            author, email = author_email, ""  # name-only entry, no address given
 
-def _parse_author(data: str) -> dict[str, str]:
-    """Return a mapping of author names to emails."""
+        author = author.strip()
+        if not author:
+            raise ValueError("Name is empty!")
 
-    author_items = []
-    for regex in (author_angled, author_paren, author_simple):
-        for match in regex.finditer(data):
-            # Watch out for suffixes like 'Jr.' when they are comma-separated
-            # from the name and thus cause issues when *all* names are only
-            # separated by commas.
-            match_dict = match.groupdict()
-            author = match_dict["author"]
-            if not author.partition(" ")[1] and author.endswith("."):
-                prev_author = author_items.pop()
-                author = ", ".join([prev_author, author])
-            if "email" not in match_dict:
-                email = ""
-            else:
-                email = match_dict["email"]
-
-            author = author.strip()
-            if not author:
-                raise ValueError("Name is empty!")
-
-            author_items.append((author, email.lower().strip()))
-
-        # If authors were found then stop searching as only expect one
-        # style of author citation.
-        if author_items:
-            break
-    return dict(author_items)
+        author = author.replace(jr_placeholder, ", Jr")  # restore shielded suffix
+        email = email.lower().strip()  # one canonical spelling per address
+        author_list.append(_Author(author, email))
+    return author_list
diff --git a/pep_sphinx_extensions/pep_zero_generator/writer.py b/pep_sphinx_extensions/pep_zero_generator/writer.py
index 043337a77..02af0c8bd 100644
--- a/pep_sphinx_extensions/pep_zero_generator/writer.py
+++ b/pep_sphinx_extensions/pep_zero_generator/writer.py
@@ -2,7 +2,6 @@
 
 from __future__ import annotations
 
-import datetime as dt
 from typing import TYPE_CHECKING
 import unicodedata
 
@@ -29,11 +28,10 @@ from pep_sphinx_extensions.pep_zero_generator.errors import PEPError
 if TYPE_CHECKING:
     from pep_sphinx_extensions.pep_zero_generator.parser import PEP
 
-HEADER = f"""\
+HEADER = """\
 PEP: 0
 Title: Index of Python Enhancement Proposals (PEPs)
-Last-Modified: {dt.date.today()}
-Author: python-dev <python-dev@python.org>
+Author: The PEP Editors
 Status: Active
 Type: Informational
 Content-Type: text/x-rst
@@ -241,7 +239,7 @@ class PEPZeroWriter:
             self.emit_newline()
             self.emit_newline()
 
-        pep0_string = "\n".join([str(s) for s in self.output])
+        pep0_string = "\n".join(map(str, self.output))
         return pep0_string
 
 
@@ -295,24 +293,24 @@ def _classify_peps(peps: list[PEP]) -> tuple[list[PEP], ...]:
 def _verify_email_addresses(peps: list[PEP]) -> dict[str, str]:
     authors_dict: dict[str, set[str]] = {}
     for pep in peps:
-        for author, email in pep.authors.items():
+        for author in pep.authors:
             # If this is the first time we have come across an author, add them.
-            if author not in authors_dict:
-                authors_dict[author] = set()
+            if author.full_name not in authors_dict:
+                authors_dict[author.full_name] = set()
 
             # If the new email is an empty string, move on.
-            if not email:
+            if not author.email:
                 continue
             # If the email has not been seen, add it to the list.
-            authors_dict[author].add(email)
+            authors_dict[author.full_name].add(author.email)
 
     valid_authors_dict: dict[str, str] = {}
     too_many_emails: list[tuple[str, set[str]]] = []
-    for name, emails in authors_dict.items():
+    for full_name, emails in authors_dict.items():
         if len(emails) > 1:
-            too_many_emails.append((name, emails))
+            too_many_emails.append((full_name, emails))
         else:
-            valid_authors_dict[name] = next(iter(emails), "")
+            valid_authors_dict[full_name] = next(iter(emails), "")
     if too_many_emails:
         err_output = []
         for author, emails in too_many_emails:
diff --git a/pep_sphinx_extensions/tests/pep_zero_generator/test_parser.py b/pep_sphinx_extensions/tests/pep_zero_generator/test_parser.py
index 4950acbb1..2cba74df1 100644
--- a/pep_sphinx_extensions/tests/pep_zero_generator/test_parser.py
+++ b/pep_sphinx_extensions/tests/pep_zero_generator/test_parser.py
@@ -17,7 +17,7 @@ from pep_sphinx_extensions.pep_zero_generator.constants import (
     TYPE_PROCESS,
     TYPE_STANDARDS,
 )
-from pep_sphinx_extensions.pep_zero_generator.errors import PEPError
+from pep_sphinx_extensions.pep_zero_generator.parser import _Author
 
 
 def test_pep_repr():
@@ -56,27 +56,15 @@ def test_pep_details(monkeypatch):
     [
         (
             "First Last <user@example.com>",
-            {"First Last": "user@example.com"},
-        ),
-        (
-            "First Last <   user@example.com  >",
-            {"First Last": "user@example.com"},
+            [_Author(full_name="First Last", email="user@example.com")],
         ),
         (
             "First Last",
-            {"First Last": ""},
-        ),
-        (
-            "user@example.com (First Last)",
-            {"First Last": "user@example.com"},
-        ),
-        (
-            "user@example.com (  First Last  )",
-            {"First Last": "user@example.com"},
+            [_Author(full_name="First Last", email="")],
         ),
         pytest.param(
             "First Last <user at example.com>",
-            {"First Last": "user@example.com"},
+            [_Author(full_name="First Last", email="user@example.com")],
             marks=pytest.mark.xfail,
         ),
         pytest.param(
@@ -87,21 +75,16 @@ def test_pep_details(monkeypatch):
     ],
 )
 def test_parse_authors(test_input, expected):
-    # Arrange
-    dummy_object = parser.PEP(Path("pep-0160.txt"))
-
     # Act
-    out = parser._parse_authors(dummy_object, test_input)
+    out = parser._parse_author(test_input)
 
     # Assert
     assert out == expected
 
 
 def test_parse_authors_invalid():
-    pep = parser.PEP(Path("pep-0008.txt"))
-
-    with pytest.raises(PEPError, match="no authors found"):
-        parser._parse_authors(pep, "")
+    with pytest.raises(ValueError, match="Name is empty!"):
+        parser._parse_author("")
 
 
 @pytest.mark.parametrize(