From 3550731898aa878bb0016258cdd4ccc3f1da51f7 Mon Sep 17 00:00:00 2001 From: Adam Turner <9087854+AA-Turner@users.noreply.github.com> Date: Fri, 1 Sep 2023 16:19:19 +0100 Subject: [PATCH] PEP 0: Refactoring (#3340) --- .../pep_zero_generator/parser.py | 73 ++++++++----------- .../pep_zero_generator/writer.py | 24 +++--- .../tests/pep_zero_generator/test_parser.py | 31 ++------ 3 files changed, 50 insertions(+), 78 deletions(-) diff --git a/pep_sphinx_extensions/pep_zero_generator/parser.py b/pep_sphinx_extensions/pep_zero_generator/parser.py index 2ce802a13..96b4e9aba 100644 --- a/pep_sphinx_extensions/pep_zero_generator/parser.py +++ b/pep_sphinx_extensions/pep_zero_generator/parser.py @@ -2,6 +2,7 @@ from __future__ import annotations +import dataclasses from email.parser import HeaderParser from pathlib import Path import re @@ -17,6 +18,13 @@ from pep_sphinx_extensions.pep_zero_generator.constants import TYPE_VALUES from pep_sphinx_extensions.pep_zero_generator.errors import PEPError +@dataclasses.dataclass(order=True, frozen=True) +class _Author: + """Represent PEP authors.""" + full_name: str # The author's name. + email: str # The author's email address. + + class PEP: """Representation of PEPs. 
@@ -83,7 +91,9 @@ class PEP: self.status: str = status # Parse PEP authors - self.authors: dict[str, str] = _parse_authors(self, metadata["Author"]) + self.authors: list[_Author] = _parse_author(metadata["Author"]) + if not self.authors: + raise _raise_pep_error(self, "no authors found", pep_num=True) # Topic (for sub-indices) _topic = metadata.get("Topic", "").lower().split(",") @@ -130,7 +140,7 @@ class PEP: # a tooltip representing the type and status "shorthand": self.shorthand, # the author list as a comma-separated with only last names - "authors": ", ".join(self.authors), + "authors": ", ".join(author.full_name for author in self.authors), } @property @@ -139,7 +149,7 @@ class PEP: return { "number": self.number, "title": self.title, - "authors": ", ".join(self.authors), + "authors": ", ".join(author.full_name for author in self.authors), "discussions_to": self.discussions_to, "status": self.status, "type": self.pep_type, @@ -161,46 +171,27 @@ def _raise_pep_error(pep: PEP, msg: str, pep_num: bool = False) -> None: raise PEPError(msg, pep.filename) -def _parse_authors(pep: PEP, author_header: str) -> dict[str, str]: - """Parse Author header line""" - authors_to_emails = _parse_author(author_header) - if not authors_to_emails: - raise _raise_pep_error(pep, "no authors found", pep_num=True) - return authors_to_emails +jr_placeholder = ",Jr" -author_angled = re.compile(r"(?P<author>.+?) <(?P<email>.+?)>(,\s*)?") -author_paren = re.compile(r"(?P<email>.+?) 
\((?P<author>.+?)\)(,\s*)?") -author_simple = re.compile(r"(?P<author>[^,]+)(,\s*)?") +def _parse_author(data: str) -> list[_Author]: + """Return a list of author names and emails.""" + author_list = [] + data = (data.replace("\n", " ") + .replace(", Jr", jr_placeholder) + .rstrip().removesuffix(",")) + for author_email in data.split(", "): + if ' <' in author_email: + author, email = author_email.removesuffix(">").split(" <") + else: + author, email = author_email, "" -def _parse_author(data: str) -> dict[str, str]: - """Return a mapping of author names to emails.""" + author = author.strip() + if author == "": + raise ValueError("Name is empty!") - author_items = [] - for regex in (author_angled, author_paren, author_simple): - for match in regex.finditer(data): - # Watch out for suffixes like 'Jr.' when they are comma-separated - # from the name and thus cause issues when *all* names are only - # separated by commas. - match_dict = match.groupdict() - author = match_dict["author"] - if not author.partition(" ")[1] and author.endswith("."): - prev_author = author_items.pop() - author = ", ".join([prev_author, author]) - if "email" not in match_dict: - email = "" - else: - email = match_dict["email"] - - author = author.strip() - if not author: - raise ValueError("Name is empty!") - - author_items.append((author, email.lower().strip())) - - # If authors were found then stop searching as only expect one - # style of author citation. 
- if author_items: - break - return dict(author_items) + author = author.replace(jr_placeholder, ", Jr") + email = email.lower() + author_list.append(_Author(author, email)) + return author_list diff --git a/pep_sphinx_extensions/pep_zero_generator/writer.py b/pep_sphinx_extensions/pep_zero_generator/writer.py index 043337a77..02af0c8bd 100644 --- a/pep_sphinx_extensions/pep_zero_generator/writer.py +++ b/pep_sphinx_extensions/pep_zero_generator/writer.py @@ -2,7 +2,6 @@ from __future__ import annotations -import datetime as dt from typing import TYPE_CHECKING import unicodedata @@ -29,11 +28,10 @@ from pep_sphinx_extensions.pep_zero_generator.errors import PEPError if TYPE_CHECKING: from pep_sphinx_extensions.pep_zero_generator.parser import PEP -HEADER = f"""\ +HEADER = """\ PEP: 0 Title: Index of Python Enhancement Proposals (PEPs) -Last-Modified: {dt.date.today()} -Author: python-dev +Author: The PEP Editors Status: Active Type: Informational Content-Type: text/x-rst @@ -241,7 +239,7 @@ class PEPZeroWriter: self.emit_newline() self.emit_newline() - pep0_string = "\n".join([str(s) for s in self.output]) + pep0_string = "\n".join(map(str, self.output)) return pep0_string @@ -295,24 +293,24 @@ def _classify_peps(peps: list[PEP]) -> tuple[list[PEP], ...]: def _verify_email_addresses(peps: list[PEP]) -> dict[str, str]: authors_dict: dict[str, set[str]] = {} for pep in peps: - for author, email in pep.authors.items(): + for author in pep.authors: # If this is the first time we have come across an author, add them. - if author not in authors_dict: - authors_dict[author] = set() + if author.full_name not in authors_dict: + authors_dict[author.full_name] = set() # If the new email is an empty string, move on. - if not email: + if not author.email: continue # If the email has not been seen, add it to the list. 
- authors_dict[author].add(email) + authors_dict[author.full_name].add(author.email) valid_authors_dict: dict[str, str] = {} too_many_emails: list[tuple[str, set[str]]] = [] - for name, emails in authors_dict.items(): + for full_name, emails in authors_dict.items(): if len(emails) > 1: - too_many_emails.append((name, emails)) + too_many_emails.append((full_name, emails)) else: - valid_authors_dict[name] = next(iter(emails), "") + valid_authors_dict[full_name] = next(iter(emails), "") if too_many_emails: err_output = [] for author, emails in too_many_emails: diff --git a/pep_sphinx_extensions/tests/pep_zero_generator/test_parser.py b/pep_sphinx_extensions/tests/pep_zero_generator/test_parser.py index 4950acbb1..2cba74df1 100644 --- a/pep_sphinx_extensions/tests/pep_zero_generator/test_parser.py +++ b/pep_sphinx_extensions/tests/pep_zero_generator/test_parser.py @@ -17,7 +17,7 @@ from pep_sphinx_extensions.pep_zero_generator.constants import ( TYPE_PROCESS, TYPE_STANDARDS, ) -from pep_sphinx_extensions.pep_zero_generator.errors import PEPError +from pep_sphinx_extensions.pep_zero_generator.parser import _Author def test_pep_repr(): @@ -56,27 +56,15 @@ def test_pep_details(monkeypatch): [ ( "First Last <user@example.com>", - {"First Last": "user@example.com"}, - ), - ( - "First Last < user@example.com >", - {"First Last": "user@example.com"}, + [_Author(full_name="First Last", email="user@example.com")], ), ( "First Last", - {"First Last": ""}, - ), - ( - "user@example.com (First Last)", - {"First Last": "user@example.com"}, - ), - ( - "user@example.com ( First Last )", - {"First Last": "user@example.com"}, + [_Author(full_name="First Last", email="")], ), pytest.param( "First Last ", - {"First Last": "user@example.com"}, + [_Author(full_name="First Last", email="user@example.com")], marks=pytest.mark.xfail, ), pytest.param( @@ -87,21 +75,16 @@ def test_pep_details(monkeypatch): ], ) def test_parse_authors(test_input, expected): - # Arrange - dummy_object = 
parser.PEP(Path("pep-0160.txt")) - # Act - out = parser._parse_authors(dummy_object, test_input) + out = parser._parse_author(test_input) # Assert assert out == expected def test_parse_authors_invalid(): - pep = parser.PEP(Path("pep-0008.txt")) - - with pytest.raises(PEPError, match="no authors found"): - parser._parse_authors(pep, "") + with pytest.raises(ValueError, match="Name is empty!"): + assert parser._parse_author("") @pytest.mark.parametrize(