#!/usr/bin/env python3

# This file is placed in the public domain or under the
# CC0-1.0-Universal license, whichever is more permissive.

"""check-peps: Check PEPs for common mistakes.

Usage: check-peps [-d | --detailed] [PEP files...]

Only the PEPs specified are checked.
If none are specified, all PEPs are checked.

Use "--detailed" to show the contents of lines where errors were found.
"""

from __future__ import annotations

import datetime as dt
import re
import sys
from pathlib import Path

# Typing-only names are imported behind a flag that is always false at
# runtime; annotations stay unevaluated thanks to the ``__future__`` import.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from collections.abc import Iterable, Iterator, KeysView, Sequence
    from typing import TypeAlias

    # (line number, warning message)
    Message: TypeAlias = tuple[int, str]
    MessageIterator: TypeAlias = Iterator[Message]


# get the directory with the PEP sources
ROOT_DIR = Path(__file__).resolve().parent
PEP_ROOT = ROOT_DIR / "peps"

# See PEP 12 for the order
# Note we retain "BDFL-Delegate"
ALL_HEADERS = (
    "PEP",
    "Title",
    "Version",
    "Last-Modified",
    "Author",
    "Sponsor",
    "BDFL-Delegate",
    "PEP-Delegate",
    "Discussions-To",
    "Status",
    "Type",
    "Topic",
    "Content-Type",
    "Requires",
    "Created",
    "Python-Version",
    "Post-History",
    "Replaces",
    "Superseded-By",
    "Resolution",
)
REQUIRED_HEADERS = frozenset({"PEP", "Title", "Author", "Status", "Type", "Created"})

# See PEP 1 for the full list
ALL_STATUSES = frozenset({
    "Accepted",
    "Active",
    "April Fool!",
    "Deferred",
    "Draft",
    "Final",
    "Provisional",
    "Rejected",
    "Superseded",
    "Withdrawn",
})

# PEPs that are allowed to link directly to PEPs
SKIP_DIRECT_PEP_LINK_CHECK = frozenset({"0009", "0287", "0676", "0684", "8001"})

DEFAULT_FLAGS = re.ASCII | re.IGNORECASE  # Insensitive latin

# any sequence of letters or '-', followed by a single ':' and a space or end of line
HEADER_PATTERN = re.compile(r"^([a-z\-]+):(?: |$)", DEFAULT_FLAGS)
# any sequence of unicode letters or legal special characters
NAME_PATTERN = re.compile(r"(?:[^\W\d_]|[ ',\-.])+(?: |$)")
# any sequence of ASCII letters, digits, or legal special characters
EMAIL_LOCAL_PART_PATTERN = re.compile(r"[\w!#$%&'*+\-/=?^{|}~.]+", DEFAULT_FLAGS)

# Discourse topic / post paths
DISCOURSE_THREAD_PATTERN = re.compile(r"([\w\-]+/)?\d+", DEFAULT_FLAGS)
DISCOURSE_POST_PATTERN = re.compile(r"([\w\-]+/)?\d+(/\d+)?", DEFAULT_FLAGS)

# Mailman 2 (pipermail) and Mailman 3 (archives) paths
MAILMAN_2_PATTERN = re.compile(r"[\w\-]+/\d{4}-[a-z]+/\d+\.html", DEFAULT_FLAGS)
MAILMAN_3_THREAD_PATTERN = re.compile(r"[\w\-]+@python\.org/thread/[a-z0-9]+/?", DEFAULT_FLAGS)
MAILMAN_3_MESSAGE_PATTERN = re.compile(r"[\w\-]+@python\.org/message/[a-z0-9]+/?(#[a-z0-9]+)?", DEFAULT_FLAGS)

# Controlled by the "--detailed" flag
DETAILED_ERRORS = False


def check(filenames: Sequence[str] = (), /) -> int:
    """The main entry-point.

    Check every file in *filenames*, or every ``pep-????.rst`` file under
    ``PEP_ROOT`` when none are given.  Returns 1 (after printing a summary
    to stderr) if any error was reported, otherwise 0.
    """
    if filenames:
        filenames = map(Path, filenames)
    else:
        filenames = PEP_ROOT.glob("pep-????.rst")
    if (count := sum(map(check_file, filenames))) > 0:
        s = "s" * (count != 1)
        print(f"check-peps failed: {count} error{s}", file=sys.stderr)
        return 1
    return 0


def check_file(filename: Path, /) -> int:
    """Check a single PEP file and return the number of errors reported."""
    filename = filename.resolve()
    try:
        content = filename.read_text(encoding="utf-8")
    except (OSError, UnicodeDecodeError):
        # Any failure to read or decode the file (missing, a directory,
        # unreadable, not UTF-8) is reported as an error, not a traceback.
        return _output_error(filename, [""], [(0, "Could not read PEP!")])
    else:
        lines = content.splitlines()
        return _output_error(filename, lines, check_peps(filename, lines))


def check_peps(filename: Path, lines: Sequence[str], /) -> MessageIterator:
    """Yield all messages for one PEP: header checks, then direct-link checks."""
    yield from check_headers(lines)
    # The exemption depends only on the file name, so test it once
    # rather than once per line.
    if filename.stem.removeprefix("pep-") in SKIP_DIRECT_PEP_LINK_CHECK:
        return
    for line_num, line in enumerate(lines, start=1):
        yield from check_direct_links(line_num, line.lstrip())


def check_headers(lines: Sequence[str], /) -> MessageIterator:
    """Validate the header block, i.e. everything before the first blank line."""
    yield from _validate_pep_number(next(iter(lines), ""))

    found_headers = {}  # header name -> 1-based line number of first occurrence
    line_num = 0
    for line_num, line in enumerate(lines, start=1):
        if line.strip() == "":
            headers_end_line_num = line_num
            break
        if match := HEADER_PATTERN.match(line):
            header = match[1]
            if header in ALL_HEADERS:
                if header not in found_headers:
                    found_headers[header] = line_num
                else:
                    yield line_num, f"Must not have duplicate header: {header} "
            else:
                yield line_num, f"Must not have invalid header: {header}"
    else:
        # No blank line: the header block runs to the end of the file, so
        # the (exclusive) end is one past the last line.  Using the last
        # line number itself would drop the final header line below.
        headers_end_line_num = line_num + 1

    yield from _validate_required_headers(found_headers.keys())

    # A header's body spans from its own line up to the line before the
    # next header, or up to the end of the header block for the last one.
    shifted_line_nums = list(found_headers.values())[1:]
    for i, (header, line_num) in enumerate(found_headers.items()):
        start = line_num - 1
        end = headers_end_line_num - 1
        if i < len(found_headers) - 1:
            end = shifted_line_nums[i] - 1
        remainder = "\n".join(lines[start:end]).removeprefix(f"{header}:")
        if remainder != "":
            if remainder[0] not in {" ", "\n"}:
                yield line_num, f"Headers must have a space after the colon: {header}"
            remainder = remainder.lstrip()
        yield from _validate_header(header, line_num, remainder)


def _validate_header(header: str, line_num: int, content: str) -> MessageIterator:
    """Dispatch *content* to the validator for *header*.

    Headers without a dedicated validator yield nothing.
    """
    if header == "Title":
        yield from _validate_title(line_num, content)
    elif header == "Author":
        yield from _validate_author(line_num, content)
    elif header == "Sponsor":
        yield from _validate_sponsor(line_num, content)
    elif header in {"BDFL-Delegate", "PEP-Delegate"}:
        yield from _validate_delegate(line_num, content)
    elif header == "Discussions-To":
        yield from _validate_discussions_to(line_num, content)
    elif header == "Status":
        yield from _validate_status(line_num, content)
    elif header == "Type":
        yield from _validate_type(line_num, content)
    elif header == "Topic":
        yield from _validate_topic(line_num, content)
    elif header == "Content-Type":
        yield from _validate_content_type(line_num, content)
    elif header in {"Requires", "Replaces", "Superseded-By"}:
        yield from _validate_pep_references(line_num, content)
    elif header == "Created":
        yield from _validate_created(line_num, content)
    elif header == "Python-Version":
        yield from _validate_python_version(line_num, content)
    elif header == "Post-History":
        yield from _validate_post_history(line_num, content)
    elif header == "Resolution":
        yield from _validate_resolution(line_num, content)


def check_direct_links(line_num: int, line: str) -> MessageIterator:
    """Check that PEPs and RFCs aren't linked directly"""
    line = line.lower()
    if "dev/peps/pep-" in line or "peps.python.org/pep-" in line:
        yield line_num, "Use the :pep:`NNN` role to refer to PEPs"
    if "rfc-editor.org/rfc/" in line or "ietf.org/doc/html/rfc" in line:
        yield line_num, "Use the :rfc:`NNN` role to refer to RFCs"


def _output_error(filename: Path, lines: Sequence[str], errors: Iterable[Message]) -> int:
    """Print every message in *errors* and return how many there were.

    With ``DETAILED_ERRORS`` set, the offending source line is printed
    below each message.
    """
    try:
        relative_filename = filename.relative_to(ROOT_DIR)
    except ValueError:
        # The file lives outside ROOT_DIR; show its full path instead.
        relative_filename = filename

    err_count = 0
    for line_num, msg in errors:
        err_count += 1

        print(f"{relative_filename}:{line_num}: {msg}")
        if not DETAILED_ERRORS:
            continue

        # Messages may point at line 0 (unreadable file) or at line 1 of an
        # empty file; show an empty line rather than raising IndexError.
        line = lines[line_num - 1] if 0 < line_num <= len(lines) else ""
        # Five-character gutter so the bars line up with the
        # four-wide line number plus a space printed below.
        print("     |")
        print(f"{line_num: >4} | '{line}'")
        print("     |")

    return err_count


###########################
#  PEP Header Validators  #
###########################


def _validate_required_headers(found_headers: KeysView[str]) -> MessageIterator:
    """PEPs must have all required headers, in the PEP 12 order"""
    if missing := REQUIRED_HEADERS.difference(found_headers):
        for missing_header in sorted(missing, key=ALL_HEADERS.index):
            yield 1, f"Must have required header: {missing_header}"

    ordered_headers = sorted(found_headers, key=ALL_HEADERS.index)
    if list(found_headers) != ordered_headers:
        order_str = ", ".join(ordered_headers)
        yield 1, "Headers must be in PEP 12 order. Correct order: " + order_str


def _validate_pep_number(line: str) -> MessageIterator:
    """'PEP' header must be the first line and hold a number 0-9999"""
    if not line.startswith("PEP: "):
        yield 1, "PEP must begin with the 'PEP:' header"
        return

    pep_number = line.removeprefix("PEP: ").lstrip()
    yield from _pep_num(1, pep_number, "'PEP:' header")


def _validate_title(line_num: int, line: str) -> MessageIterator:
    """'Title' must be 1-79 characters"""
    if len(line) == 0:
        yield line_num, "PEP must have a title"
    elif len(line) > 79:
        yield line_num, "PEP title must be less than 80 characters"


def _validate_author(line_num: int, body: str) -> MessageIterator:
    """'Author' must be list of 'Name <email@example.com>, …'"""
    lines = body.split("\n")
    for offset, line in enumerate(lines):
        if offset >= 1 and line[:9].isspace():
            # Checks for:
            # Author: Alice
            #              Bob
            #         ^^^^
            # Note that len("Author: ") == 8
            yield line_num + offset, "Author line must not be over-indented"
        if offset < len(lines) - 1:
            if not line.endswith(","):
                yield line_num + offset, "Author continuation lines must end with a comma"
        for part in line.removesuffix(",").split(", "):
            yield from _email(line_num + offset, part, "Author")


def _validate_sponsor(line_num: int, line: str) -> MessageIterator:
    """'Sponsor' must have format 'Name <email@example.com>'"""
    yield from _email(line_num, line, "Sponsor")


def _validate_delegate(line_num: int, line: str) -> MessageIterator:
    """'Delegate' must have format 'Name <email@example.com>'"""
    if line == "":
        return

    # PEP 451
    if ", " in line:
        for part in line.removesuffix(",").split(", "):
            yield from _email(line_num, part, "Delegate")
        return

    yield from _email(line_num, line, "Delegate")


def _validate_discussions_to(line_num: int, line: str) -> MessageIterator:
    """'Discussions-To' must be a thread URL or a mailing list address"""
    yield from _thread(line_num, line, "Discussions-To", discussions_to=True)
    if line.startswith("https://"):
        return
    for suffix in "@python.org", "@googlegroups.com":
        if line.endswith(suffix):
            remainder = line.removesuffix(suffix)
            if re.fullmatch(r"[\w\-]+", remainder) is None:
                yield line_num, "Discussions-To must be a valid mailing list"
            return
    yield line_num, "Discussions-To must be a valid thread URL or mailing list"


def _validate_status(line_num: int, line: str) -> MessageIterator:
    """'Status' must be a valid PEP status"""
    if line not in ALL_STATUSES:
        yield line_num, "Status must be a valid PEP status"


def _validate_type(line_num: int, line: str) -> MessageIterator:
    """'Type' must be a valid PEP type"""
    if line not in {"Standards Track", "Informational", "Process"}:
        yield line_num, "Type must be a valid PEP type"


def _validate_topic(line_num: int, line: str) -> MessageIterator:
    """'Topic' must be for a valid sub-index"""
    topics = line.split(", ")
    unique_topics = set(topics)
    if len(topics) > len(unique_topics):
        yield line_num, "Topic must not contain duplicates"

    if unique_topics - {"Governance", "Packaging", "Typing", "Release"}:
        if not all(map(str.istitle, unique_topics)):
            yield line_num, "Topic must be properly capitalised (Title Case)"
        # Compare case-insensitively so that a known topic with the wrong
        # capitalisation is reported once (above), not also as invalid.
        lowered_topics = {topic.lower() for topic in unique_topics}
        if lowered_topics - {"governance", "packaging", "typing", "release"}:
            yield line_num, "Topic must be for a valid sub-index"
    if sorted(topics) != topics:
        yield line_num, "Topic must be sorted lexicographically"


def _validate_content_type(line_num: int, line: str) -> MessageIterator:
    """'Content-Type' must be 'text/x-rst'"""
    if line != "text/x-rst":
        yield line_num, "Content-Type must be 'text/x-rst'"


def _validate_pep_references(line_num: int, line: str) -> MessageIterator:
    """`Requires`/`Replaces`/`Superseded-By` must be 'NNN' PEP IDs"""
    line = line.removesuffix(",").rstrip()
    if line.count(", ") != line.count(","):
        yield line_num, "PEP references must be separated by comma-spaces (', ')"
        return

    references = line.split(", ")
    for reference in references:
        yield from _pep_num(line_num, reference, "PEP reference")


def _validate_created(line_num: int, line: str) -> MessageIterator:
    """'Created' must be a 'DD-mmm-YYYY' date"""
    yield from _date(line_num, line, "Created")


def _validate_python_version(line_num: int, line: str) -> MessageIterator:
    """'Python-Version' must be an ``X.Y[.Z]`` version"""
    versions = line.split(", ")
    for version in versions:
        if version.count(".") not in {1, 2}:
            yield line_num, f"Python-Version must have two or three segments: {version}"
            continue

        try:
            major, minor, micro = version.split(".", 2)
        except ValueError:
            major, minor = version.split(".", 1)
            micro = ""

        # Exact membership: a substring test against "123" would also
        # accept "", "12", "23" and "123".
        if major not in {"1", "2", "3"}:
            yield line_num, f"Python-Version major part must be 1, 2, or 3: {version}"
        if not _is_digits(minor) and minor != "x":
            yield line_num, f"Python-Version minor part must be numeric: {version}"
        elif minor != "0" and minor[0] == "0":
            yield line_num, f"Python-Version minor part must not have leading zeros: {version}"

        if micro == "":
            # Nothing more to check for this version; move on to the next
            # one instead of abandoning the rest of the list.
            continue
        if minor == "x":
            yield line_num, f"Python-Version micro part must be empty if minor part is 'x': {version}"
        elif micro[0] == "0":
            yield line_num, f"Python-Version micro part must not have leading zeros: {version}"
        elif not _is_digits(micro):
            yield line_num, f"Python-Version micro part must be numeric: {version}"


def _validate_post_history(line_num: int, body: str) -> MessageIterator:
    """'Post-History' must be '`DD-mmm-YYYY <Thread URL>`__, …'"""
    if body == "":
        return

    for offset, line in enumerate(body.removesuffix(",").split("\n"), start=line_num):
        for post in line.removesuffix(",").strip().split(", "):
            prefix, postfix = (post.startswith("`"), post.endswith(">`__"))
            if not prefix and not postfix:
                yield from _date(offset, post, "Post-History")
            elif prefix and postfix:
                # ``partition`` never raises, unlike unpacking ``split``,
                # when the date/URL separator is missing or repeated.
                post_date, separator, post_url = post[1:-4].partition(" <")
                if not separator:
                    yield offset, f"Post-History entries must be formatted as '`DD-mmm-YYYY <Thread URL>`__': {post!r}"
                    continue
                yield from _date(offset, post_date, "Post-History")
                yield from _thread(offset, post_url, "Post-History")
            else:
                yield offset, "post line must be a date or both start with “`” and end with “>`__”"


def _validate_resolution(line_num: int, line: str) -> MessageIterator:
    """'Resolution' must be a direct thread/message URL"""
    yield from _thread(line_num, line, "Resolution", allow_message=True)


########################
#  Validation Helpers  #
########################


def _pep_num(line_num: int, pep_number: str, prefix: str) -> MessageIterator:
    """A PEP number must be non-blank, numeric, 0-9999, without leading zeros.

    *prefix* names the thing being validated and starts every message.
    """
    if pep_number == "":
        yield line_num, f"{prefix} must not be blank: {pep_number!r}"
        return
    if pep_number.startswith("0") and pep_number != "0":
        yield line_num, f"{prefix} must not contain leading zeros: {pep_number!r}"
    if not _is_digits(pep_number):
        yield line_num, f"{prefix} must be numeric: {pep_number!r}"
    elif not 0 <= int(pep_number) <= 9999:
        yield line_num, f"{prefix} must be between 0 and 9999: {pep_number!r}"


def _is_digits(string: str) -> bool:
    """Match a string of ASCII digits ([0-9]+)."""
    return string.isascii() and string.isdigit()


def _email(line_num: int, author_email: str, prefix: str) -> MessageIterator:
    """An entry must be 'Name' or 'Name <email@example.com>'.

    The address may use either '@' or ' at ' between local part and domain.
    """
    author_email = author_email.strip()

    if author_email.count("<") > 1:
        msg = f"{prefix} entries must not contain multiple '<': {author_email!r}"
        yield line_num, msg
    if author_email.count(">") > 1:
        msg = f"{prefix} entries must not contain multiple '>': {author_email!r}"
        yield line_num, msg
    if author_email.count("@") > 1:
        msg = f"{prefix} entries must not contain multiple '@': {author_email!r}"
        yield line_num, msg

    # Everything before the first '<' is the name
    author = author_email.split("<", 1)[0].rstrip()
    if NAME_PATTERN.fullmatch(author) is None:
        msg = f"{prefix} entries must begin with a valid 'Name': {author_email!r}"
        yield line_num, msg
        return

    email_text = author_email.removeprefix(author)
    if not email_text:
        # Does not have the optional email part
        return

    if not email_text.startswith(" <") or not email_text.endswith(">"):
        msg = f"{prefix} entries must be formatted as 'Name <email@example.com>': {author_email!r}"
        yield line_num, msg
    email_text = email_text.removeprefix(" <").removesuffix(">")

    if "@" in email_text:
        local, domain = email_text.rsplit("@", 1)
    elif " at " in email_text:
        local, domain = email_text.rsplit(" at ", 1)
    else:
        yield line_num, f"{prefix} entries must contain a valid email address: {author_email!r}"
        return
    if EMAIL_LOCAL_PART_PATTERN.fullmatch(local) is None or _invalid_domain(domain):
        yield line_num, f"{prefix} entries must contain a valid email address: {author_email!r}"


def _invalid_domain(domain_part: str) -> bool:
    """Return True if *domain_part* is not an acceptable domain name.

    Every label before the last must be alphanumeric once hyphens are
    removed; the last label must be ASCII alphanumeric.
    """
    *labels, root = domain_part.split(".")
    for label in labels:
        if not label.replace("-", "").isalnum():
            return True
    return not root.isalnum() or not root.isascii()


def _thread(line_num: int, url: str, prefix: str, *, allow_message: bool = False, discussions_to: bool = False) -> MessageIterator:
    """*url* must be a Discourse or Mailman thread URL.

    allow_message:
        also accept a Mailman 3 message URL.
    discussions_to:
        accept only a Discourse thread (never a post), and yield nothing
        for values that are not ``https://`` URLs.

    Raises ValueError if both flags are set.
    """
    if allow_message and discussions_to:
        msg = "allow_message and discussions_to cannot both be True"
        raise ValueError(msg)

    msg = f"{prefix} must be a valid thread URL"

    if not url.startswith("https://"):
        if not discussions_to:
            yield line_num, msg
        return

    if url.startswith("https://discuss.python.org/t/"):
        remainder = url.removeprefix("https://discuss.python.org/t/").removesuffix("/")

        # Discussions-To links must be the thread itself, not a post
        if discussions_to:
            # The equivalent pattern is similar to '([\w\-]+/)?\d+',
            # but the topic name must contain a non-numeric character

            # We use ``str.rpartition`` as the topic name is optional
            topic_name, _, topic_id = remainder.rpartition("/")
            if topic_name == '' and _is_digits(topic_id):
                return
            topic_name = topic_name.replace("-", "0").replace("_", "0")
            # the topic name must not be entirely numeric
            valid_topic_name = not _is_digits(topic_name) and topic_name.isalnum()
            if valid_topic_name and _is_digits(topic_id):
                return
        else:
            # The equivalent pattern is similar to '([\w\-]+/)?\d+(/\d+)?',
            # but the topic name must contain a non-numeric character
            if remainder.count("/") == 2:
                # When there are three parts, the URL must be "topic-name/topic-id/post-id".
                topic_name, topic_id, post_id = remainder.rsplit("/", 2)
                topic_name = topic_name.replace("-", "0").replace("_", "0")
                # the topic name must not be entirely numeric
                valid_topic_name = not _is_digits(topic_name) and topic_name.isalnum()
                if valid_topic_name and _is_digits(topic_id) and _is_digits(post_id):
                    return
            elif remainder.count("/") == 1:
                # When there are only two parts, there's an ambiguity between
                # "topic-name/topic-id" and "topic-id/post-id".
                # We disambiguate by checking if the LHS is a valid name and
                # the RHS is a valid topic ID (for the former),
                # and then if both the LHS and RHS are valid IDs (for the latter).
                left, right = remainder.rsplit("/")
                name_like = left.replace("-", "0").replace("_", "0")
                # the topic name must not be entirely numeric
                left_is_name = not _is_digits(name_like) and name_like.isalnum()
                if left_is_name and _is_digits(right):
                    return
                # Test the raw LHS, not the hyphen-replaced copy: otherwise
                # something like "12-3" would pass as a numeric topic ID.
                if _is_digits(left) and _is_digits(right):
                    return
            else:
                # When there's only one part, it must be a valid topic ID.
                if _is_digits(remainder):
                    return

    if url.startswith("https://mail.python.org/pipermail/"):
        remainder = url.removeprefix("https://mail.python.org/pipermail/")
        if MAILMAN_2_PATTERN.fullmatch(remainder) is not None:
            return

    if url.startswith("https://mail.python.org/archives/list/"):
        remainder = url.removeprefix("https://mail.python.org/archives/list/")
        if allow_message and MAILMAN_3_MESSAGE_PATTERN.fullmatch(remainder) is not None:
            return
        if MAILMAN_3_THREAD_PATTERN.fullmatch(remainder) is not None:
            return

    yield line_num, msg


def _date(line_num: int, date_str: str, prefix: str) -> MessageIterator:
    """*date_str* must be a zero-padded 'DD-mmm-YYYY' date.

    The year must be 1990 or later and the date at most 14 days ahead
    of now.
    """
    try:
        parsed_date = dt.datetime.strptime(date_str, "%d-%b-%Y")
    except ValueError:
        yield line_num, f"{prefix} must be a 'DD-mmm-YYYY' date: {date_str!r}"
        return
    else:
        if date_str[1] == "-":  # Date must be zero-padded
            yield line_num, f"{prefix} must be a 'DD-mmm-YYYY' date: {date_str!r}"
            return

    if parsed_date.year < 1990:
        yield line_num, f"{prefix} must not be before Python was invented: {date_str!r}"
    if parsed_date > (dt.datetime.now() + dt.timedelta(days=14)):
        yield line_num, f"{prefix} must not be in the future: {date_str!r}"


if __name__ == "__main__":
    if {"-h", "--help", "-?"}.intersection(sys.argv[1:]):
        print(__doc__, file=sys.stderr)
        raise SystemExit(0)

    # A dict is used as an ordered set of the files named on the command line
    files = {}
    for arg in sys.argv[1:]:
        if not arg.startswith("-"):
            files[arg] = None
        elif arg in {"-d", "--detailed"}:
            DETAILED_ERRORS = True
        else:
            print(f"Unknown option: {arg!r}", file=sys.stderr)
            raise SystemExit(1)
    raise SystemExit(check(files))