2023-09-04 23:44:46 -04:00
|
|
|
#!/usr/bin/env python3
|
|
|
|
|
|
|
|
# This file is placed in the public domain or under the
|
|
|
|
# CC0-1.0-Universal license, whichever is more permissive.
|
|
|
|
|
|
|
|
"""check-peps: Check PEPs for common mistakes.
|
|
|
|
|
|
|
|
Usage: check-peps [-d | --detailed] <PEP files...>
|
|
|
|
|
|
|
|
Only the PEPs specified are checked.
|
|
|
|
If none are specified, all PEPs are checked.
|
|
|
|
|
|
|
|
Use "--detailed" to show the contents of lines where errors were found.
|
|
|
|
"""
|
|
|
|
|
|
|
|
from __future__ import annotations
|
|
|
|
|
|
|
|
import datetime as dt
|
|
|
|
import re
|
|
|
|
import sys
|
|
|
|
from pathlib import Path
|
|
|
|
|
|
|
|
# Always False at runtime, so the guarded block below never executes.
# The names it defines are therefore only usable inside annotations, which
# are never evaluated thanks to ``from __future__ import annotations`` above.
TYPE_CHECKING = False
if TYPE_CHECKING:
    from collections.abc import Iterable, Iterator, KeysView, Sequence
    from typing import TypeAlias

    # (line number, warning message)
    Message: TypeAlias = tuple[int, str]
    # What every checker/validator generator in this file yields.
    MessageIterator: TypeAlias = Iterator[Message]
|
|
|
|
|
|
|
|
|
|
|
|
# get the directory with the PEP sources
# ROOT_DIR is the directory that contains this script; PEP_ROOT is its
# "peps" sub-directory, which ``check`` globs when no files are given.
ROOT_DIR = Path(__file__).resolve().parent
PEP_ROOT = ROOT_DIR / "peps"
|
2023-09-04 23:44:46 -04:00
|
|
|
|
|
|
|
# See PEP 12 for the order
# Note we retain "BDFL-Delegate"
# The position of a name in this tuple is used as the sort key when checking
# header order, and membership decides whether a header is recognised at all.
ALL_HEADERS = (
    "PEP",
    "Title",
    "Version",
    "Last-Modified",
    "Author",
    "Sponsor",
    "BDFL-Delegate", "PEP-Delegate",
    "Discussions-To",
    "Status",
    "Type",
    "Topic",
    "Content-Type",
    "Requires",
    "Created",
    "Python-Version",
    "Post-History",
    "Replaces",
    "Superseded-By",
    "Resolution",
)
# Headers whose absence is reported as an error.
REQUIRED_HEADERS = frozenset({"PEP", "Title", "Author", "Status", "Type", "Created"})
|
|
|
|
|
|
|
|
# See PEP 1 for the full list
# The 'Status' header must match one of these strings exactly (case-sensitive).
ALL_STATUSES = frozenset({
    "Accepted",
    "Active",
    "April Fool!",
    "Deferred",
    "Draft",
    "Final",
    "Provisional",
    "Rejected",
    "Superseded",
    "Withdrawn",
})

# PEPs that are allowed to link directly to PEPs
# Entries are the file stem with the "pep-" prefix removed (e.g. "0009").
SKIP_DIRECT_PEP_LINK_CHECK = frozenset({"0009", "0287", "0676", "0684", "8001"})
|
|
|
|
|
|
|
|
DEFAULT_FLAGS = re.ASCII | re.IGNORECASE  # Insensitive latin

# any sequence of letters or '-', followed by a single ':' and a space or end of line
HEADER_PATTERN = re.compile(r"^([a-z\-]+):(?: |$)", DEFAULT_FLAGS)
# any sequence of unicode letters or legal special characters
# (compiled without DEFAULT_FLAGS, so ``\W`` and ``\d`` are Unicode-aware here)
NAME_PATTERN = re.compile(r"(?:[^\W\d_]|[ ',\-.])+(?: |$)")
# any sequence of ASCII letters, digits, or legal special characters
EMAIL_LOCAL_PART_PATTERN = re.compile(r"[\w!#$%&'*+\-/=?^{|}~.]+", DEFAULT_FLAGS)

# Discourse paths: optional "topic-name/" then a numeric topic ID, and for
# posts an optional trailing "/post-id".
# NOTE(review): neither pattern is referenced elsewhere in the visible source;
# ``_thread`` validates Discourse URLs by hand -- confirm whether they are still needed.
DISCOURSE_THREAD_PATTERN = re.compile(r"([\w\-]+/)?\d+", DEFAULT_FLAGS)
DISCOURSE_POST_PATTERN = re.compile(r"([\w\-]+/)?\d+(/\d+)?", DEFAULT_FLAGS)

# Matched against the part of the URL after "https://mail.python.org/pipermail/".
MAILMAN_2_PATTERN = re.compile(r"[\w\-]+/\d{4}-[a-z]+/\d+\.html", DEFAULT_FLAGS)
# Matched against the part of the URL after "https://mail.python.org/archives/list/".
MAILMAN_3_THREAD_PATTERN = re.compile(r"[\w\-]+@python\.org/thread/[a-z0-9]+/?", DEFAULT_FLAGS)
MAILMAN_3_MESSAGE_PATTERN = re.compile(r"[\w\-]+@python\.org/message/[a-z0-9]+/?(#[a-z0-9]+)?", DEFAULT_FLAGS)

# Controlled by the "--detailed" flag
DETAILED_ERRORS = False
|
|
|
|
|
|
|
|
|
|
|
|
def check(filenames: Sequence[str] = (), /) -> int:
    """Check the named PEP files, or every PEP when none are named.

    Each file is passed to ``check_file`` and the per-file error counts are
    summed.  When at least one error was found a summary is written to
    stderr.  Returns 1 if any error was found, otherwise 0.
    """
    if filenames:
        paths = map(Path, filenames)
    else:
        paths = PEP_ROOT.glob("pep-????.rst")

    count = sum(check_file(path) for path in paths)
    if count <= 0:
        return 0

    plural = "" if count == 1 else "s"
    print(f"check-peps failed: {count} error{plural}", file=sys.stderr)
    return 1
|
|
|
|
|
|
|
|
|
|
|
|
def check_file(filename: Path, /) -> int:
    """Check a single PEP file and return the number of errors reported.

    A file that does not exist is reported as one error on line 0.
    """
    resolved = filename.resolve()
    try:
        text = resolved.read_text(encoding="utf-8")
    except FileNotFoundError:
        return _output_error(resolved, [""], [(0, "Could not read PEP!")])

    pep_lines = text.splitlines()
    return _output_error(resolved, pep_lines, check_peps(resolved, pep_lines))
|
|
|
|
|
|
|
|
|
|
|
|
def check_peps(filename: Path, lines: Sequence[str], /) -> MessageIterator:
    """Yield every ``(line number, message)`` problem found in one PEP.

    Header problems are yielded first, then direct-link problems line by
    line.  PEPs listed in ``SKIP_DIRECT_PEP_LINK_CHECK`` only get the header
    checks.
    """
    yield from check_headers(lines)

    # The exemption depends only on the file name, so decide it once rather
    # than re-testing it for every line of the file.
    if filename.stem.removeprefix("pep-") in SKIP_DIRECT_PEP_LINK_CHECK:
        return

    for line_num, line in enumerate(lines, start=1):
        yield from check_direct_links(line_num, line.lstrip())
|
|
|
|
|
|
|
|
|
|
|
|
def check_headers(lines: Sequence[str], /) -> MessageIterator:
    """Validate the header block at the top of a PEP.

    Yields ``(line number, message)`` pairs for: a bad first ('PEP:') line,
    unrecognised or duplicated headers, missing or mis-ordered headers, and
    problems with the content of each recognised header.
    """
    # The first line (or "" for an empty file) is checked as the 'PEP:' header.
    yield from _validate_pep_number(next(iter(lines), ""))

    # Maps each recognised header name to the line number of its first occurrence.
    found_headers = {}
    # Pre-set so the ``else`` clause below has a value when ``lines`` is empty.
    line_num = 0
    for line_num, line in enumerate(lines, start=1):
        if line.strip() == "":
            # The first blank line ends the header block.
            headers_end_line_num = line_num
            break
        if match := HEADER_PATTERN.match(line):
            header = match[1]
            if header in ALL_HEADERS:
                if header not in found_headers:
                    found_headers[match[1]] = line_num
                else:
                    yield line_num, f"Must not have duplicate header: {header} "
            else:
                yield line_num, f"Must not have invalid header: {header}"
    else:
        # No blank line was found: the loop ran to completion (or not at all).
        headers_end_line_num = line_num

    yield from _validate_required_headers(found_headers.keys())

    # Line numbers of every recorded header but the first, so entry ``i`` is
    # the line on which the header following the ``i``-th one starts.
    shifted_line_nums = list(found_headers.values())[1:]
    for i, (header, line_num) in enumerate(found_headers.items()):
        # Slice bounds are 0-based; a header's content runs up to (but not
        # including) the next recorded header, or the end of the header block.
        start = line_num - 1
        end = headers_end_line_num - 1
        if i < len(found_headers) - 1:
            end = shifted_line_nums[i] - 1
        # Continuation lines are kept, joined by newlines, with their indentation.
        remainder = "\n".join(lines[start:end]).removeprefix(f"{header}:")
        if remainder != "":
            if remainder[0] not in {" ", "\n"}:
                yield line_num, f"Headers must have a space after the colon: {header}"
            remainder = remainder.lstrip()
        yield from _validate_header(header, line_num, remainder)
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_header(header: str, line_num: int, content: str) -> MessageIterator:
    """Run the validator that belongs to *header* over its *content*.

    Headers without a dedicated validator yield nothing.
    """
    # Built on each call so the validators are looked up by name at call time.
    validators = {
        "Title": _validate_title,
        "Author": _validate_author,
        "Sponsor": _validate_sponsor,
        "BDFL-Delegate": _validate_delegate,
        "PEP-Delegate": _validate_delegate,
        "Discussions-To": _validate_discussions_to,
        "Status": _validate_status,
        "Type": _validate_type,
        "Topic": _validate_topic,
        "Content-Type": _validate_content_type,
        "Requires": _validate_pep_references,
        "Replaces": _validate_pep_references,
        "Superseded-By": _validate_pep_references,
        "Created": _validate_created,
        "Python-Version": _validate_python_version,
        "Post-History": _validate_post_history,
        "Resolution": _validate_resolution,
    }
    validator = validators.get(header)
    if validator is not None:
        yield from validator(line_num, content)
|
|
|
|
|
|
|
|
|
|
|
|
def check_direct_links(line_num: int, line: str) -> MessageIterator:
    """Flag lines that contain a direct URL to a PEP or an RFC.

    The comparison is case-insensitive.  A line may produce both messages.
    """
    lowered = line.lower()

    pep_markers = ("dev/peps/pep-", "peps.python.org/pep-")
    if any(marker in lowered for marker in pep_markers):
        yield line_num, "Use the :pep:`NNN` role to refer to PEPs"

    rfc_markers = ("rfc-editor.org/rfc/", "ietf.org/doc/html/rfc")
    if any(marker in lowered for marker in rfc_markers):
        yield line_num, "Use the :rfc:`NNN` role to refer to RFCs"
|
|
|
|
|
|
|
|
|
|
|
|
def _output_error(filename: Path, lines: Sequence[str], errors: Iterable[Message]) -> int:
    """Print every error for one file and return how many there were.

    Each error is printed as ``<file>:<line>: <message>``.  The file is shown
    relative to ``ROOT_DIR`` when it lives underneath it, otherwise the path
    is printed as given.  When ``DETAILED_ERRORS`` is set, the offending
    source line is printed below each message.
    """
    try:
        shown_filename = filename.relative_to(ROOT_DIR)
    except ValueError:
        # ``relative_to`` raises for files outside the repository; such a
        # file should still be reported rather than crash the checker.
        shown_filename = filename

    err_count = 0
    for line_num, msg in errors:
        err_count += 1

        print(f"{shown_filename}:{line_num}: {msg}")
        if not DETAILED_ERRORS:
            continue

        # ``line_num`` is 1-based; ``lines`` is 0-based.
        line = lines[line_num - 1]
        print(" |")
        print(f"{line_num: >4} | '{line}'")
        print(" |")

    return err_count
|
|
|
|
|
|
|
|
|
|
|
|
###########################
|
|
|
|
# PEP Header Validators #
|
|
|
|
###########################
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_required_headers(found_headers: KeysView[str]) -> MessageIterator:
    """Report missing required headers and headers out of PEP 12 order.

    All messages are attributed to line 1.
    """
    position = ALL_HEADERS.index

    absent = REQUIRED_HEADERS.difference(found_headers)
    for name in sorted(absent, key=position):
        yield 1, f"Must have required header: {name}"

    expected = sorted(found_headers, key=position)
    if expected != list(found_headers):
        yield 1, "Headers must be in PEP 12 order. Correct order: " + ", ".join(expected)
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_pep_number(line: str) -> MessageIterator:
    """Check that the first line is a 'PEP: <number>' header.

    All messages are attributed to line 1.
    """
    marker = "PEP: "
    if line.startswith(marker):
        number = line[len(marker):].lstrip()
        yield from _pep_num(1, number, "'PEP:' header")
    else:
        yield 1, "PEP must begin with the 'PEP:' header"
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_title(line_num: int, line: str) -> MessageIterator:
|
|
|
|
"""'Title' must be 1-79 characters"""
|
|
|
|
|
|
|
|
if len(line) == 0:
|
|
|
|
yield line_num, "PEP must have a title"
|
|
|
|
elif len(line) > 79:
|
|
|
|
yield line_num, "PEP title must be less than 80 characters"
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_author(line_num: int, body: str) -> MessageIterator:
    """Check an 'Author' header made of 'Name <email@example.com>, ...' entries.

    *body* may span several lines separated by newlines; messages carry the
    line number of the row they refer to.
    """
    rows = body.split("\n")
    final_index = len(rows) - 1
    for index, row in enumerate(rows):
        current_line = line_num + index

        # Continuation rows should line up under the first name, i.e. be
        # indented by len("Author: ") == 8 characters.  Nine or more leading
        # whitespace characters means the row is over-indented:
        #     Author: Alice
        #              Bob
        if index >= 1 and row[:9].isspace():
            yield current_line, "Author line must not be over-indented"

        # Every row except the last must end with a comma.
        if index < final_index and not row.endswith(","):
            yield current_line, "Author continuation lines must end with a comma"

        for entry in row.removesuffix(",").split(", "):
            yield from _email(current_line, entry, "Author")
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_sponsor(line_num: int, line: str) -> MessageIterator:
    """Check that 'Sponsor' is a single 'Name <email@example.com>' entry."""

    yield from _email(line_num, line, prefix="Sponsor")
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_delegate(line_num: int, line: str) -> MessageIterator:
    """Check a delegate header of 'Name <email@example.com>' entries.

    An empty header is accepted.  Several comma-separated delegates are
    checked one by one.
    """
    if not line:
        return

    # PEP 451
    if ", " in line:
        entries = line.removesuffix(",").split(", ")
    else:
        entries = [line]

    for entry in entries:
        yield from _email(line_num, entry, "Delegate")
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_discussions_to(line_num: int, line: str) -> MessageIterator:
    """Check that 'Discussions-To' is a thread URL or a mailing list address."""

    yield from _thread(line_num, line, "Discussions-To", discussions_to=True)
    if line.startswith("https://"):
        # URLs have been fully handled by the thread check above.
        return

    list_domains = ("@python.org", "@googlegroups.com")
    domain = next((d for d in list_domains if line.endswith(d)), None)
    if domain is None:
        yield line_num, "Discussions-To must be a valid thread URL or mailing list"
    elif re.fullmatch(r"[\w\-]+", line.removesuffix(domain)) is None:
        yield line_num, "Discussions-To must be a valid mailing list"
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_status(line_num: int, line: str) -> MessageIterator:
    """Check that 'Status' is one of the entries in ``ALL_STATUSES``."""

    if line in ALL_STATUSES:
        return
    yield line_num, "Status must be a valid PEP status"
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_type(line_num: int, line: str) -> MessageIterator:
|
|
|
|
"""'Type' must be a valid PEP type"""
|
|
|
|
|
|
|
|
if line not in {"Standards Track", "Informational", "Process"}:
|
|
|
|
yield line_num, "Type must be a valid PEP type"
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_topic(line_num: int, line: str) -> MessageIterator:
|
|
|
|
"""'Topic' must be for a valid sub-index"""
|
|
|
|
|
|
|
|
topics = line.split(", ")
|
|
|
|
unique_topics = set(topics)
|
|
|
|
if len(topics) > len(unique_topics):
|
|
|
|
yield line_num, "Topic must not contain duplicates"
|
|
|
|
|
|
|
|
if unique_topics - {"Governance", "Packaging", "Typing", "Release"}:
|
|
|
|
if not all(map(str.istitle, unique_topics)):
|
|
|
|
yield line_num, "Topic must be properly capitalised (Title Case)"
|
|
|
|
if unique_topics - {"governance", "packaging", "typing", "release"}:
|
|
|
|
yield line_num, "Topic must be for a valid sub-index"
|
|
|
|
if sorted(topics) != topics:
|
|
|
|
yield line_num, "Topic must be sorted lexicographically"
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_content_type(line_num: int, line: str) -> MessageIterator:
|
|
|
|
"""'Content-Type' must be 'text/x-rst'"""
|
|
|
|
|
|
|
|
if line != "text/x-rst":
|
|
|
|
yield line_num, "Content-Type must be 'text/x-rst'"
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_pep_references(line_num: int, line: str) -> MessageIterator:
    """Check a `Requires`/`Replaces`/`Superseded-By` list of PEP numbers.

    Entries must be separated by ', '.  One trailing comma is tolerated.
    """
    trimmed = line.removesuffix(",").rstrip()

    # Every comma must be followed by a space, otherwise the counts differ.
    if trimmed.count(",") != trimmed.count(", "):
        yield line_num, "PEP references must be separated by comma-spaces (', ')"
        return

    for reference in trimmed.split(", "):
        yield from _pep_num(line_num, reference, "PEP reference")
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_created(line_num: int, line: str) -> MessageIterator:
    """Check that 'Created' is a valid 'DD-mmm-YYYY' date."""

    yield from _date(line_num, line, prefix="Created")
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_python_version(line_num: int, line: str) -> MessageIterator:
    """'Python-Version' must be an ``X.Y[.Z]`` version

    *line* is a ', '-separated list of versions and every entry is checked.
    The major part must be 1, 2 or 3.  The minor part must be numeric
    without leading zeros, or 'x'.  The optional micro part must be numeric
    without a leading zero and is not allowed after an 'x' minor part.
    """

    for version in line.split(", "):
        if version.count(".") not in {1, 2}:
            yield line_num, f"Python-Version must have two or three segments: {version}"
            continue

        # One or two dots were counted above, so there are two or three parts.
        major, minor, *rest = version.split(".")
        micro = rest[0] if rest else ""

        # Exact membership: a substring test against "123" would also accept
        # "", "12", "23" and "123".
        if major not in {"1", "2", "3"}:
            yield line_num, f"Python-Version major part must be 1, 2, or 3: {version}"
        if not _is_digits(minor) and minor != "x":
            yield line_num, f"Python-Version minor part must be numeric: {version}"
        elif minor != "0" and minor[0] == "0":
            yield line_num, f"Python-Version minor part must not have leading zeros: {version}"

        if micro == "":
            # No micro part: go on to the next version in the list
            # (returning here would leave the remaining versions unchecked).
            continue
        if minor == "x":
            yield line_num, f"Python-Version micro part must be empty if minor part is 'x': {version}"
        elif micro[0] == "0":
            yield line_num, f"Python-Version micro part must not have leading zeros: {version}"
        elif not _is_digits(micro):
            yield line_num, f"Python-Version micro part must be numeric: {version}"
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_post_history(line_num: int, body: str) -> MessageIterator:
    """'Post-History' must be '`DD-mmm-YYYY <Thread URL>`__, …'

    *body* may span several lines; each line holds ', '-separated posts.
    A post is either a bare date or a reST link whose text is the date and
    whose target is the thread URL.  An empty header is accepted.
    """

    if body == "":
        return

    for offset, line in enumerate(body.removesuffix(",").split("\n"), start=line_num):
        for post in line.removesuffix(",").strip().split(", "):
            if not post.startswith("`") and not post.endswith(">`__"):
                yield from _date(offset, post, "Post-History")
            else:
                # Strip the leading "`" and trailing ">`__", then split into
                # date and URL.  ``partition`` never raises, so a malformed
                # entry is reported by the date/thread checks below.
                post_date, _, post_url = post[1:-4].partition(" <")
                yield from _date(offset, post_date, "Post-History")
                yield from _thread(offset, post_url, "Post-History")
|
|
|
|
|
|
|
|
|
|
|
|
def _validate_resolution(line_num: int, line: str) -> MessageIterator:
    """Check that 'Resolution' is a direct thread or message URL."""

    yield from _thread(line_num, line, prefix="Resolution", allow_message=True)
|
|
|
|
|
|
|
|
|
|
|
|
########################
|
|
|
|
# Validation Helpers #
|
|
|
|
########################
|
|
|
|
|
|
|
|
def _pep_num(line_num: int, pep_number: str, prefix: str) -> MessageIterator:
    """Check that *pep_number* is a PEP number between 0 and 9999.

    *prefix* names the thing being checked and starts every message.
    """
    if not pep_number:
        yield line_num, f"{prefix} must not be blank: {pep_number!r}"
        return

    # "0" itself is fine; any other value starting with "0" has leading zeros.
    if pep_number != "0" and pep_number.startswith("0"):
        yield line_num, f"{prefix} must not contain leading zeros: {pep_number!r}"

    if not _is_digits(pep_number):
        yield line_num, f"{prefix} must be numeric: {pep_number!r}"
        return

    if int(pep_number) not in range(10000):
        yield line_num, f"{prefix} must be between 0 and 9999: {pep_number!r}"
|
|
|
|
|
|
|
|
|
|
|
|
def _is_digits(string: str) -> bool:
|
|
|
|
"""Match a string of ASCII digits ([0-9]+)."""
|
|
|
|
return string.isascii() and string.isdigit()
|
|
|
|
|
|
|
|
|
|
|
|
def _email(line_num: int, author_email: str, prefix: str) -> MessageIterator:
    """Validate one 'Name <email@example.com>' entry.

    The '<email>' part is optional.  *prefix* names the header being checked
    and starts every message.  Several problems may be reported for the same
    entry.
    """
    author_email = author_email.strip()

    # Counting checks: each of these characters may appear at most once.
    if author_email.count("<") > 1:
        msg = f"{prefix} entries must not contain multiple '<': {author_email!r}"
        yield line_num, msg
    if author_email.count(">") > 1:
        msg = f"{prefix} entries must not contain multiple '>': {author_email!r}"
        yield line_num, msg
    if author_email.count("@") > 1:
        msg = f"{prefix} entries must not contain multiple '@': {author_email!r}"
        yield line_num, msg

    # Everything before the first '<' is the name.
    author = author_email.split("<", 1)[0].rstrip()
    if NAME_PATTERN.fullmatch(author) is None:
        msg = f"{prefix} entries must begin with a valid 'Name': {author_email!r}"
        yield line_num, msg
        return

    # What is left after the name should be " <email>" or nothing at all.
    email_text = author_email.removeprefix(author)
    if not email_text:
        # Does not have the optional email part
        return

    if not email_text.startswith(" <") or not email_text.endswith(">"):
        msg = f"{prefix} entries must be formatted as 'Name <email@example.com>': {author_email!r}"
        yield line_num, msg
    # Checking continues even after a formatting error, on whatever is left.
    email_text = email_text.removeprefix(" <").removesuffix(">")

    # Both "local@domain" and the obfuscated "local at domain" are accepted;
    # the split is made at the last separator.
    if "@" in email_text:
        local, domain = email_text.rsplit("@", 1)
    elif " at " in email_text:
        local, domain = email_text.rsplit(" at ", 1)
    else:
        yield line_num, f"{prefix} entries must contain a valid email address: {author_email!r}"
        return
    if EMAIL_LOCAL_PART_PATTERN.fullmatch(local) is None or _invalid_domain(domain):
        yield line_num, f"{prefix} entries must contain a valid email address: {author_email!r}"
|
|
|
|
|
|
|
|
|
|
|
|
def _invalid_domain(domain_part: str) -> bool:
|
|
|
|
*labels, root = domain_part.split(".")
|
|
|
|
for label in labels:
|
|
|
|
if not label.replace("-", "").isalnum():
|
|
|
|
return True
|
|
|
|
return not root.isalnum() or not root.isascii()
|
|
|
|
|
|
|
|
|
|
|
|
def _thread(line_num: int, url: str, prefix: str, *, allow_message: bool = False, discussions_to: bool = False) -> MessageIterator:
    """Validate that *url* points at a recognised discussion thread.

    Accepted forms are Discourse topics under https://discuss.python.org/t/,
    Mailman 2 archives under https://mail.python.org/pipermail/ and Mailman 3
    archives under https://mail.python.org/archives/list/.  Yields a single
    '<prefix> must be a valid thread URL' message when nothing matches.

    With ``allow_message`` a Mailman 3 message URL is accepted as well.
    With ``discussions_to`` a Discourse link must be the topic itself (no
    post number), and a value that is not an https:// URL is silently
    accepted here.

    Raises ValueError if both flags are set; as this is a generator, the
    error surfaces when iteration starts.
    """
    if allow_message and discussions_to:
        msg = "allow_message and discussions_to cannot both be True"
        raise ValueError(msg)

    msg = f"{prefix} must be a valid thread URL"

    if not url.startswith("https://"):
        if not discussions_to:
            yield line_num, msg
        return

    # Each branch below returns as soon as the URL is recognised; anything
    # that falls through every branch is reported at the end.
    if url.startswith("https://discuss.python.org/t/"):
        remainder = url.removeprefix("https://discuss.python.org/t/").removesuffix("/")

        # Discussions-To links must be the thread itself, not a post
        if discussions_to:
            # The equivalent pattern is similar to '([\w\-]+/)?\d+',
            # but the topic name must contain a non-numeric character

            # We use ``str.rpartition`` as the topic name is optional
            topic_name, _, topic_id = remainder.rpartition("/")
            if topic_name == '' and _is_digits(topic_id):
                return
            # Map '-' and '_' to a digit so that ``isalnum`` accepts them.
            topic_name = topic_name.replace("-", "0").replace("_", "0")
            # the topic name must not be entirely numeric
            valid_topic_name = not _is_digits(topic_name) and topic_name.isalnum()
            if valid_topic_name and _is_digits(topic_id):
                return
        else:
            # The equivalent pattern is similar to '([\w\-]+/)?\d+(/\d+)?',
            # but the topic name must contain a non-numeric character
            if remainder.count("/") == 2:
                # When there are three parts, the URL must be "topic-name/topic-id/post-id".
                topic_name, topic_id, post_id = remainder.rsplit("/", 2)
                topic_name = topic_name.replace("-", "0").replace("_", "0")
                valid_topic_name = not _is_digits(topic_name) and topic_name.isalnum()
                if valid_topic_name and _is_digits(topic_id) and _is_digits(post_id):
                    # the topic name must not be entirely numeric
                    return
            elif remainder.count("/") == 1:
                # When there are only two parts, there's an ambiguity between
                # "topic-name/topic-id" and "topic-id/post-id".
                # We disambiguate by checking if the LHS is a valid name and
                # the RHS is a valid topic ID (for the former),
                # and then if both the LHS and RHS are valid IDs (for the latter).
                left, right = remainder.rsplit("/")
                left = left.replace("-", "0").replace("_", "0")
                # the topic name must not be entirely numeric
                left_is_name = not _is_digits(left) and left.isalnum()
                if left_is_name and _is_digits(right):
                    return
                elif _is_digits(left) and _is_digits(right):
                    return
            else:
                # When there's only one part, it must be a valid topic ID.
                if _is_digits(remainder):
                    return

    if url.startswith("https://mail.python.org/pipermail/"):
        remainder = url.removeprefix("https://mail.python.org/pipermail/")
        if MAILMAN_2_PATTERN.fullmatch(remainder) is not None:
            return

    if url.startswith("https://mail.python.org/archives/list/"):
        remainder = url.removeprefix("https://mail.python.org/archives/list/")
        if allow_message and MAILMAN_3_MESSAGE_PATTERN.fullmatch(remainder) is not None:
            return
        if MAILMAN_3_THREAD_PATTERN.fullmatch(remainder) is not None:
            return

    yield line_num, msg
|
|
|
|
|
|
|
|
|
|
|
|
def _date(line_num: int, date_str: str, prefix: str) -> MessageIterator:
|
|
|
|
try:
|
|
|
|
parsed_date = dt.datetime.strptime(date_str, "%d-%b-%Y")
|
|
|
|
except ValueError:
|
|
|
|
yield line_num, f"{prefix} must be a 'DD-mmm-YYYY' date: {date_str!r}"
|
|
|
|
return
|
|
|
|
else:
|
|
|
|
if date_str[1] == "-": # Date must be zero-padded
|
|
|
|
yield line_num, f"{prefix} must be a 'DD-mmm-YYYY' date: {date_str!r}"
|
|
|
|
return
|
|
|
|
|
|
|
|
if parsed_date.year < 1990:
|
|
|
|
yield line_num, f"{prefix} must not be before Python was invented: {date_str!r}"
|
|
|
|
if parsed_date > (dt.datetime.now() + dt.timedelta(days=14)):
|
|
|
|
yield line_num, f"{prefix} must not be in the future: {date_str!r}"
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: print help, or collect flags and file names
    # and exit with the status returned by ``check``.
    cli_args = sys.argv[1:]
    if not {"-h", "--help", "-?"}.isdisjoint(cli_args):
        print(__doc__, file=sys.stderr)
        raise SystemExit(0)

    # A dict is used as an insertion-ordered set of file names.
    files = {}
    for arg in cli_args:
        if arg in {"-d", "--detailed"}:
            DETAILED_ERRORS = True
        elif arg.startswith("-"):
            print(f"Unknown option: {arg!r}", file=sys.stderr)
            raise SystemExit(1)
        else:
            files[arg] = None
    raise SystemExit(check(files))
|