diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index 6d605c747..9cd7f698f 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -16,7 +16,6 @@ AUTHOR_OVERRIDES.csv @AA-Turner build.py @AA-Turner conf.py @AA-Turner contents.rst @AA-Turner -generate_rss.py @AA-Turner # Linting infrastructure .codespell/ @CAM-Gerlach @hugovk diff --git a/Makefile b/Makefile index cd1c2165f..b5e954bff 100644 --- a/Makefile +++ b/Makefile @@ -27,7 +27,7 @@ htmlview: html ## dirhtml to render PEPs to "index.html" files within "pep-NNNN" directories .PHONY: dirhtml dirhtml: BUILDER = dirhtml -dirhtml: venv rss +dirhtml: venv $(SPHINXBUILD) $(ALLSPHINXOPTS) ## fail-warning to render PEPs to "pep-NNNN.html" files and fail the Sphinx build on any warning @@ -41,11 +41,6 @@ check-links: BUILDER = linkcheck check-links: venv $(SPHINXBUILD) $(ALLSPHINXOPTS) -## rss to generate the peps.rss file -.PHONY: rss -rss: venv - $(VENVDIR)/bin/python3 generate_rss.py -o $(OUTPUT_DIR) - ## clean to remove the venv and build files .PHONY: clean clean: clean-venv diff --git a/generate_rss.py b/generate_rss.py deleted file mode 100755 index 256b519c1..000000000 --- a/generate_rss.py +++ /dev/null @@ -1,210 +0,0 @@ -#!/usr/bin/env python3 -# This file is placed in the public domain or under the -# CC0-1.0-Universal license, whichever is more permissive. - -import argparse -import datetime as dt -import email.utils -from html import escape -from pathlib import Path -import re - -import docutils.frontend -from docutils import nodes -from docutils import utils -from docutils.parsers import rst -from docutils.parsers.rst import roles - -# get the directory with the PEP sources -PEP_ROOT = Path(__file__).parent - - -def _format_rfc_2822(datetime: dt.datetime) -> str: - datetime = datetime.replace(tzinfo=dt.timezone.utc) - return email.utils.format_datetime(datetime, usegmt=True) - - -line_cache: dict[Path, dict[str, str]] = {} - -# Monkeypatch PEP and RFC reference roles to match Sphinx behaviour -EXPLICIT_TITLE_RE = re.compile(r'^(.+?)\s*(?$', re.DOTALL) - - -def _pep_reference_role(role, rawtext, text, lineno, inliner, - options={}, content=[]): - matched = EXPLICIT_TITLE_RE.match(text) - if matched: - title = utils.unescape(matched.group(1)) - target = utils.unescape(matched.group(2)) - else: - target = utils.unescape(text) - title = "PEP " + utils.unescape(text) - pep_str, _, fragment = target.partition("#") - try: - pepnum = int(pep_str) - if pepnum < 0 or pepnum > 9999: - raise ValueError - except ValueError: - msg = inliner.reporter.error( - f'PEP number must be a number from 0 to 9999; "{pep_str}" is invalid.', - line=lineno) - prb = inliner.problematic(rawtext, rawtext, msg) - return [prb], [msg] - # Base URL mainly used by inliner.pep_reference; so this is correct: - ref = (inliner.document.settings.pep_base_url - + inliner.document.settings.pep_file_url_template % pepnum) - if fragment: - ref += "#" + fragment - roles.set_classes(options) - return [nodes.reference(rawtext, title, refuri=ref, **options)], [] - - -def _rfc_reference_role(role, rawtext, text, lineno, inliner, - options={}, content=[]): - matched = EXPLICIT_TITLE_RE.match(text) - if matched: - title = utils.unescape(matched.group(1)) - target = utils.unescape(matched.group(2)) - else: - target = utils.unescape(text) - title = "RFC " + utils.unescape(text) - pep_str, _, fragment = target.partition("#") - try: - rfcnum = int(pep_str) - if rfcnum < 0 or rfcnum > 9999: - raise ValueError - except ValueError: - msg = inliner.reporter.error( - f'RFC number must be a number from 0 to 9999; "{pep_str}" is invalid.', - line=lineno) - prb = inliner.problematic(rawtext, rawtext, msg) - return [prb], [msg] - ref = (inliner.document.settings.rfc_base_url + inliner.rfc_url % rfcnum) - if fragment: - ref += "#" + fragment - roles.set_classes(options) - return [nodes.reference(rawtext, title, refuri=ref, **options)], [] - - -roles.register_canonical_role("pep-reference", _pep_reference_role) -roles.register_canonical_role("rfc-reference", _rfc_reference_role) - - -def first_line_starting_with(full_path: Path, text: str) -> str: - # Try and retrieve from cache - if full_path in line_cache: - return line_cache[full_path].get(text, "") - - # Else read source - line_cache[full_path] = path_cache = {} - for line in full_path.open(encoding="utf-8"): - if line.startswith("Created:"): - path_cache["Created:"] = line.removeprefix("Created:").strip() - elif line.startswith("Title:"): - path_cache["Title:"] = line.removeprefix("Title:").strip() - elif line.startswith("Author:"): - path_cache["Author:"] = line.removeprefix("Author:").strip() - - # Once all have been found, exit loop - if path_cache.keys == {"Created:", "Title:", "Author:"}: - break - return path_cache.get(text, "") - - -def pep_creation(full_path: Path) -> dt.datetime: - created_str = first_line_starting_with(full_path, "Created:") - if full_path.stem == "pep-0102": - # remove additional content on the Created line - created_str = created_str.split(" ", 1)[0] - return dt.datetime.strptime(created_str, "%d-%b-%Y") - - -def parse_rst(full_path: Path) -> nodes.document: - text = full_path.read_text(encoding="utf-8") - settings = docutils.frontend.get_default_settings(rst.Parser) - document = utils.new_document(f'<{full_path}>', settings=settings) - rst.Parser(rfc2822=True).parse(text, document) - return document - - -def pep_abstract(full_path: Path) -> str: - """Return the first paragraph of the PEP abstract""" - for node in parse_rst(full_path).findall(nodes.section): - if node.next_node(nodes.title).astext() == "Abstract": - return node.next_node(nodes.paragraph).astext().strip().replace("\n", " ") - return "" - - -def main(): - parser = argparse.ArgumentParser(description="Generate RSS feed") - parser.add_argument( - "-o", - "--output-dir", - default="build", # synchronise with render.yaml -> deploy step - help="Output directory, relative to root. Default 'build'.", - ) - args = parser.parse_args() - - # get list of peps with creation time (from "Created:" string in pep source) - peps_with_dt = sorted((pep_creation(path), path) for path in PEP_ROOT.glob("pep-????.???")) - - # generate rss items for 10 most recent peps - items = [] - for datetime, full_path in peps_with_dt[-10:]: - try: - pep_num = int(full_path.stem.split("-")[-1]) - except ValueError: - continue - - title = first_line_starting_with(full_path, "Title:") - author = first_line_starting_with(full_path, "Author:") - if "@" in author or " at " in author: - parsed_authors = email.utils.getaddresses([author]) - joined_authors = ", ".join(f"{name} ({email_address})" for name, email_address in parsed_authors) - else: - joined_authors = author - url = f"https://peps.python.org/pep-{pep_num:0>4}/" - - item = f"""\ - - PEP {pep_num}: {escape(title, quote=False)} - {escape(url, quote=False)} - {escape(pep_abstract(full_path), quote=False)} - {escape(joined_authors, quote=False)} - {url} - {_format_rfc_2822(datetime)} - """ - items.append(item) - - # The rss envelope - desc = """ - Newest Python Enhancement Proposals (PEPs) - Information on new - language features, and some meta-information like release - procedure and schedules. - """ - last_build_date = _format_rfc_2822(dt.datetime.now(dt.timezone.utc)) - items = "\n".join(reversed(items)) - output = f"""\ - - - - Newest Python PEPs - https://peps.python.org/peps.rss - {" ".join(desc.split())} - - https://cyber.harvard.edu/rss/rss.html - en - {last_build_date} -{items} - - -""" - - # output directory for target HTML files - out_dir = PEP_ROOT / args.output_dir - out_dir.mkdir(exist_ok=True, parents=True) - out_dir.joinpath("peps.rss").write_text(output) - - -if __name__ == "__main__": - main() diff --git a/pep_sphinx_extensions/__init__.py b/pep_sphinx_extensions/__init__.py index af53f240c..672a6a452 100644 --- a/pep_sphinx_extensions/__init__.py +++ b/pep_sphinx_extensions/__init__.py @@ -7,6 +7,7 @@ from typing import TYPE_CHECKING from docutils.writers.html5_polyglot import HTMLTranslator from sphinx import environment +from pep_sphinx_extensions.generate_rss import create_rss_feed from pep_sphinx_extensions.pep_processor.html import pep_html_builder from pep_sphinx_extensions.pep_processor.html import pep_html_translator from pep_sphinx_extensions.pep_processor.parsing import pep_banner_directive @@ -29,9 +30,7 @@ def _update_config_for_builder(app: Sphinx) -> None: if app.builder.name == "dirhtml": app.env.settings["pep_url"] = "pep-{:0>4}" - # internal_builder exists if Sphinx is run by build.py - if "internal_builder" not in app.tags: - app.connect("build-finished", _post_build) # Post-build tasks + app.connect("build-finished", _post_build) # Post-build tasks def _post_build(app: Sphinx, exception: Exception | None) -> None: @@ -41,7 +40,11 @@ def _post_build(app: Sphinx, exception: Exception | None) -> None: if exception is not None: return - create_index_file(Path(app.outdir), app.builder.name) + + # internal_builder exists if Sphinx is run by build.py + if "internal_builder" not in app.tags: + create_index_file(Path(app.outdir), app.builder.name) + create_rss_feed(app.doctreedir, app.outdir) def setup(app: Sphinx) -> dict[str, bool]: diff --git a/pep_sphinx_extensions/generate_rss.py b/pep_sphinx_extensions/generate_rss.py new file mode 100644 index 000000000..a7120c9d6 --- /dev/null +++ b/pep_sphinx_extensions/generate_rss.py @@ -0,0 +1,120 @@ +# This file is placed in the public domain or under the +# CC0-1.0-Universal license, whichever is more permissive. + +from __future__ import annotations + +import datetime as dt +import pickle +from email.utils import format_datetime, getaddresses +from html import escape +from pathlib import Path + +from docutils import nodes + +RSS_DESCRIPTION = ( + "Newest Python Enhancement Proposals (PEPs): " + "Information on new language features " + "and some meta-information like release procedure and schedules." +) + +# get the directory with the PEP sources +PEP_ROOT = Path(__file__).parent + + +def _format_rfc_2822(datetime: dt.datetime) -> str: + datetime = datetime.replace(tzinfo=dt.timezone.utc) + return format_datetime(datetime, usegmt=True) + + +document_cache: dict[Path, dict[str, str]] = {} + + +def get_from_doctree(full_path: Path, text: str) -> str: + # Try and retrieve from cache + if full_path in document_cache: + return document_cache[full_path].get(text, "") + + # Else load doctree + document = pickle.loads(full_path.read_bytes()) + # Store the headers (populated in the PEPHeaders transform) + document_cache[full_path] = path_cache = document.get("headers", {}) + # Store the Abstract + path_cache["Abstract"] = pep_abstract(document) + # Return the requested key + return path_cache.get(text, "") + + +def pep_creation(full_path: Path) -> dt.datetime: + created_str = get_from_doctree(full_path, "Created") + try: + return dt.datetime.strptime(created_str, "%d-%b-%Y") + except ValueError: + return dt.datetime.min + + +def pep_abstract(document: nodes.document) -> str: + """Return the first paragraph of the PEP abstract""" + for node in document.findall(nodes.section): + title_node = node.next_node(nodes.title) + if title_node is None: + continue + if title_node.astext() == "Abstract": + return node.next_node(nodes.paragraph).astext().strip().replace("\n", " ") + return "" + + +def _generate_items(doctree_dir: Path): + # get list of peps with creation time (from "Created:" string in pep source) + peps_with_dt = sorted((pep_creation(path), path) for path in doctree_dir.glob("pep-????.doctree")) + + # generate rss items for 10 most recent peps (in reverse order) + for datetime, full_path in reversed(peps_with_dt[-10:]): + try: + pep_num = int(get_from_doctree(full_path, "PEP")) + except ValueError: + continue + + title = get_from_doctree(full_path, "Title") + url = f"https://peps.python.org/pep-{pep_num:0>4}/" + abstract = get_from_doctree(full_path, "Abstract") + author = get_from_doctree(full_path, "Author") + if "@" in author or " at " in author: + parsed_authors = getaddresses([author]) + joined_authors = ", ".join(f"{name} ({email_address})" for name, email_address in parsed_authors) + else: + joined_authors = author + + item = f"""\ + + PEP {pep_num}: {escape(title, quote=False)} + {escape(url, quote=False)} + {escape(abstract, quote=False)} + {escape(joined_authors, quote=False)} + {url} + {_format_rfc_2822(datetime)} + """ + yield item + + +def create_rss_feed(doctree_dir: Path, output_dir: Path): + # The rss envelope + last_build_date = _format_rfc_2822(dt.datetime.now(dt.timezone.utc)) + items = "\n".join(_generate_items(Path(doctree_dir))) + output = f"""\ + + + + Newest Python PEPs + https://peps.python.org/peps.rss + {RSS_DESCRIPTION} + + https://cyber.harvard.edu/rss/rss.html + en + {last_build_date} +{items} + + +""" + + # output directory for target HTML files + Path(output_dir, "peps.rss").write_text(output, encoding="utf-8") diff --git a/pep_sphinx_extensions/pep_processor/transforms/pep_headers.py b/pep_sphinx_extensions/pep_processor/transforms/pep_headers.py index 4d0065588..a7cd0c303 100644 --- a/pep_sphinx_extensions/pep_processor/transforms/pep_headers.py +++ b/pep_sphinx_extensions/pep_processor/transforms/pep_headers.py @@ -72,11 +72,11 @@ class PEPHeaders(transforms.Transform): raise PEPParsingError("Document does not contain an RFC-2822 'PEP' header!") # Extract PEP number - value = pep_field[1].astext() + pep_num_str = pep_field[1].astext() try: - pep_num = int(value) + pep_num = int(pep_num_str) except ValueError: - raise PEPParsingError(f"'PEP' header must contain an integer. '{value}' is invalid!") + raise PEPParsingError(f"PEP header must contain an integer. '{pep_num_str}' is invalid!") # Special processing for PEP 0. if pep_num == 0: @@ -89,7 +89,11 @@ class PEPHeaders(transforms.Transform): raise PEPParsingError("No title!") fields_to_remove = [] + self.document["headers"] = headers = {} for field in header: + row_attributes = {sub.tagname: sub.rawsource for sub in field} + headers[row_attributes["field_name"]] = row_attributes["field_body"] + name = field[0].astext().lower() body = field[1] if len(body) == 0: