diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index 6d605c747..9cd7f698f 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -16,7 +16,6 @@ AUTHOR_OVERRIDES.csv @AA-Turner
build.py @AA-Turner
conf.py @AA-Turner
contents.rst @AA-Turner
-generate_rss.py @AA-Turner
# Linting infrastructure
.codespell/ @CAM-Gerlach @hugovk
diff --git a/Makefile b/Makefile
index cd1c2165f..b5e954bff 100644
--- a/Makefile
+++ b/Makefile
@@ -27,7 +27,7 @@ htmlview: html
## dirhtml to render PEPs to "index.html" files within "pep-NNNN" directories
.PHONY: dirhtml
dirhtml: BUILDER = dirhtml
-dirhtml: venv rss
+dirhtml: venv
$(SPHINXBUILD) $(ALLSPHINXOPTS)
## fail-warning to render PEPs to "pep-NNNN.html" files and fail the Sphinx build on any warning
@@ -41,11 +41,6 @@ check-links: BUILDER = linkcheck
check-links: venv
$(SPHINXBUILD) $(ALLSPHINXOPTS)
-## rss to generate the peps.rss file
-.PHONY: rss
-rss: venv
- $(VENVDIR)/bin/python3 generate_rss.py -o $(OUTPUT_DIR)
-
## clean to remove the venv and build files
.PHONY: clean
clean: clean-venv
diff --git a/generate_rss.py b/generate_rss.py
deleted file mode 100755
index 256b519c1..000000000
--- a/generate_rss.py
+++ /dev/null
@@ -1,210 +0,0 @@
-#!/usr/bin/env python3
-# This file is placed in the public domain or under the
-# CC0-1.0-Universal license, whichever is more permissive.
-
-import argparse
-import datetime as dt
-import email.utils
-from html import escape
-from pathlib import Path
-import re
-
-import docutils.frontend
-from docutils import nodes
-from docutils import utils
-from docutils.parsers import rst
-from docutils.parsers.rst import roles
-
-# get the directory with the PEP sources
-PEP_ROOT = Path(__file__).parent
-
-
-def _format_rfc_2822(datetime: dt.datetime) -> str:
- datetime = datetime.replace(tzinfo=dt.timezone.utc)
- return email.utils.format_datetime(datetime, usegmt=True)
-
-
-line_cache: dict[Path, dict[str, str]] = {}
-
-# Monkeypatch PEP and RFC reference roles to match Sphinx behaviour
-EXPLICIT_TITLE_RE = re.compile(r'^(.+?)\s*(?<!\x00)<(.*?)>$', re.DOTALL)
-
-
-def _pep_reference_role(role, rawtext, text, lineno, inliner,
- options={}, content=[]):
- matched = EXPLICIT_TITLE_RE.match(text)
- if matched:
- title = utils.unescape(matched.group(1))
- target = utils.unescape(matched.group(2))
- else:
- target = utils.unescape(text)
- title = "PEP " + utils.unescape(text)
- pep_str, _, fragment = target.partition("#")
- try:
- pepnum = int(pep_str)
- if pepnum < 0 or pepnum > 9999:
- raise ValueError
- except ValueError:
- msg = inliner.reporter.error(
- f'PEP number must be a number from 0 to 9999; "{pep_str}" is invalid.',
- line=lineno)
- prb = inliner.problematic(rawtext, rawtext, msg)
- return [prb], [msg]
- # Base URL mainly used by inliner.pep_reference; so this is correct:
- ref = (inliner.document.settings.pep_base_url
- + inliner.document.settings.pep_file_url_template % pepnum)
- if fragment:
- ref += "#" + fragment
- roles.set_classes(options)
- return [nodes.reference(rawtext, title, refuri=ref, **options)], []
-
-
-def _rfc_reference_role(role, rawtext, text, lineno, inliner,
- options={}, content=[]):
- matched = EXPLICIT_TITLE_RE.match(text)
- if matched:
- title = utils.unescape(matched.group(1))
- target = utils.unescape(matched.group(2))
- else:
- target = utils.unescape(text)
- title = "RFC " + utils.unescape(text)
- pep_str, _, fragment = target.partition("#")
- try:
- rfcnum = int(pep_str)
- if rfcnum < 0 or rfcnum > 9999:
- raise ValueError
- except ValueError:
- msg = inliner.reporter.error(
- f'RFC number must be a number from 0 to 9999; "{pep_str}" is invalid.',
- line=lineno)
- prb = inliner.problematic(rawtext, rawtext, msg)
- return [prb], [msg]
- ref = (inliner.document.settings.rfc_base_url + inliner.rfc_url % rfcnum)
- if fragment:
- ref += "#" + fragment
- roles.set_classes(options)
- return [nodes.reference(rawtext, title, refuri=ref, **options)], []
-
-
-roles.register_canonical_role("pep-reference", _pep_reference_role)
-roles.register_canonical_role("rfc-reference", _rfc_reference_role)
-
-
-def first_line_starting_with(full_path: Path, text: str) -> str:
- # Try and retrieve from cache
- if full_path in line_cache:
- return line_cache[full_path].get(text, "")
-
- # Else read source
- line_cache[full_path] = path_cache = {}
- for line in full_path.open(encoding="utf-8"):
- if line.startswith("Created:"):
- path_cache["Created:"] = line.removeprefix("Created:").strip()
- elif line.startswith("Title:"):
- path_cache["Title:"] = line.removeprefix("Title:").strip()
- elif line.startswith("Author:"):
- path_cache["Author:"] = line.removeprefix("Author:").strip()
-
- # Once all have been found, exit loop
- if path_cache.keys == {"Created:", "Title:", "Author:"}:
- break
- return path_cache.get(text, "")
-
-
-def pep_creation(full_path: Path) -> dt.datetime:
- created_str = first_line_starting_with(full_path, "Created:")
- if full_path.stem == "pep-0102":
- # remove additional content on the Created line
- created_str = created_str.split(" ", 1)[0]
- return dt.datetime.strptime(created_str, "%d-%b-%Y")
-
-
-def parse_rst(full_path: Path) -> nodes.document:
- text = full_path.read_text(encoding="utf-8")
- settings = docutils.frontend.get_default_settings(rst.Parser)
- document = utils.new_document(f'<{full_path}>', settings=settings)
- rst.Parser(rfc2822=True).parse(text, document)
- return document
-
-
-def pep_abstract(full_path: Path) -> str:
- """Return the first paragraph of the PEP abstract"""
- for node in parse_rst(full_path).findall(nodes.section):
- if node.next_node(nodes.title).astext() == "Abstract":
- return node.next_node(nodes.paragraph).astext().strip().replace("\n", " ")
- return ""
-
-
-def main():
- parser = argparse.ArgumentParser(description="Generate RSS feed")
- parser.add_argument(
- "-o",
- "--output-dir",
- default="build", # synchronise with render.yaml -> deploy step
- help="Output directory, relative to root. Default 'build'.",
- )
- args = parser.parse_args()
-
- # get list of peps with creation time (from "Created:" string in pep source)
- peps_with_dt = sorted((pep_creation(path), path) for path in PEP_ROOT.glob("pep-????.???"))
-
- # generate rss items for 10 most recent peps
- items = []
- for datetime, full_path in peps_with_dt[-10:]:
- try:
- pep_num = int(full_path.stem.split("-")[-1])
- except ValueError:
- continue
-
- title = first_line_starting_with(full_path, "Title:")
- author = first_line_starting_with(full_path, "Author:")
- if "@" in author or " at " in author:
- parsed_authors = email.utils.getaddresses([author])
- joined_authors = ", ".join(f"{name} ({email_address})" for name, email_address in parsed_authors)
- else:
- joined_authors = author
- url = f"https://peps.python.org/pep-{pep_num:0>4}/"
-
-        item = f"""\
-    <item>
-      <title>PEP {pep_num}: {escape(title, quote=False)}</title>
-      <link>{escape(url, quote=False)}</link>
-      <description>{escape(pep_abstract(full_path), quote=False)}</description>
-      <author>{escape(joined_authors, quote=False)}</author>
-      <guid isPermaLink="true">{url}</guid>
-      <pubDate>{_format_rfc_2822(datetime)}</pubDate>
-    </item>
-"""
- items.append(item)
-
- # The rss envelope
- desc = """
- Newest Python Enhancement Proposals (PEPs) - Information on new
- language features, and some meta-information like release
- procedure and schedules.
- """
- last_build_date = _format_rfc_2822(dt.datetime.now(dt.timezone.utc))
- items = "\n".join(reversed(items))
-    output = f"""\
-<?xml version='1.0' encoding='UTF-8'?>
-<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/" version="2.0">
-  <channel>
-    <title>Newest Python PEPs</title>
-    <link>https://peps.python.org/peps.rss</link>
-    <description>{" ".join(desc.split())}</description>
-    <atom:link href="https://peps.python.org/peps.rss" rel="self"/>
-    <docs>https://cyber.harvard.edu/rss/rss.html</docs>
-    <language>en</language>
-    <lastBuildDate>{last_build_date}</lastBuildDate>
-{items}
-  </channel>
-</rss>
-"""
-
- # output directory for target HTML files
- out_dir = PEP_ROOT / args.output_dir
- out_dir.mkdir(exist_ok=True, parents=True)
- out_dir.joinpath("peps.rss").write_text(output)
-
-
-if __name__ == "__main__":
- main()
diff --git a/pep_sphinx_extensions/__init__.py b/pep_sphinx_extensions/__init__.py
index af53f240c..672a6a452 100644
--- a/pep_sphinx_extensions/__init__.py
+++ b/pep_sphinx_extensions/__init__.py
@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING
from docutils.writers.html5_polyglot import HTMLTranslator
from sphinx import environment
+from pep_sphinx_extensions.generate_rss import create_rss_feed
from pep_sphinx_extensions.pep_processor.html import pep_html_builder
from pep_sphinx_extensions.pep_processor.html import pep_html_translator
from pep_sphinx_extensions.pep_processor.parsing import pep_banner_directive
@@ -29,9 +30,7 @@ def _update_config_for_builder(app: Sphinx) -> None:
if app.builder.name == "dirhtml":
app.env.settings["pep_url"] = "pep-{:0>4}"
- # internal_builder exists if Sphinx is run by build.py
- if "internal_builder" not in app.tags:
- app.connect("build-finished", _post_build) # Post-build tasks
+ app.connect("build-finished", _post_build) # Post-build tasks
def _post_build(app: Sphinx, exception: Exception | None) -> None:
@@ -41,7 +40,11 @@ def _post_build(app: Sphinx, exception: Exception | None) -> None:
if exception is not None:
return
- create_index_file(Path(app.outdir), app.builder.name)
+
+ # internal_builder exists if Sphinx is run by build.py
+ if "internal_builder" not in app.tags:
+ create_index_file(Path(app.outdir), app.builder.name)
+ create_rss_feed(app.doctreedir, app.outdir)
def setup(app: Sphinx) -> dict[str, bool]:
diff --git a/pep_sphinx_extensions/generate_rss.py b/pep_sphinx_extensions/generate_rss.py
new file mode 100644
index 000000000..a7120c9d6
--- /dev/null
+++ b/pep_sphinx_extensions/generate_rss.py
@@ -0,0 +1,120 @@
+# This file is placed in the public domain or under the
+# CC0-1.0-Universal license, whichever is more permissive.
+
+from __future__ import annotations
+
+import datetime as dt
+import pickle
+from email.utils import format_datetime, getaddresses
+from html import escape
+from pathlib import Path
+
+from docutils import nodes
+
+RSS_DESCRIPTION = (
+ "Newest Python Enhancement Proposals (PEPs): "
+ "Information on new language features "
+ "and some meta-information like release procedure and schedules."
+)
+
+# get the directory with the PEP sources
+PEP_ROOT = Path(__file__).parent
+
+
+def _format_rfc_2822(datetime: dt.datetime) -> str:
+ datetime = datetime.replace(tzinfo=dt.timezone.utc)
+ return format_datetime(datetime, usegmt=True)
+
+
+document_cache: dict[Path, dict[str, str]] = {}
+
+
+def get_from_doctree(full_path: Path, text: str) -> str:
+ # Try and retrieve from cache
+ if full_path in document_cache:
+ return document_cache[full_path].get(text, "")
+
+ # Else load doctree
+ document = pickle.loads(full_path.read_bytes())
+ # Store the headers (populated in the PEPHeaders transform)
+ document_cache[full_path] = path_cache = document.get("headers", {})
+ # Store the Abstract
+ path_cache["Abstract"] = pep_abstract(document)
+ # Return the requested key
+ return path_cache.get(text, "")
+
+
+def pep_creation(full_path: Path) -> dt.datetime:
+ created_str = get_from_doctree(full_path, "Created")
+ try:
+ return dt.datetime.strptime(created_str, "%d-%b-%Y")
+ except ValueError:
+ return dt.datetime.min
+
+
+def pep_abstract(document: nodes.document) -> str:
+ """Return the first paragraph of the PEP abstract"""
+ for node in document.findall(nodes.section):
+ title_node = node.next_node(nodes.title)
+ if title_node is None:
+ continue
+ if title_node.astext() == "Abstract":
+ return node.next_node(nodes.paragraph).astext().strip().replace("\n", " ")
+ return ""
+
+
+def _generate_items(doctree_dir: Path):
+ # get list of peps with creation time (from "Created:" string in pep source)
+ peps_with_dt = sorted((pep_creation(path), path) for path in doctree_dir.glob("pep-????.doctree"))
+
+ # generate rss items for 10 most recent peps (in reverse order)
+ for datetime, full_path in reversed(peps_with_dt[-10:]):
+ try:
+ pep_num = int(get_from_doctree(full_path, "PEP"))
+ except ValueError:
+ continue
+
+ title = get_from_doctree(full_path, "Title")
+ url = f"https://peps.python.org/pep-{pep_num:0>4}/"
+ abstract = get_from_doctree(full_path, "Abstract")
+ author = get_from_doctree(full_path, "Author")
+ if "@" in author or " at " in author:
+ parsed_authors = getaddresses([author])
+ joined_authors = ", ".join(f"{name} ({email_address})" for name, email_address in parsed_authors)
+ else:
+ joined_authors = author
+
+        item = f"""\
+    <item>
+      <title>PEP {pep_num}: {escape(title, quote=False)}</title>
+      <link>{escape(url, quote=False)}</link>
+      <description>{escape(abstract, quote=False)}</description>
+      <author>{escape(joined_authors, quote=False)}</author>
+      <guid isPermaLink="true">{url}</guid>
+      <pubDate>{_format_rfc_2822(datetime)}</pubDate>
+    </item>
+"""
+ yield item
+
+
+def create_rss_feed(doctree_dir: Path, output_dir: Path):
+ # The rss envelope
+ last_build_date = _format_rfc_2822(dt.datetime.now(dt.timezone.utc))
+ items = "\n".join(_generate_items(Path(doctree_dir)))
+    output = f"""\
+<?xml version='1.0' encoding='UTF-8'?>
+<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/" version="2.0">
+  <channel>
+    <title>Newest Python PEPs</title>
+    <link>https://peps.python.org/peps.rss</link>
+    <description>{RSS_DESCRIPTION}</description>
+    <atom:link href="https://peps.python.org/peps.rss" rel="self"/>
+    <docs>https://cyber.harvard.edu/rss/rss.html</docs>
+    <language>en</language>
+    <lastBuildDate>{last_build_date}</lastBuildDate>
+{items}
+  </channel>
+</rss>
+"""
+
+ # output directory for target HTML files
+ Path(output_dir, "peps.rss").write_text(output, encoding="utf-8")
diff --git a/pep_sphinx_extensions/pep_processor/transforms/pep_headers.py b/pep_sphinx_extensions/pep_processor/transforms/pep_headers.py
index 4d0065588..a7cd0c303 100644
--- a/pep_sphinx_extensions/pep_processor/transforms/pep_headers.py
+++ b/pep_sphinx_extensions/pep_processor/transforms/pep_headers.py
@@ -72,11 +72,11 @@ class PEPHeaders(transforms.Transform):
raise PEPParsingError("Document does not contain an RFC-2822 'PEP' header!")
# Extract PEP number
- value = pep_field[1].astext()
+ pep_num_str = pep_field[1].astext()
try:
- pep_num = int(value)
+ pep_num = int(pep_num_str)
except ValueError:
- raise PEPParsingError(f"'PEP' header must contain an integer. '{value}' is invalid!")
+ raise PEPParsingError(f"PEP header must contain an integer. '{pep_num_str}' is invalid!")
# Special processing for PEP 0.
if pep_num == 0:
@@ -89,7 +89,11 @@ class PEPHeaders(transforms.Transform):
raise PEPParsingError("No title!")
fields_to_remove = []
+ self.document["headers"] = headers = {}
for field in header:
+ row_attributes = {sub.tagname: sub.rawsource for sub in field}
+ headers[row_attributes["field_name"]] = row_attributes["field_body"]
+
name = field[0].astext().lower()
body = field[1]
if len(body) == 0: