Generate ``peps.rss`` via Sphinx (#3274)
parent 7b86f6deb0
commit 2b53c224d1
CODEOWNERS

@@ -16,7 +16,6 @@ AUTHOR_OVERRIDES.csv @AA-Turner
 build.py @AA-Turner
 conf.py @AA-Turner
 contents.rst @AA-Turner
-generate_rss.py @AA-Turner
 
 # Linting infrastructure
 .codespell/ @CAM-Gerlach @hugovk
Makefile (7 changed lines)

@@ -27,7 +27,7 @@ htmlview: html
 ## dirhtml to render PEPs to "index.html" files within "pep-NNNN" directories
 .PHONY: dirhtml
 dirhtml: BUILDER = dirhtml
-dirhtml: venv rss
+dirhtml: venv
 	$(SPHINXBUILD) $(ALLSPHINXOPTS)
 
 ## fail-warning to render PEPs to "pep-NNNN.html" files and fail the Sphinx build on any warning
@@ -41,11 +41,6 @@ check-links: BUILDER = linkcheck
 check-links: venv
 	$(SPHINXBUILD) $(ALLSPHINXOPTS)
 
-## rss to generate the peps.rss file
-.PHONY: rss
-rss: venv
-	$(VENVDIR)/bin/python3 generate_rss.py -o $(OUTPUT_DIR)
-
 ## clean to remove the venv and build files
 .PHONY: clean
 clean: clean-venv
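The standalone ``rss`` target is removed because feed generation now runs inside the Sphinx build itself (see the pep_sphinx_extensions changes below). Should the feed ever need regenerating by hand, a sketch like the following would work; the doctree path is an assumption based on the default build layout, not taken from this commit.

# Hypothetical manual invocation of the new Sphinx-integrated generator.
from pathlib import Path

from pep_sphinx_extensions.generate_rss import create_rss_feed

doctree_dir = Path("build/.doctrees")  # assumed doctree location
output_dir = Path("build")             # matches OUTPUT_DIR in the Makefile
create_rss_feed(doctree_dir, output_dir)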
generate_rss.py (210 lines deleted)

@@ -1,210 +0,0 @@
-#!/usr/bin/env python3
-# This file is placed in the public domain or under the
-# CC0-1.0-Universal license, whichever is more permissive.
-
-import argparse
-import datetime as dt
-import email.utils
-from html import escape
-from pathlib import Path
-import re
-
-import docutils.frontend
-from docutils import nodes
-from docutils import utils
-from docutils.parsers import rst
-from docutils.parsers.rst import roles
-
-# get the directory with the PEP sources
-PEP_ROOT = Path(__file__).parent
-
-
-def _format_rfc_2822(datetime: dt.datetime) -> str:
-    datetime = datetime.replace(tzinfo=dt.timezone.utc)
-    return email.utils.format_datetime(datetime, usegmt=True)
-
-
-line_cache: dict[Path, dict[str, str]] = {}
-
-# Monkeypatch PEP and RFC reference roles to match Sphinx behaviour
-EXPLICIT_TITLE_RE = re.compile(r'^(.+?)\s*(?<!\x00)<(.*?)>$', re.DOTALL)
-
-
-def _pep_reference_role(role, rawtext, text, lineno, inliner,
-                        options={}, content=[]):
-    matched = EXPLICIT_TITLE_RE.match(text)
-    if matched:
-        title = utils.unescape(matched.group(1))
-        target = utils.unescape(matched.group(2))
-    else:
-        target = utils.unescape(text)
-        title = "PEP " + utils.unescape(text)
-    pep_str, _, fragment = target.partition("#")
-    try:
-        pepnum = int(pep_str)
-        if pepnum < 0 or pepnum > 9999:
-            raise ValueError
-    except ValueError:
-        msg = inliner.reporter.error(
-            f'PEP number must be a number from 0 to 9999; "{pep_str}" is invalid.',
-            line=lineno)
-        prb = inliner.problematic(rawtext, rawtext, msg)
-        return [prb], [msg]
-    # Base URL mainly used by inliner.pep_reference; so this is correct:
-    ref = (inliner.document.settings.pep_base_url
-           + inliner.document.settings.pep_file_url_template % pepnum)
-    if fragment:
-        ref += "#" + fragment
-    roles.set_classes(options)
-    return [nodes.reference(rawtext, title, refuri=ref, **options)], []
-
-
-def _rfc_reference_role(role, rawtext, text, lineno, inliner,
-                        options={}, content=[]):
-    matched = EXPLICIT_TITLE_RE.match(text)
-    if matched:
-        title = utils.unescape(matched.group(1))
-        target = utils.unescape(matched.group(2))
-    else:
-        target = utils.unescape(text)
-        title = "RFC " + utils.unescape(text)
-    pep_str, _, fragment = target.partition("#")
-    try:
-        rfcnum = int(pep_str)
-        if rfcnum < 0 or rfcnum > 9999:
-            raise ValueError
-    except ValueError:
-        msg = inliner.reporter.error(
-            f'RFC number must be a number from 0 to 9999; "{pep_str}" is invalid.',
-            line=lineno)
-        prb = inliner.problematic(rawtext, rawtext, msg)
-        return [prb], [msg]
-    ref = (inliner.document.settings.rfc_base_url + inliner.rfc_url % rfcnum)
-    if fragment:
-        ref += "#" + fragment
-    roles.set_classes(options)
-    return [nodes.reference(rawtext, title, refuri=ref, **options)], []
-
-
-roles.register_canonical_role("pep-reference", _pep_reference_role)
-roles.register_canonical_role("rfc-reference", _rfc_reference_role)
-
-
-def first_line_starting_with(full_path: Path, text: str) -> str:
-    # Try and retrieve from cache
-    if full_path in line_cache:
-        return line_cache[full_path].get(text, "")
-
-    # Else read source
-    line_cache[full_path] = path_cache = {}
-    for line in full_path.open(encoding="utf-8"):
-        if line.startswith("Created:"):
-            path_cache["Created:"] = line.removeprefix("Created:").strip()
-        elif line.startswith("Title:"):
-            path_cache["Title:"] = line.removeprefix("Title:").strip()
-        elif line.startswith("Author:"):
-            path_cache["Author:"] = line.removeprefix("Author:").strip()
-
-        # Once all have been found, exit loop
-        if path_cache.keys() == {"Created:", "Title:", "Author:"}:
-            break
-    return path_cache.get(text, "")
-
-
-def pep_creation(full_path: Path) -> dt.datetime:
-    created_str = first_line_starting_with(full_path, "Created:")
-    if full_path.stem == "pep-0102":
-        # remove additional content on the Created line
-        created_str = created_str.split(" ", 1)[0]
-    return dt.datetime.strptime(created_str, "%d-%b-%Y")
-
-
-def parse_rst(full_path: Path) -> nodes.document:
-    text = full_path.read_text(encoding="utf-8")
-    settings = docutils.frontend.get_default_settings(rst.Parser)
-    document = utils.new_document(f'<{full_path}>', settings=settings)
-    rst.Parser(rfc2822=True).parse(text, document)
-    return document
-
-
-def pep_abstract(full_path: Path) -> str:
-    """Return the first paragraph of the PEP abstract"""
-    for node in parse_rst(full_path).findall(nodes.section):
-        if node.next_node(nodes.title).astext() == "Abstract":
-            return node.next_node(nodes.paragraph).astext().strip().replace("\n", " ")
-    return ""
-
-
-def main():
-    parser = argparse.ArgumentParser(description="Generate RSS feed")
-    parser.add_argument(
-        "-o",
-        "--output-dir",
-        default="build",  # synchronise with render.yaml -> deploy step
-        help="Output directory, relative to root. Default 'build'.",
-    )
-    args = parser.parse_args()
-
-    # get list of peps with creation time (from "Created:" string in pep source)
-    peps_with_dt = sorted((pep_creation(path), path) for path in PEP_ROOT.glob("pep-????.???"))
-
-    # generate rss items for 10 most recent peps
-    items = []
-    for datetime, full_path in peps_with_dt[-10:]:
-        try:
-            pep_num = int(full_path.stem.split("-")[-1])
-        except ValueError:
-            continue
-
-        title = first_line_starting_with(full_path, "Title:")
-        author = first_line_starting_with(full_path, "Author:")
-        if "@" in author or " at " in author:
-            parsed_authors = email.utils.getaddresses([author])
-            joined_authors = ", ".join(f"{name} ({email_address})" for name, email_address in parsed_authors)
-        else:
-            joined_authors = author
-        url = f"https://peps.python.org/pep-{pep_num:0>4}/"
-
-        item = f"""\
-    <item>
-      <title>PEP {pep_num}: {escape(title, quote=False)}</title>
-      <link>{escape(url, quote=False)}</link>
-      <description>{escape(pep_abstract(full_path), quote=False)}</description>
-      <author>{escape(joined_authors, quote=False)}</author>
-      <guid isPermaLink="true">{url}</guid>
-      <pubDate>{_format_rfc_2822(datetime)}</pubDate>
-    </item>"""
-        items.append(item)
-
-    # The rss envelope
-    desc = """
-    Newest Python Enhancement Proposals (PEPs) - Information on new
-    language features, and some meta-information like release
-    procedure and schedules.
-    """
-    last_build_date = _format_rfc_2822(dt.datetime.now(dt.timezone.utc))
-    items = "\n".join(reversed(items))
-    output = f"""\
-<?xml version='1.0' encoding='UTF-8'?>
-<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/" version="2.0">
-  <channel>
-    <title>Newest Python PEPs</title>
-    <link>https://peps.python.org/peps.rss</link>
-    <description>{" ".join(desc.split())}</description>
-    <atom:link href="https://peps.python.org/peps.rss" rel="self"/>
-    <docs>https://cyber.harvard.edu/rss/rss.html</docs>
-    <language>en</language>
-    <lastBuildDate>{last_build_date}</lastBuildDate>
-{items}
-  </channel>
-</rss>
-"""
-
-    # output directory for target HTML files
-    out_dir = PEP_ROOT / args.output_dir
-    out_dir.mkdir(exist_ok=True, parents=True)
-    out_dir.joinpath("peps.rss").write_text(output)
-
-
-if __name__ == "__main__":
-    main()
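The deleted script re-parsed every PEP source from scratch and monkeypatched the docutils :pep:/:rfc: roles to match Sphinx behaviour; neither is needed once the generator runs inside the Sphinx build. One behaviour carried over unchanged is the RFC 2822 date format used for <pubDate> and <lastBuildDate>. A quick stdlib illustration (not part of the commit):

import datetime as dt
from email.utils import format_datetime

# What _format_rfc_2822() produces for a PEP "Created:" date of 29-Jul-2023.
created = dt.datetime(2023, 7, 29, tzinfo=dt.timezone.utc)
print(format_datetime(created, usegmt=True))  # Sat, 29 Jul 2023 00:00:00 GMT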
pep_sphinx_extensions/__init__.py

@@ -7,6 +7,7 @@ from typing import TYPE_CHECKING
 from docutils.writers.html5_polyglot import HTMLTranslator
 from sphinx import environment
 
+from pep_sphinx_extensions.generate_rss import create_rss_feed
 from pep_sphinx_extensions.pep_processor.html import pep_html_builder
 from pep_sphinx_extensions.pep_processor.html import pep_html_translator
 from pep_sphinx_extensions.pep_processor.parsing import pep_banner_directive
@@ -29,9 +30,7 @@ def _update_config_for_builder(app: Sphinx) -> None:
     if app.builder.name == "dirhtml":
         app.env.settings["pep_url"] = "pep-{:0>4}"
 
-    # internal_builder exists if Sphinx is run by build.py
-    if "internal_builder" not in app.tags:
-        app.connect("build-finished", _post_build)  # Post-build tasks
+    app.connect("build-finished", _post_build)  # Post-build tasks
 
 
 def _post_build(app: Sphinx, exception: Exception | None) -> None:
@@ -41,7 +40,11 @@ def _post_build(app: Sphinx, exception: Exception | None) -> None:
 
     if exception is not None:
         return
-    create_index_file(Path(app.outdir), app.builder.name)
+
+    # internal_builder exists if Sphinx is run by build.py
+    if "internal_builder" not in app.tags:
+        create_index_file(Path(app.outdir), app.builder.name)
+    create_rss_feed(app.doctreedir, app.outdir)
 
 
 def setup(app: Sphinx) -> dict[str, bool]:
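The "build-finished" hook now always runs, and the internal_builder guard moves inside _post_build: the RSS feed is written for every build, while the index file is still only created for external (Makefile-driven) builds. For orientation, a minimal self-contained sketch of the Sphinx event flow relied on here; everything except the event name and callback signature is an assumption:

from pathlib import Path

from sphinx.application import Sphinx


def _post_build(app: Sphinx, exception: Exception | None) -> None:
    # Sphinx invokes "build-finished" callbacks with the raised exception,
    # or None on success.
    if exception is not None:
        return
    print("post-build tasks would run against", Path(app.outdir))


def setup(app: Sphinx) -> dict[str, bool]:
    app.connect("build-finished", _post_build)
    return {"parallel_read_safe": True, "parallel_write_safe": True}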
pep_sphinx_extensions/generate_rss.py (new file, 120 lines)

@@ -0,0 +1,120 @@
+# This file is placed in the public domain or under the
+# CC0-1.0-Universal license, whichever is more permissive.
+
+from __future__ import annotations
+
+import datetime as dt
+import pickle
+from email.utils import format_datetime, getaddresses
+from html import escape
+from pathlib import Path
+
+from docutils import nodes
+
+RSS_DESCRIPTION = (
+    "Newest Python Enhancement Proposals (PEPs): "
+    "Information on new language features "
+    "and some meta-information like release procedure and schedules."
+)
+
+# get the directory with the PEP sources
+PEP_ROOT = Path(__file__).parent
+
+
+def _format_rfc_2822(datetime: dt.datetime) -> str:
+    datetime = datetime.replace(tzinfo=dt.timezone.utc)
+    return format_datetime(datetime, usegmt=True)
+
+
+document_cache: dict[Path, dict[str, str]] = {}
+
+
+def get_from_doctree(full_path: Path, text: str) -> str:
+    # Try and retrieve from cache
+    if full_path in document_cache:
+        return document_cache[full_path].get(text, "")
+
+    # Else load doctree
+    document = pickle.loads(full_path.read_bytes())
+    # Store the headers (populated in the PEPHeaders transform)
+    document_cache[full_path] = path_cache = document.get("headers", {})
+    # Store the Abstract
+    path_cache["Abstract"] = pep_abstract(document)
+    # Return the requested key
+    return path_cache.get(text, "")
+
+
+def pep_creation(full_path: Path) -> dt.datetime:
+    created_str = get_from_doctree(full_path, "Created")
+    try:
+        return dt.datetime.strptime(created_str, "%d-%b-%Y")
+    except ValueError:
+        return dt.datetime.min
+
+
+def pep_abstract(document: nodes.document) -> str:
+    """Return the first paragraph of the PEP abstract"""
+    for node in document.findall(nodes.section):
+        title_node = node.next_node(nodes.title)
+        if title_node is None:
+            continue
+        if title_node.astext() == "Abstract":
+            return node.next_node(nodes.paragraph).astext().strip().replace("\n", " ")
+    return ""
+
+
+def _generate_items(doctree_dir: Path):
+    # get list of peps with creation time (from "Created:" string in pep source)
+    peps_with_dt = sorted((pep_creation(path), path) for path in doctree_dir.glob("pep-????.doctree"))
+
+    # generate rss items for 10 most recent peps (in reverse order)
+    for datetime, full_path in reversed(peps_with_dt[-10:]):
+        try:
+            pep_num = int(get_from_doctree(full_path, "PEP"))
+        except ValueError:
+            continue
+
+        title = get_from_doctree(full_path, "Title")
+        url = f"https://peps.python.org/pep-{pep_num:0>4}/"
+        abstract = get_from_doctree(full_path, "Abstract")
+        author = get_from_doctree(full_path, "Author")
+        if "@" in author or " at " in author:
+            parsed_authors = getaddresses([author])
+            joined_authors = ", ".join(f"{name} ({email_address})" for name, email_address in parsed_authors)
+        else:
+            joined_authors = author
+
+        item = f"""\
+    <item>
+      <title>PEP {pep_num}: {escape(title, quote=False)}</title>
+      <link>{escape(url, quote=False)}</link>
+      <description>{escape(abstract, quote=False)}</description>
+      <author>{escape(joined_authors, quote=False)}</author>
+      <guid isPermaLink="true">{url}</guid>
+      <pubDate>{_format_rfc_2822(datetime)}</pubDate>
+    </item>"""
+        yield item
+
+
+def create_rss_feed(doctree_dir: Path, output_dir: Path):
+    # The rss envelope
+    last_build_date = _format_rfc_2822(dt.datetime.now(dt.timezone.utc))
+    items = "\n".join(_generate_items(Path(doctree_dir)))
+    output = f"""\
+<?xml version='1.0' encoding='UTF-8'?>
+<rss xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/" version="2.0">
+  <channel>
+    <title>Newest Python PEPs</title>
+    <link>https://peps.python.org/peps.rss</link>
+    <description>{RSS_DESCRIPTION}</description>
+    <atom:link href="https://peps.python.org/peps.rss" rel="self"/>
+    <docs>https://cyber.harvard.edu/rss/rss.html</docs>
+    <language>en</language>
+    <lastBuildDate>{last_build_date}</lastBuildDate>
+{items}
+  </channel>
+</rss>
+"""
+
+    # output directory for target HTML files
+    Path(output_dir, "peps.rss").write_text(output, encoding="utf-8")
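The new module reads the pickled doctrees Sphinx has already written, rather than re-parsing PEP sources, and relies on the "headers" mapping that the PEPHeaders transform attaches to each document (see the next file). A small exploratory sketch of that access pattern; the doctree path is an assumption, not from the commit:

import pickle
from pathlib import Path

# Assumed location; Sphinx writes one .doctree pickle per source document.
doctree_path = Path("build/.doctrees/pep-0008.doctree")

document = pickle.loads(doctree_path.read_bytes())
headers = document.get("headers", {})  # attached by the PEPHeaders transform
print(headers.get("PEP"), "|", headers.get("Title"), "|", headers.get("Created"))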
pep_sphinx_extensions/pep_processor/transforms/pep_headers.py

@@ -72,11 +72,11 @@ class PEPHeaders(transforms.Transform):
             raise PEPParsingError("Document does not contain an RFC-2822 'PEP' header!")
 
         # Extract PEP number
-        value = pep_field[1].astext()
+        pep_num_str = pep_field[1].astext()
         try:
-            pep_num = int(value)
+            pep_num = int(pep_num_str)
         except ValueError:
-            raise PEPParsingError(f"'PEP' header must contain an integer. '{value}' is invalid!")
+            raise PEPParsingError(f"PEP header must contain an integer. '{pep_num_str}' is invalid!")
 
         # Special processing for PEP 0.
         if pep_num == 0:
@@ -89,7 +89,11 @@ class PEPHeaders(transforms.Transform):
             raise PEPParsingError("No title!")
 
         fields_to_remove = []
+        self.document["headers"] = headers = {}
         for field in header:
+            row_attributes = {sub.tagname: sub.rawsource for sub in field}
+            headers[row_attributes["field_name"]] = row_attributes["field_body"]
+
             name = field[0].astext().lower()
             body = field[1]
             if len(body) == 0:
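This is where the "headers" mapping consumed by generate_rss.py is built: each RFC-2822 header field is flattened into a name/body string pair before the transform continues its usual processing. A standalone sketch of the same decomposition, heavily hedged: the sample input and the assumption that rawsource is populated the same way outside the full PEP build are mine, not the commit's.

from docutils import frontend, nodes, utils
from docutils.parsers import rst

SOURCE = "PEP: 9999\nTitle: An Example PEP\nAuthor: A. N. Other\n\nBody text.\n"

# Parse the RFC-2822-style header block the way the PEP pipeline does.
settings = frontend.get_default_settings(rst.Parser)
document = utils.new_document("<sketch>", settings=settings)
rst.Parser(rfc2822=True).parse(SOURCE, document)

headers = {}
for field in document.findall(nodes.field):
    row_attributes = {sub.tagname: sub.rawsource for sub in field}
    headers[row_attributes["field_name"]] = row_attributes["field_body"]
print(headers)  # expected: {'PEP': '9999', 'Title': 'An Example PEP', 'Author': 'A. N. Other'}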