Sphinx support: Add support for RSS (#1934)
See #2, #1385 for context. This is the RSS generation part, building on PR #1930. It contains the logic for generating RSS This was originally in #1385 and #1565, split out for ease of review
This commit is contained in:
parent
35b0cc1a92
commit
b8b64248d0
6
Makefile
6
Makefile
|
@ -60,7 +60,11 @@ lint:
|
|||
SPHINX_JOBS=8
|
||||
SPHINX_BUILD=$(PYTHON) build.py -j $(SPHINX_JOBS)
|
||||
|
||||
pages: rss
|
||||
# TODO replace `rss:` with this when merged & tested
|
||||
pep_rss:
|
||||
$(PYTHON) pep_rss_gen.py
|
||||
|
||||
pages: pep_rss
|
||||
$(SPHINX_BUILD) --index-file
|
||||
|
||||
sphinx:
|
||||
|
|
|
@ -0,0 +1,145 @@
|
|||
import datetime
|
||||
import email.utils
|
||||
from pathlib import Path
|
||||
import re
|
||||
|
||||
from dateutil import parser
|
||||
import docutils.frontend
|
||||
import docutils.nodes
|
||||
import docutils.parsers.rst
|
||||
import docutils.utils
|
||||
from feedgen import entry
|
||||
from feedgen import feed
|
||||
|
||||
|
||||
# Monkeypatch feedgen.util.formatRFC2822
|
||||
def _format_rfc_2822(dt: datetime.datetime) -> str:
|
||||
return email.utils.format_datetime(dt, usegmt=True)
|
||||
|
||||
|
||||
entry.formatRFC2822 = feed.formatRFC2822 = _format_rfc_2822
|
||||
line_cache: dict[Path, dict[str, str]] = {}
|
||||
|
||||
|
||||
def first_line_starting_with(full_path: Path, text: str) -> str:
|
||||
# Try and retrieve from cache
|
||||
if full_path in line_cache:
|
||||
return line_cache[full_path].get(text, "")
|
||||
|
||||
# Else read source
|
||||
line_cache[full_path] = path_cache = {}
|
||||
for line in full_path.open(encoding="utf-8"):
|
||||
if line.startswith("Created:"):
|
||||
path_cache["Created:"] = line.removeprefix("Created:").strip()
|
||||
elif line.startswith("Title:"):
|
||||
path_cache["Title:"] = line.removeprefix("Title:").strip()
|
||||
elif line.startswith("Author:"):
|
||||
path_cache["Author:"] = line.removeprefix("Author:").strip()
|
||||
|
||||
# Once all have been found, exit loop
|
||||
if path_cache.keys == {"Created:", "Title:", "Author:"}:
|
||||
break
|
||||
return path_cache.get(text, "")
|
||||
|
||||
|
||||
def pep_creation(full_path: Path) -> datetime.datetime:
|
||||
created_str = first_line_starting_with(full_path, "Created:")
|
||||
# bleh, I was hoping to avoid re but some PEPs editorialize on the Created line
|
||||
# (note as of Aug 2020 only PEP 102 has additional content on the Created line)
|
||||
m = re.search(r"(\d+[- ][\w\d]+[- ]\d{2,4})", created_str)
|
||||
if not m:
|
||||
# some older ones have an empty line, that's okay, if it's old we ipso facto don't care about it.
|
||||
# "return None" would make the most sense but datetime objects refuse to compare with that. :-|
|
||||
return datetime.datetime(1900, 1, 1)
|
||||
created_str = m.group(1)
|
||||
try:
|
||||
return parser.parse(created_str, dayfirst=True)
|
||||
except (ValueError, OverflowError):
|
||||
return datetime.datetime(1900, 1, 1)
|
||||
|
||||
|
||||
def parse_rst(text: str) -> docutils.nodes.document:
|
||||
rst_parser = docutils.parsers.rst.Parser()
|
||||
components = (docutils.parsers.rst.Parser,)
|
||||
settings = docutils.frontend.OptionParser(components=components).get_default_values()
|
||||
document = docutils.utils.new_document('<rst-doc>', settings=settings)
|
||||
rst_parser.parse(text, document)
|
||||
return document
|
||||
|
||||
|
||||
def pep_abstract(full_path: Path) -> str:
|
||||
"""Return the first paragraph of the PEP abstract"""
|
||||
text = full_path.read_text(encoding="utf-8")
|
||||
for node in parse_rst(text):
|
||||
if "<title>Abstract</title>" in str(node):
|
||||
for child in node:
|
||||
if child.tagname == "paragraph":
|
||||
return child.astext().strip().replace("\n", " ")
|
||||
return ""
|
||||
|
||||
|
||||
def main():
|
||||
# get the directory with the PEP sources
|
||||
pep_dir = Path(__file__).parent
|
||||
|
||||
# get list of peps with creation time (from "Created:" string in pep source)
|
||||
peps_with_dt = sorted((pep_creation(path), path) for path in pep_dir.glob("pep-????.*"))
|
||||
|
||||
# generate rss items for 10 most recent peps
|
||||
items = []
|
||||
for dt, full_path in peps_with_dt[-10:]:
|
||||
try:
|
||||
pep_num = int(full_path.stem.split("-")[-1])
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
title = first_line_starting_with(full_path, "Title:")
|
||||
author = first_line_starting_with(full_path, "Author:")
|
||||
if "@" in author or " at " in author:
|
||||
parsed_authors = email.utils.getaddresses([author])
|
||||
# ideal would be to pass as a list of dicts with names and emails to
|
||||
# item.author, but FeedGen's RSS <author/> output doesn't pass W3C
|
||||
# validation (as of 12/06/2021)
|
||||
joined_authors = ", ".join(f"{name} ({email_address})" for name, email_address in parsed_authors)
|
||||
else:
|
||||
joined_authors = author
|
||||
url = f"https://www.python.org/dev/peps/pep-{pep_num:0>4}"
|
||||
|
||||
item = entry.FeedEntry()
|
||||
item.title(f"PEP {pep_num}: {title}")
|
||||
item.link(href=url)
|
||||
item.description(pep_abstract(full_path))
|
||||
item.guid(url, permalink=True)
|
||||
item.published(dt.replace(tzinfo=datetime.timezone.utc)) # ensure datetime has a timezone
|
||||
item.author(email=joined_authors)
|
||||
items.append(item)
|
||||
|
||||
# The rss envelope
|
||||
desc = """
|
||||
Newest Python Enhancement Proposals (PEPs) - Information on new
|
||||
language features, and some meta-information like release
|
||||
procedure and schedules.
|
||||
""".replace("\n ", " ").strip()
|
||||
|
||||
# Setup feed generator
|
||||
fg = feed.FeedGenerator()
|
||||
fg.language("en")
|
||||
fg.generator("")
|
||||
fg.docs("https://cyber.harvard.edu/rss/rss.html")
|
||||
|
||||
# Add metadata
|
||||
fg.title("Newest Python PEPs")
|
||||
fg.link(href="https://www.python.org/dev/peps")
|
||||
fg.link(href="https://www.python.org/dev/peps/peps.rss", rel="self")
|
||||
fg.description(desc)
|
||||
fg.lastBuildDate(datetime.datetime.utcnow().replace(tzinfo=datetime.timezone.utc))
|
||||
|
||||
# Add PEP information (ordered by newest first)
|
||||
for item in items:
|
||||
fg.add_entry(item)
|
||||
|
||||
pep_dir.joinpath("peps.rss").write_bytes(fg.rss_str(pretty=True))
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
|
@ -1,3 +1,6 @@
|
|||
# Requirements for building PEPs with Sphinx
|
||||
sphinx >= 3.5
|
||||
docutils >= 0.16
|
||||
|
||||
# For RSS
|
||||
feedgen >= 0.9.0 # For RSS feed
|
||||
|
|
Loading…
Reference in New Issue