Sphinx support: add docutils support files (#1931)

See #2, #1385 for context. Supersedes #1566.

This is the docutils parsing, transforms and writing part, building on PR #1930. It contains a pseudo-package, `pep_sphinx_extensions`, which itself contains:

### Docutils parsing:
- `PEPParser` - collates transforms and interfaces with Sphinx core
- `PEPRole` - deals with :PEP:`blah` in RST source

### Docutils transforms:
- `PEPContents` (Creates table of contents without page title)
- `PEPFooter` (Deals with footnotes, link to source, last modified commit)
- `PEPHeaders` (Parses RFC2822 headers)
- `PEPTitle` - Creates document title from PEP headers
- `PEPZero` - Masks email addresses and creates links to PEP numbers from tables in `pep-0000.rst`

### Docutils HTML output:
- `PEPTranslator` - Overrides to the default HTML translator to enable better matching of the current PEP styles
This commit is contained in:
Adam Turner 2021-06-09 01:37:55 +01:00 committed by GitHub
parent 353379966d
commit 0f3bbd9ad9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 615 additions and 3 deletions

View File

@ -14,7 +14,7 @@ def create_parser():
# flags / options
parser.add_argument("-f", "--fail-on-warning", action="store_true")
parser.add_argument("-n", "--nitpicky", action="store_true")
parser.add_argument("-j", "--jobs", type=int)
parser.add_argument("-j", "--jobs", type=int, default=1)
# extra build steps
parser.add_argument("-i", "--index-file", action="store_true") # for PEP 0

13
conf.py
View File

@ -1,5 +1,10 @@
"""Configuration for building PEPs using Sphinx."""
import sys
from pathlib import Path
sys.path.append(str(Path("pep_sphinx_extensions").absolute()))
# -- Project information -----------------------------------------------------
project = "PEPs"
@ -7,10 +12,13 @@ master_doc = "contents"
# -- General configuration ---------------------------------------------------
# Add any Sphinx extension module names here, as strings.
extensions = ["pep_sphinx_extensions", "sphinx.ext.githubpages"]
# The file extensions of source files. Sphinx uses these suffixes as sources.
source_suffix = {
".rst": "restructuredtext",
".txt": "restructuredtext",
".rst": "pep",
".txt": "pep",
}
# List of patterns (relative to source dir) to ignore when looking for source files.
@ -32,6 +40,7 @@ exclude_patterns = [
# -- Options for HTML output -------------------------------------------------
# HTML output settings
html_math_renderer = "maths_to_html" # Maths rendering
html_show_copyright = False # Turn off miscellany
html_show_sphinx = False
html_title = "peps.python.org" # Set <title/>

View File

@ -0,0 +1,47 @@
"""Sphinx extensions for performant PEP processing"""
from __future__ import annotations
from typing import TYPE_CHECKING
from sphinx.environment import default_settings
from docutils.writers.html5_polyglot import HTMLTranslator
from pep_sphinx_extensions.pep_processor.html import pep_html_translator
from pep_sphinx_extensions.pep_processor.parsing import pep_parser
from pep_sphinx_extensions.pep_processor.parsing import pep_role
if TYPE_CHECKING:
from sphinx.application import Sphinx
# Sphinx doesn't allow custom settings or Readers, so the shared
# sphinx.environment.default_settings mapping is extended in place instead.
# These settings should go in docutils.conf, but are overridden here for now
# so as not to affect pep2html.py
default_settings.update({
    "pep_references": True,
    "rfc_references": True,
    "pep_base_url": "",
    "pep_file_url_template": "pep-%04d.html",
    "_disable_config": True,  # disable using docutils.conf whilst running both PEP generators
})
def _depart_maths():
pass # No-op callable for the type checker
def setup(app: Sphinx) -> dict[str, bool]:
    """Register the PEP parser, role, HTML translator and maths renderer.

    Returns the extension metadata, which declares the extension safe for
    parallel reading and writing.
    """
    # PEP transforms, PEP reference links and docutils node visitor overrides
    app.add_source_parser(pep_parser.PEPParser)
    app.add_role("pep", pep_role.PEPRole(), override=True)
    app.set_translator("html", pep_html_translator.PEPTranslator)

    # Render maths to HTML: each renderer is a (visit, depart) pair
    app.add_html_math_renderer(
        "maths_to_html",
        (HTMLTranslator.visit_math, _depart_maths),
        (HTMLTranslator.visit_math_block, _depart_maths),
    )

    # Parallel safety: https://www.sphinx-doc.org/en/master/extdev/index.html#extension-metadata
    return dict(parallel_read_safe=True, parallel_write_safe=True)

View File

@ -0,0 +1,6 @@
"""Miscellaneous configuration variables for the PEP Sphinx extensions."""
# Filename stem of a PEP; ``str.format`` right-aligns the number, zero-padded to width 4
pep_stem = "pep-{:0>4}"
# Relative URL of a rendered PEP page: the stem plus the HTML suffix
pep_url = pep_stem + ".html"
# GitHub base URLs for a file's source view and its commit history
pep_vcs_url = "https://github.com/python/peps/blob/master/"
pep_commits_url = "https://github.com/python/peps/commits/master/"

View File

@ -0,0 +1,86 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from docutils import nodes
import sphinx.writers.html5 as html5
if TYPE_CHECKING:
from sphinx.builders import html
class PEPTranslator(html5.HTML5Translator):
    """Custom RST -> HTML translation rules for PEPs."""

    def __init__(self, document: nodes.document, builder: html.StandaloneHTMLBuilder):
        super().__init__(document, builder)
        self.compact_simple: bool = False

    @staticmethod
    def should_be_compact_paragraph(node: nodes.paragraph) -> bool:
        """Check if paragraph should be compact.

        Omitting <p/> tags around paragraph nodes gives visually compact lists.
        """
        # Never compact paragraphs that are children of document or compound.
        if isinstance(node.parent, (nodes.document, nodes.compound)):
            return False

        # Check for custom attributes in paragraph.
        for key, value in node.non_default_attributes().items():
            # Only a "classes" attribute that is empty or holds nothing but
            # "first" and/or "last" is tolerated. ``or`` short-circuits, so
            # set() is never applied to the value of an unrelated attribute
            # (which need not be iterable).
            if key != "classes" or not set(value) <= {"first", "last"}:
                return False

        # Only first paragraph can be compact (ignoring initial label & invisible nodes)
        first = isinstance(node.parent[0], nodes.label)
        visible_siblings = [child for child in node.parent.children[first:] if not isinstance(child, nodes.Invisible)]
        if visible_siblings[0] is not node:
            return False

        # otherwise, the paragraph should be compact
        return True

    def visit_paragraph(self, node: nodes.paragraph) -> None:
        """Remove <p> tags if possible."""
        if self.should_be_compact_paragraph(node):
            # Nothing opened, so nothing for depart_paragraph to close
            self.context.append("")
        else:
            self.body.append(self.starttag(node, "p", ""))
            self.context.append("</p>\n")

    def depart_paragraph(self, _: nodes.paragraph) -> None:
        """Add corresponding end tag from `visit_paragraph`."""
        self.body.append(self.context.pop())

    def depart_label(self, node) -> None:
        """PEP link/citation block cleanup with italicised backlinks."""
        if not self.settings.footnote_backlinks:
            self.body.append("</span>")
            self.body.append("</dt>\n<dd>")
            return

        # If only one reference to this footnote
        back_references = node.parent["backrefs"]
        if len(back_references) == 1:
            self.body.append("</a>")

        # Close the tag
        self.body.append("</span>")

        # If more than one reference, list numbered links back to each of them
        if len(back_references) > 1:
            back_links = [f"<a href='#{ref}'>{i}</a>" for i, ref in enumerate(back_references, start=1)]
            back_links_str = ", ".join(back_links)
            self.body.append(f"<span class='fn-backref'><em> ({back_links_str}) </em></span>")

        # Close the def tags
        self.body.append("</dt>\n<dd>")

    def unknown_visit(self, node: nodes.Node) -> None:
        """No processing for unknown node types."""

View File

@ -0,0 +1,32 @@
from __future__ import annotations
from typing import TYPE_CHECKING
from sphinx import parsers
from pep_sphinx_extensions.pep_processor.transforms import pep_headers
from pep_sphinx_extensions.pep_processor.transforms import pep_title
from pep_sphinx_extensions.pep_processor.transforms import pep_contents
from pep_sphinx_extensions.pep_processor.transforms import pep_footer
if TYPE_CHECKING:
from docutils import transforms
class PEPParser(parsers.RSTParser):
    """reStructuredText parser that runs the custom PEP transforms."""

    # Parser names, as referenced by source_suffix in conf.py
    supported = ("pep", "python-enhancement-proposal")

    def __init__(self):
        """Initialise the base parser with RFC 2822 header handling enabled."""
        super().__init__(rfc2822=True)

    def get_transforms(self) -> list[type[transforms.Transform]]:
        """Return the PEP transform classes to use with this parser."""
        pep_transforms = [
            pep_headers.PEPHeaders,
            pep_title.PEPTitle,
            pep_contents.PEPContents,
            pep_footer.PEPFooter,
        ]
        return pep_transforms

View File

@ -0,0 +1,16 @@
from sphinx import roles
from pep_sphinx_extensions.config import pep_url
class PEPRole(roles.PEP):
    """Override the :pep: role so URIs use ``pep_base_url`` and the ``pep_url`` template."""

    def build_uri(self) -> str:
        """Return the URI for the role text, keeping any ``#fragment`` suffix."""
        prefix = self.inliner.document.settings.pep_base_url
        number, _, anchor = self.target.partition("#")
        uri = prefix + pep_url.format(int(number))
        return f"{uri}#{anchor}" if anchor else uri

View File

@ -0,0 +1,63 @@
from pathlib import Path
from docutils import nodes
from docutils import transforms
from docutils.transforms import parts
class PEPContents(transforms.Transform):
    """Insert a contents placeholder, preceded by a horizontal rule, into PEP documents."""

    # Use same priority as docutils.transforms.Contents
    default_priority = 380

    def apply(self) -> None:
        """Build the "Contents" topic and schedule the `Contents` transform to fill it."""
        source = Path(self.document["source"])
        if not source.match("pep-*"):
            return  # only PEP files get a table of contents

        # Placeholder topic titled "Contents"
        toc_topic = nodes.topic("", nodes.title("", "Contents"), classes=["contents"])
        if not self.document.has_name("contents"):
            toc_topic["names"].append("contents")
        self.document.note_implicit_target(toc_topic)

        # Pending node that the Contents transform resolves later
        toc_pending = nodes.pending(Contents)
        toc_topic.append(toc_pending)
        self.document.note_pending(toc_pending)

        # Both nodes go in at index 2 of the first child; inserting the
        # transition second places it immediately before the contents topic
        root = self.document[0]
        root.insert(2, toc_topic)
        root.insert(2, nodes.transition())
class Contents(parts.Contents):
    """Build Table of Contents from document."""

    def __init__(self, document, startnode=None):
        super().__init__(document, startnode)

        # Both are read by parts.Contents.build_contents; set in apply()
        self.toc_id = None
        self.backlinks = None

    def apply(self) -> None:
        """Replace the pending placeholder with the generated contents list."""
        placeholder = self.startnode
        topic = placeholder.parent

        # used in parts.Contents.build_contents
        self.toc_id = topic["ids"][0]
        self.backlinks = self.document.settings.toc_backlinks

        # let the writer (or output software) build the contents list?
        if getattr(self.document.settings, "use_latex_toc", False):
            # move customisation settings to the parent node
            topic.attributes.update(placeholder.details)
            topic.remove(placeholder)
            return

        contents = self.build_contents(self.document[0])
        if contents:
            placeholder.replace_self(contents)
        else:
            # if no contents, remove the empty placeholder topic altogether
            topic.parent.remove(topic)

View File

@ -0,0 +1,111 @@
import datetime
import subprocess
from pathlib import Path
from docutils import nodes
from docutils import transforms
from docutils.transforms import misc
from docutils.transforms import references
from pep_sphinx_extensions import config
class PEPFooter(transforms.Transform):
    """Footer transforms for PEPs.

     - Appends external links to footnotes.
     - Creates a link to the (GitHub) source text.

    TargetNotes:
        Locate the `References` section, insert a placeholder at the end
        for an external target footnote insertion transform, and schedule
        the transform to run immediately.

    Source Link:
        Create the link to the source file from the document source path,
        and append the text to the end of the document.
    """

    # Uses same priority as docutils.transforms.TargetNotes
    default_priority = 520

    def apply(self) -> None:
        """Schedule the footnote transforms and append the footer links."""
        pep_source_path = Path(self.document["source"])
        if not pep_source_path.match("pep-*"):
            return  # not a PEP file, exit early

        doc = self.document[0]
        reference_section = copyright_section = None

        # Iterate through sections from the end of the document
        num_sections = len(doc)
        for i, section in enumerate(reversed(doc)):
            if not isinstance(section, nodes.section):
                continue
            title_words = section[0].astext().lower().split()
            if "references" in title_words:
                reference_section = section
                break
            elif "copyright" in title_words:
                # Convert the reversed position back into an index into `doc`
                copyright_section = num_sections - i - 1

        # Add a references section if we didn't find one
        if reference_section is None:
            reference_section = nodes.section()
            reference_section += nodes.title("", "References")
            self.document.set_id(reference_section)
            # `copyright_section` is an index, so compare against None:
            # a truthiness test would treat index 0 as "not found"
            if copyright_section is not None:
                # Put the new "References" section before "Copyright":
                doc.insert(copyright_section, reference_section)
            else:
                # Put the new "References" section at end of doc:
                doc.append(reference_section)

        # Add and schedule execution of the TargetNotes transform
        pending = nodes.pending(references.TargetNotes)
        reference_section.append(pending)
        self.document.note_pending(pending, priority=0)

        # If there are no references after TargetNotes has finished, remove the
        # references section
        pending = nodes.pending(misc.CallBack, details={"callback": self.cleanup_callback})
        reference_section.append(pending)
        self.document.note_pending(pending, priority=1)

        # Add link to source text and last modified date
        self.add_source_link(pep_source_path)
        self.add_commit_history_info(pep_source_path)

    @staticmethod
    def cleanup_callback(pending: nodes.pending) -> None:
        """Remove an empty "References" section.

        Called after the `references.TargetNotes` transform is complete.
        """
        if len(pending.parent) == 2:  # <title> and <pending>
            pending.parent.parent.remove(pending.parent)

    def add_source_link(self, pep_source_path: Path) -> None:
        """Add link to source text on VCS (GitHub)"""
        source_link = config.pep_vcs_url + pep_source_path.name
        link_node = nodes.reference("", source_link, refuri=source_link)
        span_node = nodes.inline("", "Source: ", link_node)
        self.document.append(span_node)

    def add_commit_history_info(self, pep_source_path: Path) -> None:
        """Use local git history to find last modified date.

        Appends a "Last modified" link to the file's commit history. Nothing
        is appended when git cannot be run, exits with an error, or prints
        no parseable timestamp.
        """
        args = ["git", "--no-pager", "log", "-1", "--format=%at", pep_source_path.name]
        try:
            file_modified = subprocess.check_output(args)
            since_epoch = file_modified.decode("utf-8").strip()
            dt = datetime.datetime.fromtimestamp(float(since_epoch), tz=datetime.timezone.utc)
        except (OSError, subprocess.CalledProcessError, ValueError):
            # OSError covers a missing or non-executable git binary;
            # ValueError covers empty output (float("") fails)
            return

        commit_link = config.pep_commits_url + pep_source_path.name
        # Drop tzinfo so isoformat() omits the "+00:00" offset; the "Z" suffix marks UTC
        timestamp = f"{dt.replace(tzinfo=None).isoformat()}Z"
        link_node = nodes.reference("", timestamp, refuri=commit_link)
        span_node = nodes.inline("", "Last modified: ", link_node)
        self.document.append(nodes.line("", "", classes=["zero-height"]))
        self.document.append(span_node)

View File

@ -0,0 +1,119 @@
import re
from pathlib import Path
from docutils import nodes
from docutils import transforms
from docutils.transforms import peps
from sphinx import errors
from pep_sphinx_extensions.pep_processor.transforms import pep_zero
from pep_sphinx_extensions.config import pep_url
class PEPParsingError(errors.SphinxError):
    """Raised by `PEPHeaders` when a PEP's RFC-2822 header is missing or malformed."""
# PEPHeaders is identical to docutils.transforms.peps.Headers excepting bdfl-delegate, sponsor & superseded-by
class PEPHeaders(transforms.Transform):
    """Process fields in a PEP's initial RFC-2822 header."""

    # Run before pep_processor.transforms.pep_title.PEPTitle
    default_priority = 330

    def apply(self) -> None:
        """Validate the header, mask emails, link PEP numbers, drop unneeded fields.

        Raises PEPParsingError if the document is empty, does not start with
        an RFC-2822 field list, lacks a leading integer "PEP" field followed
        by a "Title" field, or has a field body that is not a single paragraph.
        """
        if not Path(self.document["source"]).match("pep-*"):
            return  # not a PEP file, exit early

        # len() is used deliberately: the document's child count is what matters
        if not len(self.document):
            raise PEPParsingError("Document tree is empty.")

        header = self.document[0]
        if not isinstance(header, nodes.field_list) or "rfc2822" not in header["classes"]:
            raise PEPParsingError("Document does not begin with an RFC-2822 header; it is not a PEP.")

        # PEP number should be the first field
        pep_field = header[0]
        if pep_field[0].astext().lower() != "pep":
            raise PEPParsingError("Document does not contain an RFC-2822 'PEP' header!")

        # Extract PEP number
        value = pep_field[1].astext()
        try:
            pep = int(value)
        except ValueError:
            raise PEPParsingError(f"'PEP' header must contain an integer. '{value}' is invalid!")

        # Special processing for PEP 0.
        if pep == 0:
            pending = nodes.pending(pep_zero.PEPZero)
            self.document.insert(1, pending)
            self.document.note_pending(pending)

        # If there are less than two headers in the preamble, or if Title is absent
        if len(header) < 2 or header[1][0].astext().lower() != "title":
            raise PEPParsingError("No title!")

        fields_to_remove = []
        for field in header:
            name = field[0].astext().lower()
            body = field[1]
            if len(body) == 0:
                # body is empty
                continue
            elif len(body) > 1:
                msg = f"PEP header field body contains multiple elements:\n{field.pformat(level=1)}"
                raise PEPParsingError(msg)
            elif not isinstance(body[0], nodes.paragraph):  # len(body) == 1
                msg = f"PEP header field body may only contain a single paragraph:\n{field.pformat(level=1)}"
                raise PEPParsingError(msg)

            para = body[0]
            # "discussions-to" must be in this set, otherwise the subject
            # handling below can never trigger and its emails stay unmasked
            if name in {"author", "bdfl-delegate", "pep-delegate", "discussions-to", "sponsor"}:
                # mask emails
                for node in para:
                    if isinstance(node, nodes.reference):
                        # Only Discussions-To links carry the PEP number through to mask_email
                        pep_num = pep if name == "discussions-to" else -1
                        node.replace_self(peps.mask_email(node, pep_num))
            elif name in {"replaces", "superseded-by", "requires"}:
                # replace PEP numbers with normalised list of links to PEPs
                new_body = []
                space = nodes.Text(" ")
                for ref_pep in re.split(r",?\s+", body.astext()):
                    new_body.append(nodes.reference(
                        ref_pep, ref_pep,
                        refuri=(self.document.settings.pep_base_url + pep_url.format(int(ref_pep)))))
                    new_body.append(space)
                para[:] = new_body[:-1]  # drop trailing space
            elif name in {"last-modified", "content-type", "version"}:
                # Mark unneeded fields
                fields_to_remove.append(field)

        # Remove unneeded fields (done after the loop so `header` is not mutated mid-iteration)
        for field in fields_to_remove:
            field.parent.remove(field)
def _mask_email(ref: nodes.reference, pep_num: int = -1) -> nodes.Node:
    """Mask the email address in `ref` and return a replacement node.

    `ref` is returned unchanged if it contains no email address.

    If given an email not explicitly whitelisted, process it such that
    `user@host` -> `user at host`.

    If given a PEP number `pep_num`, add a default email subject. In that
    case `ref` itself is returned, with the subject appended to its URI and
    the (possibly masked) text as its only child. Without a PEP number the
    bare replacement node is returned and the link is dropped.
    """
    if "refuri" not in ref or not ref["refuri"].startswith("mailto:"):
        return ref

    non_masked_addresses = {"peps@python.org", "python-list@python.org", "python-dev@python.org"}
    if ref["refuri"].removeprefix("mailto:").strip() in non_masked_addresses:
        replacement = ref[0]
    else:
        replacement_text = ref.astext().replace("@", "&#32;&#97;t&#32;")
        replacement = nodes.raw("", replacement_text, format="html")

    if pep_num == -1:
        return replacement

    # The subject belongs on the link: `replacement` is a text or raw node
    # and carries no "refuri" attribute of its own
    ref["refuri"] += f"?subject=PEP%20{pep_num}"
    ref[:] = [replacement]
    return ref

View File

@ -0,0 +1,49 @@
from pathlib import Path
from docutils import nodes
import docutils.transforms as transforms
class PEPTitle(transforms.Transform):
    """Add PEP title and organise document hierarchy."""

    # needs to run before docutils.transforms.frontmatter.DocInfo and after
    # pep_processor.transforms.pep_headers.PEPHeaders
    default_priority = 335

    def apply(self) -> None:
        """Wrap the document's children in a new section titled from the PEP header."""
        if not Path(self.document["source"]).match("pep-*"):
            return  # only PEP files are given a generated title

        # Dictionary mapping RFC2822 header names to their raw values
        header_values = {}

        # The header fields are the first child of the document
        for field in self.document[0]:
            # Map each sub-node's tag name to its raw source text
            by_tag = {child.tagname: child.rawsource for child in field}
            header_values[by_tag["field_name"]] = by_tag["field_body"]

            # Stop as soon as both the PEP number and the title are known
            if header_values.keys() >= {"PEP", "Title"}:
                break

        # Compose the title text
        number = int(header_values["PEP"])
        title_text = header_values["Title"]
        heading = f"PEP {number} -- {title_text}"  # double hyphen for en dash

        # Build the section node and its title node
        section = nodes.section()
        heading_text = nodes.Text(heading, heading)
        heading_node = nodes.title(heading, "", heading_text)
        heading_node["classes"].append("page-title")
        normalised_name = " ".join(heading_node.astext().lower().split())
        section["names"].append(normalised_name)
        section.append(heading_node)

        # Make the new section the document's only child and move the
        # previous children beneath it
        previous_children = self.document.children
        self.document.children = [section]
        section.extend(previous_children)
        self.document.note_implicit_target(section, section)

View File

@ -0,0 +1,74 @@
from docutils import nodes
from docutils import transforms
from docutils.transforms import peps
from pep_sphinx_extensions.config import pep_url
class PEPZero(transforms.Transform):
    """Schedule PEP 0 processing."""

    # Run during sphinx post processing
    default_priority = 760

    def apply(self) -> None:
        """Walk the document with `PEPZeroSpecial`, then remove the pending node."""
        self.document.walk(PEPZeroSpecial(self.document))
        placeholder = self.startnode
        placeholder.parent.remove(placeholder)
class PEPZeroSpecial(nodes.SparseNodeVisitor):
    """Perform the special processing needed by PEP 0:

    - Mask email addresses.
    - Link PEP numbers in the second column of 4-column tables to the PEPs themselves.
    """

    def __init__(self, document: nodes.document):
        super().__init__(document)
        self.pep_table: int = 0  # truthy while inside a 4-column table group
        self.entry: int = 0  # 1-based position of the current colspec/entry

    def unknown_visit(self, node: nodes.Node) -> None:
        """No processing for undefined node types."""

    @staticmethod
    def visit_reference(node: nodes.reference) -> None:
        """Mask email addresses if present."""
        masked = peps.mask_email(node)
        node.replace_self(masked)

    @staticmethod
    def visit_field_list(node: nodes.field_list) -> None:
        """Skip PEP headers."""
        if "rfc2822" in node["classes"]:
            raise nodes.SkipNode

    def visit_tgroup(self, node: nodes.tgroup) -> None:
        """Set column counter and PEP table marker."""
        self.pep_table = node["cols"] == 4
        self.entry = 0  # reset column number

    def visit_colspec(self, node: nodes.colspec) -> None:
        """Count the column; tag the second column of a PEP table with "num"."""
        self.entry += 1
        if self.pep_table and self.entry == 2:
            node["classes"].append("num")

    def visit_row(self, _node: nodes.row) -> None:
        """Restart the column count for each row."""
        self.entry = 0

    def visit_entry(self, node: nodes.entry) -> None:
        """Turn a lone PEP number in the second column of a PEP table into a link."""
        self.entry += 1
        in_number_column = self.pep_table and self.entry == 2 and len(node) == 1
        if not in_number_column:
            return

        node["classes"].append("num")

        # Only a paragraph holding a single child can be replaced by a link
        para = node[0]
        if not isinstance(para, nodes.paragraph) or len(para) != 1:
            return

        pep_str = para.astext()
        try:
            ref = self.document.settings.pep_base_url + pep_url.format(int(pep_str))
            para[0] = nodes.reference(pep_str, pep_str, refuri=ref)
        except ValueError:
            # Cell text is not an integer; leave it untouched
            pass