Add PEP abstract to the RSS feed (#1679)

* Flake8 fixes

* Use first paragraph of abstract as 'description', and PEP author as 'author'

* Check RSS generation runs with no error
This commit is contained in:
Hugo van Kemenade 2021-07-12 14:55:16 +03:00 committed by GitHub
parent 86332b3564
commit e61ca95fce
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 67 additions and 13 deletions

View File

@ -20,7 +20,9 @@ jobs:
python -m pip install -U docutils python -m pip install -U docutils
- name: Build - name: Build
run: make -j$(nproc) run: |
make rss
make -j$(nproc)
- name: Deploy - name: Deploy
if: > if: >

View File

@ -1,23 +1,71 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
# usage: pep-hook.py $REPOS $REV # usage: python3 pep2rss.py .
# (standard post-commit args)
import os, glob, time, datetime, stat, re, sys import datetime
import glob
import os
import re
import sys
import time
import PyRSS2Gen as rssgen import PyRSS2Gen as rssgen
import docutils.frontend
import docutils.nodes
import docutils.parsers.rst
import docutils.utils
RSS_PATH = os.path.join(sys.argv[1], 'peps.rss') RSS_PATH = os.path.join(sys.argv[1], 'peps.rss')
def remove_prefix(text: str, prefix: str) -> str:
    """Return *text* with a leading *prefix* stripped, if it is present.

    Uses ``str.removeprefix`` when the running interpreter provides it
    (Python 3.9+) and falls back to a manual slice otherwise. When *text*
    does not start with *prefix* it is returned unchanged.
    """
    builtin_strip = getattr(text, "removeprefix", None)
    if builtin_strip is not None:
        return builtin_strip(prefix)
    # Older interpreters: emulate removeprefix by hand.
    return text[len(prefix):] if text.startswith(prefix) else text
def parse_rst(text: str) -> docutils.nodes.document:
    """Parse reStructuredText source into a docutils document tree.

    The default settings for the RST parser component are used, and the
    document is created under the placeholder source name ``<rst-doc>``.
    The populated document node is returned.
    """
    rst_parser_cls = docutils.parsers.rst.Parser
    rst_parser = rst_parser_cls()
    option_parser = docutils.frontend.OptionParser(components=(rst_parser_cls,))
    doc = docutils.utils.new_document(
        '<rst-doc>', settings=option_parser.get_default_values()
    )
    # parse() fills ``doc`` in place.
    rst_parser.parse(text, doc)
    return doc
def pep_abstract(full_path: str) -> "str | None":
    """Return the first paragraph of the PEP abstract.

    The file at *full_path* is read as UTF-8 and parsed with ``parse_rst``.
    The top-level nodes of the resulting document are scanned in order; the
    first node whose string form contains ``<title>Abstract</title>`` and
    which has a direct ``paragraph`` child supplies the result.

    Returns the text of that paragraph, or ``None`` when no such node or
    paragraph is found.
    """
    with open(full_path, encoding="utf-8") as f:
        text = f.read()

    for node in parse_rst(text):
        # Matching is done on the node's string rendering, exactly as before.
        if "<title>Abstract</title>" not in str(node):
            continue
        for child in node:
            if child.tagname == "paragraph":
                # Stop at the very first paragraph; returning here (rather
                # than only breaking the inner loop) keeps a later matching
                # node from overwriting the abstract already found.
                return child.astext()
    return None
def firstline_startingwith(full_path, text):
    """Return the remainder of the first line in a file that starts with *text*.

    The file at *full_path* is read as UTF-8, line by line. For the first
    line beginning with *text*, everything after that prefix is returned
    with surrounding whitespace stripped. Returns ``None`` when no line
    matches.
    """
    # The ``with`` block closes the file even on the early return; the bare
    # ``open()`` in a for-loop left the handle to the garbage collector.
    with open(full_path, encoding="utf-8") as f:
        for line in f:
            if line.startswith(text):
                return line[len(text):].strip()
    return None
# get list of peps with creation time # get list of peps with creation time
# (from "Created:" string in pep .rst or .txt) # (from "Created:" string in pep .rst or .txt)
peps = glob.glob('pep-*.txt') peps = glob.glob('pep-*.txt')
peps.extend(glob.glob('pep-*.rst')) peps.extend(glob.glob('pep-*.rst'))
def pep_creation_dt(full_path): def pep_creation_dt(full_path):
created_str = firstline_startingwith(full_path, 'Created:') created_str = firstline_startingwith(full_path, 'Created:')
# bleh, I was hoping to avoid re but some PEPs editorialize # bleh, I was hoping to avoid re but some PEPs editorialize
@ -35,6 +83,8 @@ def pep_creation_dt(full_path):
except ValueError: except ValueError:
t = time.strptime(created_str, '%d-%B-%Y') t = time.strptime(created_str, '%d-%B-%Y')
return datetime.datetime(*t[:6]) return datetime.datetime(*t[:6])
peps_with_dt = [(pep_creation_dt(full_path), full_path) for full_path in peps] peps_with_dt = [(pep_creation_dt(full_path), full_path) for full_path in peps]
# sort peps by date, newest first # sort peps by date, newest first
peps_with_dt.sort(reverse=True) peps_with_dt.sort(reverse=True)
@ -48,13 +98,15 @@ for dt, full_path in peps_with_dt[:10]:
pass pass
title = firstline_startingwith(full_path, 'Title:') title = firstline_startingwith(full_path, 'Title:')
author = firstline_startingwith(full_path, 'Author:') author = firstline_startingwith(full_path, 'Author:')
abstract = pep_abstract(full_path)
url = 'https://www.python.org/dev/peps/pep-%0.4d/' % n url = 'https://www.python.org/dev/peps/pep-%0.4d/' % n
item = rssgen.RSSItem( item = rssgen.RSSItem(
title = 'PEP %d: %s' % (n, title), title='PEP %d: %s' % (n, title),
link = url, link=url,
description = 'Author: %s' % author, description=abstract,
guid = rssgen.Guid(url), author=author,
pubDate = dt) guid=rssgen.Guid(url),
pubDate=dt)
items.append(item) items.append(item)
# the rss envelope # the rss envelope
@ -64,11 +116,11 @@ language features, and some meta-information like release
procedure and schedules procedure and schedules
""".strip() """.strip()
rss = rssgen.RSS2( rss = rssgen.RSS2(
title = 'Newest Python PEPs', title='Newest Python PEPs',
link = 'https://www.python.org/dev/peps/', link = 'https://www.python.org/dev/peps/',
description = desc, description=desc,
lastBuildDate = datetime.datetime.now(), lastBuildDate=datetime.datetime.now(),
items = items) items=items)
with open(RSS_PATH, 'w', encoding="utf-8") as fp: with open(RSS_PATH, 'w', encoding="utf-8") as fp:
fp.write(rss.to_xml(encoding="utf-8")) fp.write(rss.to_xml(encoding="utf-8"))