python-peps/pep2rss.py

#!/usr/bin/env python3

# usage: python3 pep2rss.py .

import datetime
import glob
import os
import re
import sys
import time
import PyRSS2Gen as rssgen
import docutils.frontend
import docutils.nodes
import docutils.parsers.rst
import docutils.utils

# Destination of the generated feed: a file named 'peps.rss' inside the
# directory passed as the first command-line argument (see usage above).
# Evaluated at import time, so running without an argument raises IndexError.
RSS_PATH = os.path.join(sys.argv[1], 'peps.rss')


def remove_prefix(text: str, prefix: str) -> str:
    """Return *text* with a leading *prefix* stripped, or *text* unchanged.

    Uses ``str.removeprefix`` when the interpreter provides it and falls
    back to manual slicing on older versions.
    """
    native = getattr(text, "removeprefix", None)
    if native is not None:
        # Python 3.9+
        return native(prefix)
    # Older interpreters: slice the prefix off by hand.
    return text[len(prefix):] if text.startswith(prefix) else text


def parse_rst(text: str) -> docutils.nodes.document:
    """Parse reStructuredText source into a docutils document tree.

    :param text: the complete reST source to parse.
    :return: a new docutils document (named ``<rst-doc>``) populated by
        the reST parser using default settings.
    """
    parser_cls = docutils.parsers.rst.Parser
    try:
        # Docutils 0.19+ provides this helper; calling OptionParser directly
        # there emits a DeprecationWarning.
        get_defaults = docutils.frontend.get_default_settings
    except AttributeError:
        # Older Docutils: build the defaults through OptionParser.
        settings = docutils.frontend.OptionParser(
            components=(parser_cls,)).get_default_values()
    else:
        settings = get_defaults(parser_cls)
    document = docutils.utils.new_document('<rst-doc>', settings=settings)
    parser_cls().parse(text, document)
    return document


def pep_abstract(full_path: str) -> "str | None":
    """Return the first paragraph of the PEP abstract.

    :param full_path: path of the PEP source file (read as UTF-8).
    :return: the text of the first paragraph found in the first top-level
        node whose serialized form contains an ``Abstract`` title, or
        ``None`` when no such paragraph exists.
    """
    # Read the whole file first so the handle is closed before parsing.
    with open(full_path, encoding="utf-8") as f:
        text = f.read()

    for node in parse_rst(text):
        if "<title>Abstract</title>" not in str(node):
            continue
        for child in node:
            if child.tagname == "paragraph":
                # Return immediately: a plain ``break`` would only leave the
                # inner loop and let a later matching node overwrite the
                # result.
                return child.astext()
    return None


def firstline_startingwith(full_path, text):
    """Return the remainder of the first line that starts with *text*.

    :param full_path: path of the file to scan (read as UTF-8).
    :param text: prefix to look for at the very start of a line.
    :return: the rest of the first matching line with surrounding
        whitespace stripped, or ``None`` if no line starts with *text*.
    """
    # Use a context manager so the file is closed even on early return.
    with open(full_path, encoding="utf-8") as f:
        for line in f:
            if line.startswith(text):
                return line[len(text):].strip()
    return None


# Collect every PEP source file in the current directory: plain-text PEPs
# first, then reStructuredText ones.  The creation time is read later from
# each file's "Created:" line.
peps = [
    path
    for pattern in ('pep-*.txt', 'pep-*.rst')
    for path in glob.glob(pattern)
]


def pep_creation_dt(full_path):
    """Return the creation date of a PEP as a naive ``datetime``.

    The date is taken from the file's first ``Created:`` line and must look
    like ``DD-Mon-YYYY`` or ``DD-Month-YYYY``.

    :param full_path: path of the PEP source file.
    :return: the parsed creation date, or the Unix epoch (in local time)
        when the ``Created:`` line is missing, empty, or holds no date.
    :raises ValueError: if a date-like token is found but matches neither
        the abbreviated nor the full month-name format.
    """
    created_str = firstline_startingwith(full_path, 'Created:')
    # bleh, I was hoping to avoid re but some PEPs editorialize
    # on the Created line.  A file with no "Created:" line at all yields
    # None, so substitute '' to take the same fallback as an empty line
    # instead of raising TypeError.
    m = re.search(r'''(\d+-\w+-\d{4})''', created_str or '')
    if not m:
        # some older ones have an empty line, that's okay, if it's old
        # we ipso facto don't care about it.
        # "return None" would make the most sense but datetime objects
        # refuse to compare with that. :-|
        return datetime.datetime(*time.localtime(0)[:6])
    created_str = m.group(1)
    try:
        t = time.strptime(created_str, '%d-%b-%Y')
    except ValueError:
        # Not an abbreviated month name; try the full month name.
        t = time.strptime(created_str, '%d-%B-%Y')
    return datetime.datetime(*t[:6])


# Pair each PEP file with its creation date and order the pairs
# newest first (ties fall back to comparing the paths).
peps_with_dt = sorted(
    ((pep_creation_dt(pep_path), pep_path) for pep_path in peps),
    reverse=True,
)

# generate rss items for the 10 most recent peps
items = []
for dt, full_path in peps_with_dt:
    if len(items) == 10:
        break
    try:
        # PEP number: text between the last '-' and the following '.'
        n = int(full_path.split('-')[-1].split('.')[0])
    except ValueError:
        # Not a numbered PEP file.  Skip it: carrying on would either raise
        # NameError (first iteration) or silently reuse the previous PEP's
        # number for this item's title and URL.
        continue
    title = firstline_startingwith(full_path, 'Title:')
    author = firstline_startingwith(full_path, 'Author:')
    abstract = pep_abstract(full_path)
    url = 'https://www.python.org/dev/peps/pep-%0.4d/' % n
    item = rssgen.RSSItem(
        title='PEP %d: %s' % (n, title),
        link=url,
        description=abstract,
        author=author,
        guid=rssgen.Guid(url),
        pubDate=dt)
    items.append(item)

# the rss envelope
desc = """
Newest Python Enhancement Proposals (PEPs) - Information on new
language features, and some meta-information like release
procedure and schedules
""".strip()
rss = rssgen.RSS2(
    title='Newest Python PEPs',
    link='https://www.python.org/dev/peps/',
    description=desc,
    # PyRSS2Gen documents that datetimes must be in GMT and writes them with
    # a "GMT" suffix, so pass the current UTC time, not local wall-clock time.
    lastBuildDate=datetime.datetime.now(datetime.timezone.utc),
    items=items)

# Serialize the feed, then write it to RSS_PATH as UTF-8.
feed_xml = rss.to_xml(encoding="utf-8")
with open(RSS_PATH, 'w', encoding="utf-8") as out_file:
    out_file.write(feed_xml)
Fix pep2rss to specify output encoding (GH-634) 2018-07-10 10:05:08 -04:00			`#!/usr/bin/env python3`
Add pep2rss, contributed by Jonathan Ellis. 2007-07-08 04:49:54 -04:00
Add PEP abstract to the RSS feed (#1679) * Flake8 fixes * Use first paragraph of abstract as 'description', and PEP author as 'author' * Check RSS generation runs with no error 2021-07-12 07:55:16 -04:00			`# usage: python3 pep2rss.py .`
Add pep2rss, contributed by Jonathan Ellis. 2007-07-08 04:49:54 -04:00
Add PEP abstract to the RSS feed (#1679) * Flake8 fixes * Use first paragraph of abstract as 'description', and PEP author as 'author' * Check RSS generation runs with no error 2021-07-12 07:55:16 -04:00			`import datetime`
			`import glob`
			`import os`
			`import re`
			`import sys`
			`import time`
Add pep2rss, contributed by Jonathan Ellis. 2007-07-08 04:49:54 -04:00			`import PyRSS2Gen as rssgen`
Add PEP abstract to the RSS feed (#1679) * Flake8 fixes * Use first paragraph of abstract as 'description', and PEP author as 'author' * Check RSS generation runs with no error 2021-07-12 07:55:16 -04:00			`import docutils.frontend`
			`import docutils.nodes`
			`import docutils.parsers.rst`
			`import docutils.utils`
Add pep2rss, contributed by Jonathan Ellis. 2007-07-08 04:49:54 -04:00
			`RSS_PATH = os.path.join(sys.argv[1], 'peps.rss')`

Add PEP abstract to the RSS feed (#1679) * Flake8 fixes * Use first paragraph of abstract as 'description', and PEP author as 'author' * Check RSS generation runs with no error 2021-07-12 07:55:16 -04:00
			`def remove_prefix(text: str, prefix: str) -> str:`
			`try:`
			`# Python 3.9+`
			`return text.removeprefix(prefix)`
			`except AttributeError:`
			`if text.startswith(prefix):`
			`return text[len(prefix):]`
			`return text`


			`def parse_rst(text: str) -> docutils.nodes.document:`
			`parser = docutils.parsers.rst.Parser()`
			`components = (docutils.parsers.rst.Parser,)`
			`settings = docutils.frontend.OptionParser(components=components).get_default_values()`
			`document = docutils.utils.new_document('<rst-doc>', settings=settings)`
			`parser.parse(text, document)`
			`return document`


			`def pep_abstract(full_path: str) -> str:`
			`"""Return the first paragraph of the PEP abstract"""`
			`abstract = None`
			`with open(full_path, encoding="utf-8") as f:`
			`text = f.read()`
			`document = parse_rst(text)`
			`nodes = list(document)`
			`for node in nodes:`
			`if "<title>Abstract</title>" in str(node):`
			`for child in node:`
			`if child.tagname == "paragraph":`
			`abstract = child.astext()`
			`# Just fetch the first paragraph`
			`break`
			`return abstract`


Add pep2rss, contributed by Jonathan Ellis. 2007-07-08 04:49:54 -04:00			`def firstline_startingwith(full_path, text):`
Fix pep2rss to specify output encoding (GH-634) 2018-07-10 10:05:08 -04:00			`for line in open(full_path, encoding="utf-8"):`
Add pep2rss, contributed by Jonathan Ellis. 2007-07-08 04:49:54 -04:00			`if line.startswith(text):`
			`return line[len(text):].strip()`
			`return None`

Add PEP abstract to the RSS feed (#1679) * Flake8 fixes * Use first paragraph of abstract as 'description', and PEP author as 'author' * Check RSS generation runs with no error 2021-07-12 07:55:16 -04:00
Fix pep2rss to include PEPs written in reStructuredText format (GH-632) 2018-04-27 01:20:27 -04:00			`# get list of peps with creation time`
			`# (from "Created:" string in pep .rst or .txt)`
Add pep2rss, contributed by Jonathan Ellis. 2007-07-08 04:49:54 -04:00			`peps = glob.glob('pep-*.txt')`
Fix pep2rss to include PEPs written in reStructuredText format (GH-632) 2018-04-27 01:20:27 -04:00			`peps.extend(glob.glob('pep-*.rst'))`
Add PEP abstract to the RSS feed (#1679) * Flake8 fixes * Use first paragraph of abstract as 'description', and PEP author as 'author' * Check RSS generation runs with no error 2021-07-12 07:55:16 -04:00

Add pep2rss, contributed by Jonathan Ellis. 2007-07-08 04:49:54 -04:00			`def pep_creation_dt(full_path):`
			`created_str = firstline_startingwith(full_path, 'Created:')`
			`# bleh, I was hoping to avoid re but some PEPs editorialize`
			`# on the Created line`
			`m = re.search(r'''(\d+-\w+-\d{4})''', created_str)`
			`if not m:`
			`# some older ones have an empty line, that's okay, if it's old`
			`# we ipso facto don't care about it.`
			`# "return None" would make the most sense but datetime objects`
			`# refuse to compare with that. :-\|`
			`return datetime.datetime(*time.localtime(0)[:6])`
			`created_str = m.group(1)`
			`try:`
			`t = time.strptime(created_str, '%d-%b-%Y')`
			`except ValueError:`
			`t = time.strptime(created_str, '%d-%B-%Y')`
			`return datetime.datetime(*t[:6])`
Add PEP abstract to the RSS feed (#1679) * Flake8 fixes * Use first paragraph of abstract as 'description', and PEP author as 'author' * Check RSS generation runs with no error 2021-07-12 07:55:16 -04:00

Add pep2rss, contributed by Jonathan Ellis. 2007-07-08 04:49:54 -04:00			`peps_with_dt = [(pep_creation_dt(full_path), full_path) for full_path in peps]`
Cosmetic patch from Frank Benksten: * Show most recent items in reverse order so the newest item is at the top. 2008-01-15 08:56:50 -05:00			`# sort peps by date, newest first`
			`peps_with_dt.sort(reverse=True)`
Add pep2rss, contributed by Jonathan Ellis. 2007-07-08 04:49:54 -04:00
			`# generate rss items for 10 most recent peps`
			`items = []`
Cosmetic patch from Frank Benksten: * Show most recent items in reverse order so the newest item is at the top. 2008-01-15 08:56:50 -05:00			`for dt, full_path in peps_with_dt[:10]:`
Add pep2rss, contributed by Jonathan Ellis. 2007-07-08 04:49:54 -04:00			`try:`
			`n = int(full_path.split('-')[-1].split('.')[0])`
			`except ValueError:`
			`pass`
			`title = firstline_startingwith(full_path, 'Title:')`
			`author = firstline_startingwith(full_path, 'Author:')`
Add PEP abstract to the RSS feed (#1679) * Flake8 fixes * Use first paragraph of abstract as 'description', and PEP author as 'author' * Check RSS generation runs with no error 2021-07-12 07:55:16 -04:00			`abstract = pep_abstract(full_path)`
pep2rss.py: Use HTTPS for RSS links (#1680) 2020-10-23 11:38:15 -04:00			`url = 'https://www.python.org/dev/peps/pep-%0.4d/' % n`
Add pep2rss, contributed by Jonathan Ellis. 2007-07-08 04:49:54 -04:00			`item = rssgen.RSSItem(`
Add PEP abstract to the RSS feed (#1679) * Flake8 fixes * Use first paragraph of abstract as 'description', and PEP author as 'author' * Check RSS generation runs with no error 2021-07-12 07:55:16 -04:00			`title='PEP %d: %s' % (n, title),`
			`link=url,`
			`description=abstract,`
			`author=author,`
			`guid=rssgen.Guid(url),`
			`pubDate=dt)`
Add pep2rss, contributed by Jonathan Ellis. 2007-07-08 04:49:54 -04:00			`items.append(item)`

			`# the rss envelope`
			`desc = """`
			`Newest Python Enhancement Proposals (PEPs) - Information on new`
			`language features, and some meta-information like release`
			`procedure and schedules`
			`""".strip()`
			`rss = rssgen.RSS2(`
Add PEP abstract to the RSS feed (#1679) * Flake8 fixes * Use first paragraph of abstract as 'description', and PEP author as 'author' * Check RSS generation runs with no error 2021-07-12 07:55:16 -04:00			`title='Newest Python PEPs',`
pep2rss.py: Use HTTPS for RSS links (#1680) 2020-10-23 11:38:15 -04:00			`link = 'https://www.python.org/dev/peps/',`
Add PEP abstract to the RSS feed (#1679) * Flake8 fixes * Use first paragraph of abstract as 'description', and PEP author as 'author' * Check RSS generation runs with no error 2021-07-12 07:55:16 -04:00			`description=desc,`
			`lastBuildDate=datetime.datetime.now(),`
			`items=items)`
Add pep2rss, contributed by Jonathan Ellis. 2007-07-08 04:49:54 -04:00
Fix pep2rss to specify output encoding (GH-634) 2018-07-10 10:05:08 -04:00			`with open(RSS_PATH, 'w', encoding="utf-8") as fp:`
			`fp.write(rss.to_xml(encoding="utf-8"))`