Skip to content

Commit a0be5e5

Browse files
committed
Add pep_rss_gen.py
1 parent c69fdbf commit a0be5e5

File tree

1 file changed

+137
-0
lines changed

1 file changed

+137
-0
lines changed

pep_rss_gen.py

+137
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
import datetime
2+
import email.utils
3+
from pathlib import Path
4+
import re
5+
6+
from dateutil import parser
7+
import docutils.frontend
8+
import docutils.nodes
9+
import docutils.parsers.rst
10+
import docutils.utils
11+
from feedgen import entry
12+
from feedgen import feed
13+
14+
15+
# Monkeypatch feedgen.util.formatRFC2822
16+
def _format_rfc_2822(dt: datetime.datetime) -> str:
    """Render *dt* as an RFC 2822 date string with a literal ``GMT`` zone.

    ``usegmt=True`` makes ``email.utils.format_datetime`` emit ``GMT`` instead
    of a numeric offset; it requires *dt* to be an aware datetime in UTC.
    """
    rendered = email.utils.format_datetime(dt, usegmt=True)
    return rendered
18+
19+
20+
# Rebind the formatter name in both the feedgen ``entry`` and ``feed`` modules.
entry.formatRFC2822 = _format_rfc_2822
feed.formatRFC2822 = _format_rfc_2822
21+
# Per-file cache of header values, keyed by source path and then by header prefix.
line_cache: dict[Path, dict[str, str]] = {}

# The only header prefixes that are extracted (and therefore cached) per file.
_CACHED_PREFIXES = ("Created:", "Title:", "Author:")


def first_line_starting_with(full_path: Path, text: str) -> str:
    """Return the rest of the first line in *full_path* that starts with *text*.

    Only the ``Created:``, ``Title:`` and ``Author:`` prefixes are extracted;
    any other *text*, or a prefix that never occurs in the file, yields ``""``.
    Results are memoised in ``line_cache`` so each file is read at most once.

    Args:
        full_path: Path of the PEP source file (read as UTF-8).
        text: Header prefix to look up, including the trailing colon.

    Returns:
        The stripped remainder of the first matching line, or ``""``.

    Raises:
        OSError: If the file cannot be opened. Nothing is cached in that case.
    """
    # Try and retrieve from cache
    if full_path in line_cache:
        return line_cache[full_path].get(text, "")

    # Else read source
    path_cache: dict[str, str] = {}
    with full_path.open(encoding="utf-8") as source:
        for line in source:
            for prefix in _CACHED_PREFIXES:
                # Keep only the first occurrence so that body text which happens
                # to start with a header name cannot overwrite the real header.
                if prefix not in path_cache and line.startswith(prefix):
                    path_cache[prefix] = line.removeprefix(prefix).strip()
                    break

            # Once all have been found, exit loop
            if len(path_cache) == len(_CACHED_PREFIXES):
                break

    # Cache only after a successful read so a failed open is not remembered as "empty".
    line_cache[full_path] = path_cache
    return path_cache.get(text, "")
43+
44+
45+
def pep_creation(full_path: Path) -> datetime.datetime:
    """Return the date found on the ``Created:`` line of the PEP at *full_path*.

    When no date-like text is present, or it cannot be parsed, 1 January 1900
    is returned instead.
    """
    fallback = datetime.datetime(1900, 1, 1)
    header_value = first_line_starting_with(full_path, "Created:")

    # Some PEPs editorialize on the Created line (as of Aug 2020 only PEP 102
    # has additional content there), so extract just the date-looking part.
    date_match = re.search(r"(\d+[- ][\w\d]+[- ]\d{2,4})", header_value)
    if date_match is None:
        # Some older PEPs have an empty Created line; they are old enough not to
        # matter. None would be the natural result, but datetime objects refuse
        # to compare with it.
        return fallback

    try:
        parsed = parser.parse(date_match.group(1), dayfirst=True)
    except (ValueError, OverflowError):
        return fallback
    return parsed
59+
60+
61+
def parse_rst(text: str) -> docutils.nodes.document:
    """Parse reStructuredText *text* and return the populated docutils document."""
    parser_cls = docutils.parsers.rst.Parser
    rst_reader = parser_cls()

    # Build default settings for the RST parser component.
    option_parser = docutils.frontend.OptionParser(components=(parser_cls,))
    default_settings = option_parser.get_default_values()

    doc = docutils.utils.new_document('<rst-doc>', settings=default_settings)
    rst_reader.parse(text, doc)
    return doc
68+
69+
70+
def pep_abstract(full_path: Path) -> str:
    """Return the first paragraph of the PEP's Abstract section on one line.

    An empty string is returned when no top-level node titled "Abstract"
    contains a paragraph.
    """
    document = parse_rst(full_path.read_text(encoding="utf-8"))
    for section in document:
        # Identify the abstract by the title markup in the node's string form.
        if "<title>Abstract</title>" not in str(section):
            continue
        for element in section:
            if element.tagname == "paragraph":
                flattened = element.astext().strip()
                return flattened.replace("\n", " ")
    return ""
79+
80+
81+
def main():
    """Write ``peps.rss`` next to this script, describing the ten newest PEPs.

    PEP sources are the ``pep-????.*`` files in this script's directory. They
    are ranked by the date on their ``Created:`` line, and the ten with the
    latest dates each become one feed entry (title, link, abstract, authors).
    """
    # get the directory with the PEP sources
    pep_dir = Path(__file__).parent

    # get list of peps with creation time (from "Created:" string in pep source)
    peps_with_dt = sorted((pep_creation(path), path) for path in pep_dir.glob("pep-????.*"))

    # generate rss items for 10 most recent peps
    items = []
    for dt, full_path in peps_with_dt[-10:]:
        try:
            pep_num = int(full_path.stem.split("-")[-1])
        except ValueError:
            # The glob matched a file whose suffix is not a PEP number; skip it.
            continue

        title = first_line_starting_with(full_path, "Title:")
        author = first_line_starting_with(full_path, "Author:")
        # Only run the address parser when the line actually contains an e-mail address.
        parsed_authors = email.utils.getaddresses([author]) if "@" in author else [(author, "")]
        url = f"https://www.python.org/dev/peps/pep-{pep_num:0>4}"

        item = entry.FeedEntry()
        item.title(f"PEP {pep_num}: {title}")
        item.link(href=url)
        item.description(pep_abstract(full_path))
        item.guid(url, permalink=True)
        item.published(dt.replace(tzinfo=datetime.timezone.utc))  # ensure datetime has a timezone
        item.author([{"name": name, "email": address} for name, address in parsed_authors])
        items.append(item)

    # The rss envelope
    # Built by literal concatenation so the text does not depend on how this
    # source file happens to be indented.
    desc = (
        "Newest Python Enhancement Proposals (PEPs) - Information on new "
        "language features, and some meta-information like release "
        "procedure and schedules."
    )

    # Setup feed generator
    fg = feed.FeedGenerator()
    fg.language("en")
    fg.generator("")
    fg.docs("https://cyber.harvard.edu/rss/rss.html")

    # Add metadata
    fg.title("Newest Python PEPs")
    fg.link(href="https://www.python.org/dev/peps")
    fg.description(desc)
    # datetime.utcnow() is deprecated; ask for an aware UTC timestamp directly.
    fg.lastBuildDate(datetime.datetime.now(datetime.timezone.utc))

    # Add PEP information (ordered by newest first)
    # NOTE(review): items is oldest-first here; this presumably relies on
    # add_entry prepending each entry - confirm against feedgen.
    for item in items:
        fg.add_entry(item)

    pep_dir.joinpath("peps.rss").write_bytes(fg.rss_str(pretty=True))
134+
135+
136+
# Generate the feed only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()

0 commit comments

Comments
 (0)