Skip to content

Commit 580c1fe

Browse files
committed
Add BibTeX citations to PEPs
1 parent febb590 commit 580c1fe

File tree

6 files changed

+283
-45
lines changed

6 files changed

+283
-45
lines changed

pep_sphinx_extensions/__init__.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,9 @@
99
from docutils.writers.html5_polyglot import HTMLTranslator
1010
from sphinx import environment
1111

12-
from pep_sphinx_extensions.generate_rss import (
13-
create_rss_feed,
14-
get_from_doctree,
15-
pep_abstract,
16-
)
12+
from pep_sphinx_extensions.doctree import get_from_doctree
13+
from pep_sphinx_extensions.generate_bibtex import create_bibtex_files
14+
from pep_sphinx_extensions.generate_rss import create_rss_feed
1715
from pep_sphinx_extensions.pep_processor.html import (
1816
pep_html_builder,
1917
pep_html_translator,
@@ -51,6 +49,7 @@ def _post_build(app: Sphinx, exception: Exception | None) -> None:
5149
if "internal_builder" not in app.tags:
5250
create_index_file(Path(app.outdir), app.builder.name)
5351
create_rss_feed(app.doctreedir, app.outdir)
52+
create_bibtex_files(app.doctreedir, app.outdir)
5453

5554

5655
def set_description(

pep_sphinx_extensions/doctree.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
# This file is placed in the public domain or under the
2+
# CC0-1.0-Universal license, whichever is more permissive.
3+
4+
from __future__ import annotations
5+
6+
import pickle
7+
from pathlib import Path
8+
9+
from docutils import nodes
10+
11+
document_cache: dict[Path, dict[str, str]] = {}
12+
13+
14+
def pep_abstract(document: nodes.document) -> str:
    """Return the first paragraph of the PEP abstract.

    Sections are visited in document order.  As soon as a section titled
    "Abstract" is found, its first paragraph is returned (or an empty string
    if that section has no paragraph).  If no "Abstract" section exists, the
    first paragraph of the last "Introduction" section that has one is
    returned; if there is none, an empty string is returned.

    Newlines inside the returned paragraph are replaced with spaces.
    """
    introduction = ""
    for node in document.findall(nodes.section):
        title_node = node.next_node(nodes.title)
        if title_node is None:
            continue

        title = title_node.astext()
        if title not in {"Abstract", "Introduction"}:
            continue

        para_node = node.next_node(nodes.paragraph)
        text = "" if para_node is None else para_node.astext().strip().replace("\n", " ")

        if title == "Abstract":
            # The abstract always wins, even when it is empty.
            return text
        # An "Introduction" without any paragraph must not raise (the lookup
        # yields None) and must not discard a previously found introduction.
        if para_node is not None:
            introduction = text

    return introduction
32+
33+
34+
def get_from_doctree(full_path: Path, text: str) -> str:
    """Look up the header named *text* in the pickled doctree at *full_path*.

    The first request for a given path unpickles the doctree, stores its
    "headers" mapping in ``document_cache`` and adds an "Abstract" entry
    computed by ``pep_abstract``.  Later requests for the same path are
    answered from the cache.  A header that is not present yields "".
    """
    if full_path not in document_cache:
        # Cache miss: load the doctree from disk.
        doctree = pickle.loads(full_path.read_bytes())
        # The headers are populated in the PEPHeaders transform.
        headers = doctree.get("headers", {})
        document_cache[full_path] = headers
        # The abstract is stored alongside the real headers.
        headers["Abstract"] = pep_abstract(doctree)

    return document_cache[full_path].get(text, "")
pep_sphinx_extensions/generate_bibtex.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
# This file is placed in the public domain or under the
2+
# CC0-1.0-Universal license, whichever is more permissive.
3+
4+
from __future__ import annotations
5+
6+
import re
7+
import textwrap
8+
from pathlib import Path
9+
10+
from pep_sphinx_extensions.doctree import get_from_doctree
11+
12+
# LaTeX special characters that need escaping in BibTeX values
13+
_BIBTEX_SPECIAL = re.compile(r"([&%$#_{}~^])")
14+
_EMAIL_ADDRESS = re.compile(r"\s*<[^>]+>")
15+
16+
17+
def _escape_bibtex(text: str) -> str:
18+
"""Escape special BibTeX characters in a string."""
19+
return _BIBTEX_SPECIAL.sub(r"\\\1", text)
20+
21+
22+
def _parse_created(created: str) -> tuple[str, str]:
23+
"""Parse a PEP 'Created' date string (e.g. '01-Jan-2020') into (year, month).
24+
25+
Returns the year as a string and the BibTeX month abbreviation.
26+
"""
27+
_, month_abbr, year = created.split("-")
28+
return year, month_abbr.lower()
29+
30+
31+
def _format_authors(author_header: str) -> str:
    """Turn an Author header value into a BibTeX author list.

    Every ``<...>`` email part (with the whitespace before it) is dropped,
    the remainder is split on commas, and the non-empty, whitespace-trimmed
    names are joined with " and ".
    """
    without_emails = _EMAIL_ADDRESS.sub("", author_header)
    # filter(None, ...) discards the empty strings left by stray commas.
    names = filter(None, map(str.strip, without_emails.split(",")))
    return " and ".join(names)
41+
42+
43+
def _generate_bibtex_entry(full_path: Path) -> str:
    """Generate a BibTeX ``@techreport`` entry for a single PEP from its doctree.

    The "PEP", "Created", "Author" and "Title" headers are read through
    ``get_from_doctree``.  The entry key is ``pep<number>`` and the URL uses
    the PEP number zero-padded to four digits.  The returned string has no
    trailing newline.

    Raises:
        ValueError: if the "PEP" header is not an integer or the "Created"
            header cannot be parsed.
    """
    number = int(get_from_doctree(full_path, "PEP"))
    created = get_from_doctree(full_path, "Created")
    author = get_from_doctree(full_path, "Author")
    title = get_from_doctree(full_path, "Title")

    year, month = _parse_created(created)
    authors_bibtex = _escape_bibtex(_format_authors(author))
    # Collapse runs of whitespace so a title wrapped over several source
    # lines stays on one line in the entry.
    title_escaped = _escape_bibtex(" ".join(title.split()))

    # The entry is assembled from explicit lines instead of dedenting an
    # interpolated template: dedent looks at the final text, so a header
    # value containing a newline would change the margin it strips.
    fields = (
        f'author = "{authors_bibtex}"',
        f'title = "PEP {number} --- {title_escaped}"',
        'institution = "Python Software Foundation"',
        f'year = "{year}"',
        # Unquoted on purpose: this is a BibTeX month macro such as ``jul``.
        f"month = {month}",
        'type = "PEP"',
        f'number = "{number}"',
        f'url = "https://peps.python.org/pep-{number:0>4}/"',
    )
    body = "".join(f"  {field},\n" for field in fields)
    return f"@techreport{{pep{number},\n{body}}}"
65+
66+
67+
def create_bibtex_files(doctree_dir: str, output_dir: str) -> None:
    """Write one ``.bib`` file per PEP doctree into *output_dir*.

    Every file in *doctree_dir* matching ``pep-????.doctree`` produces a file
    of the same stem (for example ``pep-0008.bib``) containing the BibTeX
    entry followed by a newline, encoded as UTF-8.
    """
    destination = Path(output_dir)
    doctree_paths = Path(doctree_dir).glob("pep-????.doctree")
    for doctree_path in doctree_paths:
        bibtex_text = _generate_bibtex_entry(doctree_path) + "\n"
        bib_path = destination / f"{doctree_path.stem}.bib"
        bib_path.write_text(bibtex_text, encoding="utf-8")

pep_sphinx_extensions/generate_rss.py

Lines changed: 1 addition & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,11 @@
44
from __future__ import annotations
55

66
import datetime as dt
7-
import pickle
87
from email.utils import format_datetime, getaddresses
98
from html import escape
109
from pathlib import Path
1110

12-
from docutils import nodes
11+
from pep_sphinx_extensions.doctree import get_from_doctree
1312

1413
RSS_DESCRIPTION = (
1514
"Newest Python Enhancement Proposals (PEPs): "
@@ -23,24 +22,6 @@ def _format_rfc_2822(datetime: dt.datetime) -> str:
2322
return format_datetime(datetime, usegmt=True)
2423

2524

26-
document_cache: dict[Path, dict[str, str]] = {}
27-
28-
29-
def get_from_doctree(full_path: Path, text: str) -> str:
30-
# Try and retrieve from cache
31-
if full_path in document_cache:
32-
return document_cache[full_path].get(text, "")
33-
34-
# Else load doctree
35-
document = pickle.loads(full_path.read_bytes())
36-
# Store the headers (populated in the PEPHeaders transform)
37-
document_cache[full_path] = path_cache = document.get("headers", {})
38-
# Store the Abstract
39-
path_cache["Abstract"] = pep_abstract(document)
40-
# Return the requested key
41-
return path_cache.get(text, "")
42-
43-
4425
def pep_creation(full_path: Path) -> dt.datetime:
4526
created_str = get_from_doctree(full_path, "Created")
4627
try:
@@ -49,26 +30,6 @@ def pep_creation(full_path: Path) -> dt.datetime:
4930
return dt.datetime.min
5031

5132

52-
def pep_abstract(document: nodes.document) -> str:
53-
"""Return the first paragraph of the PEP abstract.
54-
If not found, return the first paragraph of the introduction.
55-
"""
56-
introduction = ""
57-
for node in document.findall(nodes.section):
58-
title_node = node.next_node(nodes.title)
59-
if title_node is None:
60-
continue
61-
62-
if title_node.astext() == "Abstract":
63-
if (para_node := node.next_node(nodes.paragraph)) is not None:
64-
return para_node.astext().strip().replace("\n", " ")
65-
return ""
66-
if title_node.astext() == "Introduction":
67-
introduction = node.next_node(nodes.paragraph).astext().strip().replace("\n", " ")
68-
69-
return introduction
70-
71-
7233
def _generate_items(doctree_dir: Path):
7334
# get list of peps with creation time (from "Created:" string in pep source)
7435
peps_with_dt = sorted((pep_creation(path), path) for path in doctree_dir.glob("pep-????.doctree"))

pep_sphinx_extensions/pep_processor/transforms/pep_footer.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ def apply(self) -> None:
5050
self.document += nodes.transition()
5151
self.document += _add_source_link(pep_source_path)
5252
self.document += _add_commit_history_info(pep_source_path)
53+
self.document += _add_bibtex_link(pep_source_path)
5354

5455

5556
def _add_source_link(pep_source_path: Path) -> nodes.paragraph:
@@ -71,6 +72,13 @@ def _add_commit_history_info(pep_source_path: Path) -> nodes.paragraph:
7172
return nodes.paragraph("", "Last modified: ", link_node)
7273

7374

75+
def _add_bibtex_link(pep_source_path: Path) -> nodes.paragraph:
    """Build the "Cite: BibTeX" footer paragraph for a PEP.

    The link target is the stem of *pep_source_path* with a ``.bib`` suffix.
    """
    # NOTE(review): this URI is relative to the rendered page; confirm it
    # resolves to where the .bib file is written for directory-style builds.
    bibtex_reference = nodes.reference(
        "", "BibTeX", refuri=f"{pep_source_path.stem}.bib"
    )
    return nodes.paragraph("", "Cite: ", bibtex_reference)
80+
81+
7482
def _get_last_modified_timestamps():
7583
# get timestamps and changed files from all commits (without paging results)
7684
args = ("git", "--no-pager", "log", "--format=#%at", "--name-only")
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
from pathlib import Path
2+
from unittest.mock import patch
3+
4+
import pytest
5+
6+
from pep_sphinx_extensions.generate_bibtex import (
7+
_escape_bibtex,
8+
_format_authors,
9+
_generate_bibtex_entry,
10+
_parse_created,
11+
create_bibtex_files,
12+
)
13+
14+
MOCK_TARGET = "pep_sphinx_extensions.generate_bibtex.get_from_doctree"
15+
16+
PEP_8_HEADERS = {
17+
"PEP": "8",
18+
"Title": "Style Guide for Python Code",
19+
"Author": "Guido van Rossum, Barry Warsaw, Alyssa Coghlan",
20+
"Created": "05-Jul-2001",
21+
}
22+
23+
24+
def _mock_doctree(headers: dict[str, str]):
25+
"""Return a mock get_from_doctree that returns values from headers dict."""
26+
return lambda full_path, text: headers.get(text, "")
27+
28+
29+
@pytest.mark.parametrize(
    ("text", "expected"),
    [
        # Plain text passes through untouched.
        ("Hello World", "Hello World"),
        ("no specials", "no specials"),
        # Each special character gains a leading backslash.
        ("Tom & Jerry", r"Tom \& Jerry"),
        ("100%", r"100\%"),
        ("$x$", r"\$x\$"),
        ("C#", r"C\#"),
        ("snake_case", r"snake\_case"),
        ("{}", r"\{\}"),
        ("~tilde", r"\~tilde"),
    ],
)
def test_escape_bibtex(text: str, expected: str) -> None:
    """Special characters are escaped and ordinary text is left alone."""
    escaped = _escape_bibtex(text)

    assert escaped == expected
45+
46+
47+
@pytest.mark.parametrize(
    ("created", "expected"),
    [
        ("01-Jan-1990", ("1990", "jan")),
        ("28-Feb-2000", ("2000", "feb")),
        ("15-Sep-2021", ("2021", "sep")),
    ],
)
def test_parse_created(created: str, expected: tuple[str, str]) -> None:
    """A 'Created' date is split into (year, lower-case month abbreviation)."""
    parsed = _parse_created(created)

    assert parsed == expected
57+
58+
59+
@pytest.mark.parametrize(
    ("author_header", "expected"),
    [
        # A single author is returned as-is.
        ("Cardinal Ximénez", "Cardinal Ximénez"),
        # Email addresses in angle brackets are removed.
        (
            "Cardinal Ximénez <Cardinal.Ximenez@spanish.inquisition>,"
            " Cardinal Biggles <Cardinal.Biggles@spanish.inquisition>",
            "Cardinal Ximénez and Cardinal Biggles",
        ),
        # A header continued on the next line is handled.
        (
            "Cardinal Ximénez,\n Cardinal Biggles",
            "Cardinal Ximénez and Cardinal Biggles",
        ),
        # Three or more authors are all joined with " and ".
        (
            "Cardinal Ximénez, Cardinal Biggles, Cardinal Fang",
            "Cardinal Ximénez and Cardinal Biggles and Cardinal Fang",
        ),
    ],
)
def test_format_authors(author_header: str, expected: str) -> None:
    """Author headers become an " and "-separated list of names."""
    formatted = _format_authors(author_header)

    assert formatted == expected
80+
81+
82+
def test_generate_bibtex_entry() -> None:
    """The entry for PEP 8 contains every expected key and field."""
    # Arrange
    expected_fragments = (
        "@techreport{pep8,",
        'author = "Guido van Rossum and Barry Warsaw and Alyssa Coghlan"',
        'title = "PEP 8 --- Style Guide for Python Code"',
        'year = "2001"',
        "month = jul,",
        'number = "8"',
        'url = "https://peps.python.org/pep-0008/"',
    )

    # Act
    with patch(MOCK_TARGET, _mock_doctree(PEP_8_HEADERS)):
        entry = _generate_bibtex_entry(Path("pep-0008.doctree"))

    # Assert
    for fragment in expected_fragments:
        assert fragment in entry
95+
96+
97+
def test_generate_bibtex_entry_title_escaped() -> None:
    """Special characters in the title are escaped in the entry."""
    # Arrange
    overrides = {"PEP": "999", "Title": "Use of $ & % in PEPs"}
    fake_lookup = _mock_doctree({**PEP_8_HEADERS, **overrides})

    # Act
    with patch(MOCK_TARGET, fake_lookup):
        entry = _generate_bibtex_entry(Path("pep-0999.doctree"))

    # Assert
    assert r"Use of \$ \& \% in PEPs" in entry
107+
108+
109+
def test_generate_bibtex_entry_author_escaped() -> None:
    """Special characters in the author name are escaped in the entry."""
    # Arrange
    overrides = {"Author": "Tom & Jerry <tj@example.com>"}
    fake_lookup = _mock_doctree({**PEP_8_HEADERS, **overrides})

    # Act
    with patch(MOCK_TARGET, fake_lookup):
        entry = _generate_bibtex_entry(Path("pep-0008.doctree"))

    # Assert
    assert r"Tom \& Jerry" in entry
119+
120+
121+
def test_create_bibtex_files(tmp_path: Path) -> None:
    """A doctree named ``pep-0008.doctree`` yields ``pep-0008.bib``."""
    # Arrange
    doctree_dir = tmp_path / "doctrees"
    doctree_dir.mkdir()
    output_dir = tmp_path / "output"
    output_dir.mkdir()
    # The file only has to exist: header lookups are mocked below.
    (doctree_dir / "pep-0008.doctree").touch()

    # Act
    with patch(MOCK_TARGET, _mock_doctree(PEP_8_HEADERS)):
        create_bibtex_files(str(doctree_dir), str(output_dir))

    # Assert
    # Read with the same encoding the writer uses, so the test does not
    # depend on the platform's default encoding.
    bib = (output_dir / "pep-0008.bib").read_text(encoding="utf-8")
    assert "@techreport{pep8," in bib
    assert 'author = "Guido van Rossum and Barry Warsaw and Alyssa Coghlan"' in bib
137+
138+
139+
def test_create_bibtex_files_no_doctrees(tmp_path: Path) -> None:
    """With no doctrees present, no ``.bib`` file is written."""
    # Arrange
    doctree_dir, output_dir = tmp_path / "doctrees", tmp_path / "output"
    for directory in (doctree_dir, output_dir):
        directory.mkdir()

    # Act
    create_bibtex_files(str(doctree_dir), str(output_dir))

    # Assert
    written = list(output_dir.glob("*.bib"))
    assert written == []

0 commit comments

Comments
 (0)