Skip to content

Commit 8b83eb2

Browse files
committed
Add Pygments-based syntax highlighting for signatures
1 parent 92d4edc commit 8b83eb2

3 files changed

Lines changed: 127 additions & 3 deletions

File tree

great_docs/assets/post-render.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@
33
import os
44
import re
55

6+
from pygments.lexers import PythonLexer
7+
from pygments.token import Token
8+
69
# Print the working directory
710
print("Current working directory:", os.getcwd())
811

@@ -25,6 +28,77 @@
2528
print("No source links file found, skipping source link injection")
2629

2730

31+
# Lookup table translating Pygments token types into the short CSS class names that
# Quarto-rendered code blocks use for coloring. Token types without an entry here are
# resolved through their parent types by `get_quarto_class()`.
PYGMENTS_TO_QUARTO_CLASS = {
    # --- Keywords ---------------------------------------------------------
    Token.Keyword: "kw",  # def, return, if, ...
    Token.Keyword.Constant: "va",  # True, False, None
    # --- Names ------------------------------------------------------------
    Token.Name.Function: "fu",  # function names
    Token.Name.Class: "fu",  # class names share the function styling
    Token.Name.Builtin: "bu",  # str, int, list, dict, ...
    Token.Name.Builtin.Pseudo: "va",  # self, cls
    Token.Name.Decorator: "at",  # @decorator
    Token.Name: "op",  # any other name (parameters, variables)
    # --- Operators and punctuation ------------------------------------------
    Token.Operator: "op",  # =, |, ...
    Token.Punctuation: "op",  # (, ), [, ], ...
    # --- String literals ----------------------------------------------------
    Token.Literal.String: "st",
    Token.Literal.String.Single: "st",
    Token.Literal.String.Double: "st",
    Token.Literal.String.Doc: "st",  # docstrings
    # --- Numeric literals ---------------------------------------------------
    Token.Literal.Number: "dv",
    Token.Literal.Number.Integer: "dv",
    Token.Literal.Number.Float: "fl",
    # --- Comments -----------------------------------------------------------
    Token.Comment: "co",
    Token.Comment.Single: "co",
}
53+
54+
55+
def get_quarto_class(token_type):
    """
    Get the Quarto highlight class for a Pygments token type.

    The lookup starts at `token_type` itself and then moves up through its parent types
    (for example `Token.Name.Function.Magic` -> `Token.Name.Function` -> `Token.Name`),
    so the most specific entry in `PYGMENTS_TO_QUARTO_CLASS` always wins.

    Parameters
    ----------
    token_type
        A Pygments token type, e.g. `Token.Name.Function`.

    Returns
    -------
    str | None
        The mapped class name, or `None` when neither the token type nor any of its
        parents has an entry in `PYGMENTS_TO_QUARTO_CLASS`.
    """
    # `split()` lists the ancestry root-first and ends with `token_type` itself. Walking
    # it in reverse checks the exact type first and then ever more generic parents, so a
    # specific mapping (Token.Name.Function) is never shadowed by a generic one
    # (Token.Name).
    for candidate in reversed(token_type.split()):
        if candidate in PYGMENTS_TO_QUARTO_CLASS:
            return PYGMENTS_TO_QUARTO_CLASS[candidate]

    # Default to no special class
    return None
68+
69+
70+
def highlight_signature_with_pygments(signature_text):
    """
    Use Pygments to tokenize a Python signature and apply Quarto-compatible highlighting.

    Every token is HTML-escaped and, when `get_quarto_class()` returns a class for its
    token type, wrapped in `<span class="...">`. Tokens without a class are emitted as
    escaped plain text.

    Parameters
    ----------
    signature_text
        Plain (not HTML-escaped) source text of the signature.

    Returns
    -------
    str
        An HTML fragment containing the highlighted signature.
    """
    lexer = PythonLexer()

    result = []
    for token_type, value in lexer.get_tokens(signature_text):
        # Skip empty tokens and bare newline tokens.
        # NOTE(review): this also drops newline tokens that sit between the lines of a
        # multi-line signature, not only a trailing one -- confirm that is intended.
        if not value or value == "\n":
            continue

        quarto_class = get_quarto_class(token_type)

        # HTML-escape the value. `&` has to be replaced first so that the ampersands
        # introduced by the later replacements are not escaped a second time.
        escaped_value = (
            value.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace('"', "&quot;")
        )

        if quarto_class:
            result.append(f'<span class="{quarto_class}">{escaped_value}</span>')
        else:
            result.append(escaped_value)

    return "".join(result)
100+
101+
28102
def get_source_link_html(item_name):
29103
"""Generate HTML for a source link given an item name."""
30104
if item_name in source_links:
@@ -150,6 +224,49 @@ def reformat_signature(match):
150224
return signature_pattern.sub(reformat_signature, html_content)
151225

152226

227+
def apply_pygments_highlighting(html_content):
    """
    Apply Pygments-based syntax highlighting to Python signatures in HTML.

    This function finds the `cb1` signature code block, reduces its content to plain
    text, and replaces the existing highlighting markup with the output of
    `highlight_signature_with_pygments()`.

    Parameters
    ----------
    html_content
        The rendered HTML of a page, as a single string.

    Returns
    -------
    str
        The HTML with the content of every matched `cb1` code block re-highlighted.
        Input without a matching block is returned unchanged.
    """
    # Imported locally so this function does not rely on a module-level import.
    import html

    # Pattern to match the main signature code block (cb1):
    #   group 1 - everything from the opening sourceCode div up to and including <code ...>
    #   group 2 - the (already highlighted) code content
    #   group 3 - the closing </code></pre></div>
    cb1_pattern = re.compile(
        r'(<div class="sourceCode" id="cb1">.*?<code[^>]*>)'
        r"(.*?)"
        r"(</code></pre></div>)",
        re.DOTALL,
    )

    def replace_with_pygments(match):
        pre_code = match.group(1)
        code_content = match.group(2)
        post_code = match.group(3)

        # Extract plain text from the HTML (strip existing span tags)
        plain_text = re.sub(r"<[^>]+>", "", code_content)

        # Decode HTML entities in a single pass. Chained str.replace() calls would
        # decode "&amp;quot;" twice (ending up with a bare quote) and would leave numeric
        # references such as "&#39;" untouched, which are then escaped again and shown
        # literally in the rendered signature.
        plain_text = html.unescape(plain_text)

        # Apply Pygments highlighting
        highlighted = highlight_signature_with_pygments(plain_text)

        # Wrap in a single span with the same ID structure as original
        # The signature code typically starts with <span id="cb1-1">
        highlighted_wrapped = f'<span id="cb1-1">{highlighted}</span>'

        return f"{pre_code}{highlighted_wrapped}{post_code}"

    return cb1_pattern.sub(replace_with_pygments, html_content)
268+
269+
153270
def strip_directives_from_html(html_content):
154271
"""
155272
Remove Great Docs %directive lines from rendered HTML.
@@ -270,6 +387,9 @@ def generate_seealso_html(seealso_items):
270387
# Format signatures with multiple arguments onto separate lines
271388
content = format_signature_multiline(content)
272389

390+
# Apply Pygments-based syntax highlighting to signatures
391+
content = apply_pygments_highlighting(content)
392+
273393
# Convert back to lines for line-by-line processing
274394
content = content.splitlines(keepends=True)
275395

great_docs/core.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1608,15 +1608,18 @@ def family_sort_key(family_name: str) -> tuple:
16081608
# Add items without %family to fallback sections
16091609
# Exclude both items with families AND items marked %nodoc
16101610
unassigned_classes = [
1611-
c for c in categories.get("classes", [])
1611+
c
1612+
for c in categories.get("classes", [])
16121613
if c not in items_with_family and c not in excluded_items
16131614
]
16141615
unassigned_functions = [
1615-
f for f in categories.get("functions", [])
1616+
f
1617+
for f in categories.get("functions", [])
16161618
if f not in items_with_family and f not in excluded_items
16171619
]
16181620
unassigned_other = [
1619-
o for o in categories.get("other", [])
1621+
o
1622+
for o in categories.get("other", [])
16201623
if o not in items_with_family and o not in excluded_items
16211624
]
16221625

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ dependencies = [
3333
"pyyaml>=5.4.0",
3434
"click>=8.0.0",
3535
"griffe>=0.35.0",
36+
"pygments>=2.0.0",
3637
]
3738

3839
[project.optional-dependencies]

0 commit comments

Comments
 (0)