Skip to content

Commit 7d03da4

Browse files
committed
Update post-render.py
1 parent 8b83eb2 commit 7d03da4

1 file changed

Lines changed: 129 additions & 110 deletions

File tree

great_docs/assets/post-render.py

Lines changed: 129 additions & 110 deletions
Original file line number | Diff line number | Diff line change
@@ -3,8 +3,9 @@
33
import os
44
import re
55

6+
from pygments import highlight
7+
from pygments.formatters import HtmlFormatter
68
from pygments.lexers import PythonLexer
7-
from pygments.token import Token
89

910
# Print the working directory
1011
print("Current working directory:", os.getcwd())
@@ -28,83 +29,144 @@
2829
print("No source links file found, skipping source link injection")
2930

3031

31-
# Mapping from Pygments token types to Quarto/highlight.js class names
32+
def get_source_link_html(item_name):
    """Generate HTML for a source link given an item name.

    Looks ``item_name`` up in the module-level ``source_links`` mapping and
    returns a ``SOURCE`` anchor that points at the entry's ``"url"`` value.
    Returns an empty string when the mapping has no entry for ``item_name``.
    """
    # Imported locally so this helper does not rely on the module's import block.
    import html

    if item_name not in source_links:
        return ""

    # Escape the URL before placing it inside a double-quoted attribute so that
    # characters such as '"' or '&' cannot break out of (or corrupt) the href.
    url = html.escape(str(source_links[item_name]["url"]), quote=True)
    return f'<a href="{url}" class="source-link" target="_blank" rel="noopener">SOURCE</a>'
38+
39+
40+
# Pygments class to Quarto class mapping
# Quarto uses different class names than Pygments default.
#
# Keys are the short CSS classes emitted by Pygments' HtmlFormatter; values are
# the class names to substitute for them. An empty value means the <span>
# wrapper is dropped and only its text is kept.
#
# highlight_signature_with_pygments() applies these entries one after another as
# plain text replacements, so no value may also appear as a key (otherwise an
# already-rewritten span would be rewritten a second time).
PYGMENTS_TO_QUARTO_CLASS = {
    "n": "va",  # Name -> variable (generic names)
    "nc": "fu",  # Name.Class -> function (we want class names highlighted)
    "nf": "fu",  # Name.Function -> function
    "fm": "fu",  # Name.Function.Magic -> function
    "nb": "bu",  # Name.Builtin -> builtin
    "bp": "bu",  # Name.Builtin.Pseudo -> builtin
    "nd": "at",  # Name.Decorator -> attribute (@decorator)
    "k": "kw",  # Keyword -> keyword
    "kc": "cn",  # Keyword.Constant -> constant (None, True, False) - will be split further
    "kd": "kw",  # Keyword.Declaration -> keyword
    "kn": "kw",  # Keyword.Namespace -> keyword
    "kr": "kw",  # Keyword.Reserved -> keyword
    "o": "op",  # Operator -> operator
    "ow": "op",  # Operator.Word -> operator
    "p": "",  # Punctuation -> no special class
    "s": "st",  # String -> string
    "s1": "st",  # String.Single -> string
    "s2": "st",  # String.Double -> string
    "sd": "st",  # String.Doc -> string (docstrings)
    "m": "dv",  # Number -> decimal value (generic numbers)
    "mi": "dv",  # Number.Integer -> decimal value
    "mf": "fl",  # Number.Float -> float
    "c": "co",  # Comment -> comment
    "c1": "co",  # Comment.Single -> comment
}
5365

5466

55-
def get_quarto_class(token_type):
56-
"""Get the Quarto highlight class for a Pygments token type."""
57-
# Check exact match first
58-
if token_type in PYGMENTS_TO_QUARTO_CLASS:
59-
return PYGMENTS_TO_QUARTO_CLASS[token_type]
67+
def highlight_signature_with_pygments(html_content):
68+
"""
69+
Re-highlight the main signature block (cb1) with Pygments for better syntax coloring.
6070
61-
# Check parent types
62-
for parent in token_type.split():
63-
if parent in PYGMENTS_TO_QUARTO_CLASS:
64-
return PYGMENTS_TO_QUARTO_CLASS[parent]
71+
This extracts the signature code, highlights it with Pygments, then maps
72+
the Pygments CSS classes to Quarto's highlighting classes for consistency.
73+
"""
74+
# Find the main signature code block (id="cb1")
75+
cb1_pattern = re.compile(
76+
r'(<div class="sourceCode" id="cb1">.*?<code class="sourceCode python">)'
77+
r"(.*?)"
78+
r"(</code>.*?</div>)",
79+
re.DOTALL,
80+
)
6581

66-
# Default to no special class
67-
return None
82+
def replace_signature(match):
83+
prefix = match.group(1)
84+
code_content = match.group(2)
85+
suffix = match.group(3)
6886

87+
# Extract plain text from the HTML spans
88+
# Remove HTML tags but preserve the text content
89+
plain_code = re.sub(r"<[^>]+>", "", code_content)
90+
# Clean up the text (unescape HTML entities)
91+
plain_code = plain_code.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&")
6992

70-
def highlight_signature_with_pygments(signature_text):
71-
"""
72-
Use Pygments to tokenize a Python signature and apply Quarto-compatible highlighting.
93+
# Highlight with Pygments
94+
lexer = PythonLexer()
95+
# Use a custom formatter that generates short class names
96+
formatter = HtmlFormatter(nowrap=True, classprefix="")
7397

74-
This produces syntax highlighting that matches Quarto's code block styling.
75-
"""
76-
lexer = PythonLexer()
77-
tokens = list(lexer.get_tokens(signature_text))
78-
79-
result = []
80-
for token_type, value in tokens:
81-
if not value or value == "\n":
82-
continue
83-
84-
quarto_class = get_quarto_class(token_type)
85-
86-
# HTML-escape the value
87-
escaped_value = (
88-
value.replace("&", "&amp;")
89-
.replace("<", "&lt;")
90-
.replace(">", "&gt;")
91-
.replace('"', "&quot;")
98+
highlighted = highlight(plain_code, lexer, formatter)
99+
100+
# Map Pygments classes to Quarto classes
101+
for pg_class, quarto_class in PYGMENTS_TO_QUARTO_CLASS.items():
102+
if quarto_class:
103+
highlighted = highlighted.replace(f'class="{pg_class}"', f'class="{quarto_class}"')
104+
else:
105+
# Remove empty class attributes
106+
highlighted = re.sub(
107+
rf'<span class="{pg_class}">([^<]*)</span>', r"\1", highlighted
108+
)
109+
110+
# Special handling for the first line: make method/function name stand out
111+
# Pattern: ClassName.method_name( or function_name(
112+
# Replace the name before ( with a function class for better highlighting
113+
first_line_pattern = re.compile(
114+
r'^(<span class="va">)(\w+)(</span>)(<span class="op">\.</span>)?'
115+
r'(<span class="va">)?(\w+)?(</span>)?(\()'
92116
)
93117

94-
if quarto_class:
95-
result.append(f'<span class="{quarto_class}">{escaped_value}</span>')
96-
else:
97-
result.append(escaped_value)
118+
def enhance_first_line(m):
119+
# If there's a dot, it's ClassName.method_name
120+
if m.group(4): # Has dot
121+
class_name = m.group(2)
122+
method_name = m.group(6) or ""
123+
return (
124+
f'<span class="sig-class">{class_name}</span>'
125+
f'<span class="op">.</span>'
126+
f'<span class="sig-name">{method_name}</span>('
127+
)
128+
else:
129+
# Just function_name(
130+
func_name = m.group(2)
131+
return f'<span class="sig-name">{func_name}</span>('
98132

99-
return "".join(result)
133+
highlighted = first_line_pattern.sub(enhance_first_line, highlighted, count=1)
100134

135+
# Differentiate None from True/False
136+
# None gets 'cn-none' class, True/False get 'cn-bool' class
137+
highlighted = highlighted.replace(
138+
'<span class="cn">None</span>', '<span class="cn-none">None</span>'
139+
)
140+
highlighted = highlighted.replace(
141+
'<span class="cn">True</span>', '<span class="cn-bool">True</span>'
142+
)
143+
highlighted = highlighted.replace(
144+
'<span class="cn">False</span>', '<span class="cn-bool">False</span>'
145+
)
101146

102-
def get_source_link_html(item_name):
103-
"""Generate HTML for a source link given an item name."""
104-
if item_name in source_links:
105-
url = source_links[item_name]["url"]
106-
return f'<a href="{url}" class="source-link" target="_blank" rel="noopener">SOURCE</a>'
107-
return ""
147+
# Convert single quotes to double quotes in string literals
148+
# Pygments outputs HTML entities: &#39; for single quote
149+
# Match both s1 (string single) and st (after class mapping) classes
150+
highlighted = re.sub(
151+
r'<span class="(st|s1)">&#39;([^&]*)&#39;</span>',
152+
r'<span class="\1">&quot;\2&quot;</span>',
153+
highlighted,
154+
)
155+
156+
# Wrap each line in a span with proper id for line linking
157+
lines = highlighted.split("\n")
158+
wrapped_lines = []
159+
for i, line in enumerate(lines, 1):
160+
if line: # Skip empty lines at the end
161+
wrapped_lines.append(
162+
f'<span id="cb1-{i}"><a href="#cb1-{i}" aria-hidden="true" tabindex="-1"></a>{line}</span>'
163+
)
164+
165+
new_code = "\n".join(wrapped_lines)
166+
167+
return f"{prefix}{new_code}{suffix}"
168+
169+
return cb1_pattern.sub(replace_signature, html_content)
108170

109171

110172
def format_signature_multiline(html_content):
@@ -224,49 +286,6 @@ def reformat_signature(match):
224286
return signature_pattern.sub(reformat_signature, html_content)
225287

226288

227-
def apply_pygments_highlighting(html_content):
228-
"""
229-
Apply Pygments-based syntax highlighting to Python signatures in HTML.
230-
231-
This function finds signature code blocks and replaces quartodoc's highlighting
232-
with Pygments-generated highlighting for consistent, accurate syntax coloring.
233-
"""
234-
# Pattern to match the main signature code block (cb1)
235-
# This matches the entire sourceCode div containing the signature
236-
cb1_pattern = re.compile(
237-
r'(<div class="sourceCode" id="cb1">.*?<code[^>]*>)'
238-
r"(.*?)"
239-
r"(</code></pre></div>)",
240-
re.DOTALL,
241-
)
242-
243-
def replace_with_pygments(match):
244-
pre_code = match.group(1)
245-
code_content = match.group(2)
246-
post_code = match.group(3)
247-
248-
# Extract plain text from the HTML (strip existing span tags)
249-
plain_text = re.sub(r"<[^>]+>", "", code_content)
250-
# Decode HTML entities
251-
plain_text = (
252-
plain_text.replace("&lt;", "<")
253-
.replace("&gt;", ">")
254-
.replace("&amp;", "&")
255-
.replace("&quot;", '"')
256-
)
257-
258-
# Apply Pygments highlighting
259-
highlighted = highlight_signature_with_pygments(plain_text)
260-
261-
# Wrap in a single span with the same ID structure as original
262-
# The signature code typically starts with <span id="cb1-1">
263-
highlighted_wrapped = f'<span id="cb1-1">{highlighted}</span>'
264-
265-
return f"{pre_code}{highlighted_wrapped}{post_code}"
266-
267-
return cb1_pattern.sub(replace_with_pygments, html_content)
268-
269-
270289
def strip_directives_from_html(html_content):
271290
"""
272291
Remove Great Docs %directive lines from rendered HTML.
@@ -384,12 +403,12 @@ def generate_seealso_html(seealso_items):
384403
# Strip %directive lines from rendered HTML (safety net for docstring directives)
385404
content = strip_directives_from_html(content)
386405

406+
# Re-highlight the signature with Pygments for better syntax coloring
407+
content = highlight_signature_with_pygments(content)
408+
387409
# Format signatures with multiple arguments onto separate lines
388410
content = format_signature_multiline(content)
389411

390-
# Apply Pygments-based syntax highlighting to signatures
391-
content = apply_pygments_highlighting(content)
392-
393412
# Convert back to lines for line-by-line processing
394413
content = content.splitlines(keepends=True)
395414

0 commit comments

Comments
 (0)