|
3 | 3 | import os |
4 | 4 | import re |
5 | 5 |
|
| 6 | +from pygments.lexers import PythonLexer |
| 7 | +from pygments.token import Token |
| 8 | + |
6 | 9 | # Print the working directory |
7 | 10 | print("Current working directory:", os.getcwd()) |
8 | 11 |
|
|
25 | 28 | print("No source links file found, skipping source link injection") |
26 | 29 |
|
27 | 30 |
|
# Lookup table: Pygments token type -> Quarto/highlight.js CSS class name.
# Token types without an entry of their own are resolved through their
# ancestors by get_quarto_class().
PYGMENTS_TO_QUARTO_CLASS = {
    # Keywords
    Token.Keyword: "kw",  # def, return, if, ...
    Token.Keyword.Constant: "va",  # True, False, None
    # Names
    Token.Name.Function: "fu",  # function names
    Token.Name.Class: "fu",  # class names, styled like functions
    Token.Name.Builtin: "bu",  # built-ins such as str, int, list, dict
    Token.Name.Builtin.Pseudo: "va",  # self, cls
    Token.Name.Decorator: "at",  # @decorator
    Token.Name: "op",  # any other name (parameters, variables)
    # Operators and punctuation
    Token.Operator: "op",  # =, |, ...
    Token.Punctuation: "op",  # (, ), [, ], ...
    # String literals
    Token.Literal.String: "st",
    Token.Literal.String.Single: "st",
    Token.Literal.String.Double: "st",
    Token.Literal.String.Doc: "st",  # docstrings
    # Numeric literals
    Token.Literal.Number: "dv",
    Token.Literal.Number.Integer: "dv",
    Token.Literal.Number.Float: "fl",
    # Comments
    Token.Comment: "co",
    Token.Comment.Single: "co",
}
| 53 | + |
| 54 | + |
def get_quarto_class(token_type):
    """Return the Quarto highlight class for a Pygments token type.

    The lookup starts at ``token_type`` itself and then walks up through its
    ancestors, most specific first. A subtype without its own entry therefore
    inherits the class of its *nearest* mapped ancestor: for example
    ``Token.Name.Function.Magic`` resolves through ``Token.Name.Function``
    rather than through the broader ``Token.Name``.

    Args:
        token_type: A Pygments token type, i.e. an object whose ``split()``
            returns its ancestry ordered from the root down to the type
            itself.

    Returns:
        The class name found in ``PYGMENTS_TO_QUARTO_CLASS``, or ``None``
        when neither the type nor any of its ancestors has an entry.
    """
    # split() yields root -> leaf; reverse it so the closest match wins.
    # The first candidate is token_type itself, which covers the exact match.
    for candidate in reversed(token_type.split()):
        quarto_class = PYGMENTS_TO_QUARTO_CLASS.get(candidate)
        if quarto_class is not None:
            return quarto_class

    # Nothing in the ancestry is mapped: no special class.
    return None
| 68 | + |
| 69 | + |
def highlight_signature_with_pygments(signature_text):
    """
    Tokenize a Python signature with Pygments and render it as Quarto-style HTML.

    Every token value is HTML-escaped. Tokens whose type maps to a Quarto
    class (see `get_quarto_class()`) are wrapped in `<span class="...">`;
    tokens without a class are emitted as escaped text only.

    Parameters
    ----------
    signature_text
        Plain-text Python signature (no HTML markup).

    Returns
    -------
    str
        The concatenated HTML fragments for all emitted tokens.
    """
    lexer = PythonLexer()

    fragments = []
    for token_type, value in lexer.get_tokens(signature_text):
        # Empty tokens and standalone newline tokens are not emitted.
        # NOTE(review): this drops *every* "\n" token, which joins the lines
        # of a multi-line signature. Presumably only the newline Pygments
        # appends to its input was meant -- confirm against rendered output.
        if not value or value == "\n":
            continue

        # HTML-escape the value. "&" must go first so that the ampersands
        # introduced by the other entities are not escaped a second time.
        escaped_value = (
            value.replace("&", "&amp;")
            .replace("<", "&lt;")
            .replace(">", "&gt;")
            .replace('"', "&quot;")
        )

        quarto_class = get_quarto_class(token_type)
        if quarto_class:
            fragments.append(f'<span class="{quarto_class}">{escaped_value}</span>')
        else:
            fragments.append(escaped_value)

    return "".join(fragments)
| 100 | + |
| 101 | + |
28 | 102 | def get_source_link_html(item_name): |
29 | 103 | """Generate HTML for a source link given an item name.""" |
30 | 104 | if item_name in source_links: |
@@ -150,6 +224,49 @@ def reformat_signature(match): |
150 | 224 | return signature_pattern.sub(reformat_signature, html_content) |
151 | 225 |
|
152 | 226 |
|
def apply_pygments_highlighting(html_content):
    """
    Apply Pygments-based syntax highlighting to Python signatures in HTML.

    Finds the `cb1` sourceCode block, strips the existing highlighting markup
    from its `<code>` content, decodes the HTML entities back to plain text,
    and replaces the content with the output of
    `highlight_signature_with_pygments()`.

    Parameters
    ----------
    html_content
        The HTML document (or fragment) as a single string.

    Returns
    -------
    str
        The HTML with the content of each matched `cb1` block replaced. Input
        without such a block is returned unchanged.
    """
    # Function-scope import: stdlib only, keeps this block self-contained.
    from html import unescape

    # Pattern to match the main signature code block (cb1):
    #   group 1 - everything from the sourceCode div up to the opening <code>
    #   group 2 - the highlighted code content
    #   group 3 - the closing tags
    cb1_pattern = re.compile(
        r'(<div class="sourceCode" id="cb1">.*?<code[^>]*>)'
        r"(.*?)"
        r"(</code></pre></div>)",
        re.DOTALL,
    )

    def replace_with_pygments(match):
        pre_code, code_content, post_code = match.groups()

        # Strip the existing tags, leaving only the (entity-encoded) text.
        text_only = re.sub(r"<[^>]+>", "", code_content)

        # Decode *all* HTML entities, not just &lt; &gt; &amp; &quot;.
        # Anything left encoded (e.g. "&#39;" for a single quote) would reach
        # the Python lexer, where "#" starts a comment.
        plain_text = unescape(text_only)

        highlighted = highlight_signature_with_pygments(plain_text)

        # Wrap in a single span with the same ID structure as the original;
        # the signature code typically starts with <span id="cb1-1">.
        return f'{pre_code}<span id="cb1-1">{highlighted}</span>{post_code}'

    return cb1_pattern.sub(replace_with_pygments, html_content)
| 268 | + |
| 269 | + |
153 | 270 | def strip_directives_from_html(html_content): |
154 | 271 | """ |
155 | 272 | Remove Great Docs %directive lines from rendered HTML. |
@@ -270,6 +387,9 @@ def generate_seealso_html(seealso_items): |
270 | 387 | # Format signatures with multiple arguments onto separate lines |
271 | 388 | content = format_signature_multiline(content) |
272 | 389 |
|
| 390 | + # Apply Pygments-based syntax highlighting to signatures |
| 391 | + content = apply_pygments_highlighting(content) |
| 392 | + |
273 | 393 | # Convert back to lines for line-by-line processing |
274 | 394 | content = content.splitlines(keepends=True) |
275 | 395 |
|
|
0 commit comments