|
3 | 3 | import os |
4 | 4 | import re |
5 | 5 |
|
| 6 | +from pygments import highlight |
| 7 | +from pygments.formatters import HtmlFormatter |
6 | 8 | from pygments.lexers import PythonLexer |
7 | | -from pygments.token import Token |
8 | 9 |
|
9 | 10 | # Print the working directory |
10 | 11 | print("Current working directory:", os.getcwd()) |
|
28 | 29 | print("No source links file found, skipping source link injection") |
29 | 30 |
|
30 | 31 |
|
31 | | -# Mapping from Pygments token types to Quarto/highlight.js class names |
| 32 | +def get_source_link_html(item_name): |
| 33 | + """Generate HTML for a source link given an item name.""" |
| 34 | + if item_name in source_links: |
| 35 | + url = source_links[item_name]["url"] |
| 36 | + return f'<a href="{url}" class="source-link" target="_blank" rel="noopener">SOURCE</a>' |
| 37 | + return "" |
| 38 | + |
| 39 | + |
| 40 | +# Pygments class to Quarto class mapping |
| 41 | +# Quarto uses different class names than Pygments default |
32 | 42 | PYGMENTS_TO_QUARTO_CLASS = { |
33 | | - Token.Keyword: "kw", # Keywords like def, return, if, etc. |
34 | | - Token.Keyword.Constant: "va", # True, False, None |
35 | | - Token.Name.Function: "fu", # Function names |
36 | | - Token.Name.Class: "fu", # Class names (treat like functions) |
37 | | - Token.Name.Builtin: "bu", # Built-in names like str, int, list, dict |
38 | | - Token.Name.Builtin.Pseudo: "va", # self, cls |
39 | | - Token.Name.Decorator: "at", # @decorator |
40 | | - Token.Name: "op", # Generic names (parameters, variables) |
41 | | - Token.Operator: "op", # Operators like =, |, etc. |
42 | | - Token.Punctuation: "op", # Punctuation like (, ), [, ], etc. |
43 | | - Token.Literal.String: "st", # Strings |
44 | | - Token.Literal.String.Single: "st", |
45 | | - Token.Literal.String.Double: "st", |
46 | | - Token.Literal.String.Doc: "st", # Docstrings |
47 | | - Token.Literal.Number: "dv", # Numbers |
48 | | - Token.Literal.Number.Integer: "dv", |
49 | | - Token.Literal.Number.Float: "fl", |
50 | | - Token.Comment: "co", # Comments |
51 | | - Token.Comment.Single: "co", |
| 43 | + "n": "va", # Name -> variable (generic names) |
| 44 | + "nc": "fu", # Name.Class -> function (we want class names highlighted) |
| 45 | + "nf": "fu", # Name.Function -> function |
| 46 | + "fm": "fu", # Name.Function.Magic -> function |
| 47 | + "nb": "bu", # Name.Builtin -> builtin |
| 48 | + "bp": "bu", # Name.Builtin.Pseudo -> builtin |
| 49 | + "k": "kw", # Keyword -> keyword |
| 50 | + "kc": "cn", # Keyword.Constant -> constant (None, True, False) - will be split further |
| 51 | + "kd": "kw", # Keyword.Declaration -> keyword |
| 52 | + "kn": "kw", # Keyword.Namespace -> keyword |
| 53 | + "kr": "kw", # Keyword.Reserved -> keyword |
| 54 | + "o": "op", # Operator -> operator |
| 55 | + "ow": "op", # Operator.Word -> operator |
| 56 | + "p": "", # Punctuation -> no special class |
| 57 | + "s": "st", # String -> string |
| 58 | + "s1": "st", # String.Single -> string |
| 59 | + "s2": "st", # String.Double -> string |
| 60 | + "mi": "dv", # Number.Integer -> decimal value |
| 61 | + "mf": "fl", # Number.Float -> float |
| 62 | + "c": "co", # Comment -> comment |
| 63 | + "c1": "co", # Comment.Single -> comment |
52 | 64 | } |
53 | 65 |
|
54 | 66 |
|
55 | | -def get_quarto_class(token_type): |
56 | | - """Get the Quarto highlight class for a Pygments token type.""" |
57 | | - # Check exact match first |
58 | | - if token_type in PYGMENTS_TO_QUARTO_CLASS: |
59 | | - return PYGMENTS_TO_QUARTO_CLASS[token_type] |
| 67 | +def highlight_signature_with_pygments(html_content): |
| 68 | + """ |
| 69 | + Re-highlight the main signature block (cb1) with Pygments for better syntax coloring. |
60 | 70 |
|
61 | | - # Check parent types |
62 | | - for parent in token_type.split(): |
63 | | - if parent in PYGMENTS_TO_QUARTO_CLASS: |
64 | | - return PYGMENTS_TO_QUARTO_CLASS[parent] |
| 71 | + This extracts the signature code, highlights it with Pygments, then maps |
| 72 | + the Pygments CSS classes to Quarto's highlighting classes for consistency. |
| 73 | + """ |
| 74 | + # Find the main signature code block (id="cb1") |
| 75 | + cb1_pattern = re.compile( |
| 76 | + r'(<div class="sourceCode" id="cb1">.*?<code class="sourceCode python">)' |
| 77 | + r"(.*?)" |
| 78 | + r"(</code>.*?</div>)", |
| 79 | + re.DOTALL, |
| 80 | + ) |
65 | 81 |
|
66 | | - # Default to no special class |
67 | | - return None |
| 82 | + def replace_signature(match): |
| 83 | + prefix = match.group(1) |
| 84 | + code_content = match.group(2) |
| 85 | + suffix = match.group(3) |
68 | 86 |
|
| 87 | + # Extract plain text from the HTML spans |
| 88 | + # Remove HTML tags but preserve the text content |
| 89 | + plain_code = re.sub(r"<[^>]+>", "", code_content) |
| 90 | + # Clean up the text (unescape HTML entities) |
| 91 | + plain_code = plain_code.replace("&lt;", "<").replace("&gt;", ">").replace("&amp;", "&") |
69 | 92 |
|
70 | | -def highlight_signature_with_pygments(signature_text): |
71 | | - """ |
72 | | - Use Pygments to tokenize a Python signature and apply Quarto-compatible highlighting. |
| 93 | + # Highlight with Pygments |
| 94 | + lexer = PythonLexer() |
| 95 | + # Use a custom formatter that generates short class names |
| 96 | + formatter = HtmlFormatter(nowrap=True, classprefix="") |
73 | 97 |
|
74 | | - This produces syntax highlighting that matches Quarto's code block styling. |
75 | | - """ |
76 | | - lexer = PythonLexer() |
77 | | - tokens = list(lexer.get_tokens(signature_text)) |
78 | | - |
79 | | - result = [] |
80 | | - for token_type, value in tokens: |
81 | | - if not value or value == "\n": |
82 | | - continue |
83 | | - |
84 | | - quarto_class = get_quarto_class(token_type) |
85 | | - |
86 | | - # HTML-escape the value |
87 | | - escaped_value = ( |
87 | | - value.replace("&", "&amp;") |
88 | | - .replace("<", "&lt;") |
89 | | - .replace(">", "&gt;") |
90 | | - .replace('"', "&quot;") |
| 98 | + highlighted = highlight(plain_code, lexer, formatter) |
| 99 | + |
| 100 | + # Map Pygments classes to Quarto classes |
| 101 | + for pg_class, quarto_class in PYGMENTS_TO_QUARTO_CLASS.items(): |
| 102 | + if quarto_class: |
| 103 | + highlighted = highlighted.replace(f'class="{pg_class}"', f'class="{quarto_class}"') |
| 104 | + else: |
| 105 | + # Remove empty class attributes |
| 106 | + highlighted = re.sub( |
| 107 | + rf'<span class="{pg_class}">([^<]*)</span>', r"\1", highlighted |
| 108 | + ) |
| 109 | + |
| 110 | + # Special handling for the first line: make method/function name stand out |
| 111 | + # Pattern: ClassName.method_name( or function_name( |
| 112 | + # Replace the name before ( with a function class for better highlighting |
| 113 | + first_line_pattern = re.compile( |
| 114 | + r'^(<span class="va">)(\w+)(</span>)(<span class="op">\.</span>)?' |
| 115 | + r'(<span class="va">)?(\w+)?(</span>)?(\()' |
92 | 116 | ) |
93 | 117 |
|
94 | | - if quarto_class: |
95 | | - result.append(f'<span class="{quarto_class}">{escaped_value}</span>') |
96 | | - else: |
97 | | - result.append(escaped_value) |
| 118 | + def enhance_first_line(m): |
| 119 | + # If there's a dot, it's ClassName.method_name |
| 120 | + if m.group(4): # Has dot |
| 121 | + class_name = m.group(2) |
| 122 | + method_name = m.group(6) or "" |
| 123 | + return ( |
| 124 | + f'<span class="sig-class">{class_name}</span>' |
| 125 | + f'<span class="op">.</span>' |
| 126 | + f'<span class="sig-name">{method_name}</span>(' |
| 127 | + ) |
| 128 | + else: |
| 129 | + # Just function_name( |
| 130 | + func_name = m.group(2) |
| 131 | + return f'<span class="sig-name">{func_name}</span>(' |
98 | 132 |
|
99 | | - return "".join(result) |
| 133 | + highlighted = first_line_pattern.sub(enhance_first_line, highlighted, count=1) |
100 | 134 |
|
| 135 | + # Differentiate None from True/False |
| 136 | + # None gets 'cn-none' class, True/False get 'cn-bool' class |
| 137 | + highlighted = highlighted.replace( |
| 138 | + '<span class="cn">None</span>', '<span class="cn-none">None</span>' |
| 139 | + ) |
| 140 | + highlighted = highlighted.replace( |
| 141 | + '<span class="cn">True</span>', '<span class="cn-bool">True</span>' |
| 142 | + ) |
| 143 | + highlighted = highlighted.replace( |
| 144 | + '<span class="cn">False</span>', '<span class="cn-bool">False</span>' |
| 145 | + ) |
101 | 146 |
|
102 | | -def get_source_link_html(item_name): |
103 | | - """Generate HTML for a source link given an item name.""" |
104 | | - if item_name in source_links: |
105 | | - url = source_links[item_name]["url"] |
106 | | - return f'<a href="{url}" class="source-link" target="_blank" rel="noopener">SOURCE</a>' |
107 | | - return "" |
| 147 | + # Convert single quotes to double quotes in string literals |
| 148 | + # Pygments outputs HTML entities: &#39; for single quote |
| 149 | + # Match both s1 (string single) and st (after class mapping) classes |
| 150 | + highlighted = re.sub( |
| 151 | + r'<span class="(st|s1)">&#39;([^&]*)&#39;</span>', |
| 152 | + r'<span class="\1">"\2"</span>', |
| 153 | + highlighted, |
| 154 | + ) |
| 155 | + |
| 156 | + # Wrap each line in a span with proper id for line linking |
| 157 | + lines = highlighted.split("\n") |
| 158 | + wrapped_lines = [] |
| 159 | + for i, line in enumerate(lines, 1): |
| 160 | + if line: # Skip empty lines at the end |
| 161 | + wrapped_lines.append( |
| 162 | + f'<span id="cb1-{i}"><a href="#cb1-{i}" aria-hidden="true" tabindex="-1"></a>{line}</span>' |
| 163 | + ) |
| 164 | + |
| 165 | + new_code = "\n".join(wrapped_lines) |
| 166 | + |
| 167 | + return f"{prefix}{new_code}{suffix}" |
| 168 | + |
| 169 | + return cb1_pattern.sub(replace_signature, html_content) |
108 | 170 |
|
109 | 171 |
|
110 | 172 | def format_signature_multiline(html_content): |
@@ -224,49 +286,6 @@ def reformat_signature(match): |
224 | 286 | return signature_pattern.sub(reformat_signature, html_content) |
225 | 287 |
|
226 | 288 |
|
227 | | -def apply_pygments_highlighting(html_content): |
228 | | - """ |
229 | | - Apply Pygments-based syntax highlighting to Python signatures in HTML. |
230 | | -
|
231 | | - This function finds signature code blocks and replaces quartodoc's highlighting |
232 | | - with Pygments-generated highlighting for consistent, accurate syntax coloring. |
233 | | - """ |
234 | | - # Pattern to match the main signature code block (cb1) |
235 | | - # This matches the entire sourceCode div containing the signature |
236 | | - cb1_pattern = re.compile( |
237 | | - r'(<div class="sourceCode" id="cb1">.*?<code[^>]*>)' |
238 | | - r"(.*?)" |
239 | | - r"(</code></pre></div>)", |
240 | | - re.DOTALL, |
241 | | - ) |
242 | | - |
243 | | - def replace_with_pygments(match): |
244 | | - pre_code = match.group(1) |
245 | | - code_content = match.group(2) |
246 | | - post_code = match.group(3) |
247 | | - |
248 | | - # Extract plain text from the HTML (strip existing span tags) |
249 | | - plain_text = re.sub(r"<[^>]+>", "", code_content) |
250 | | - # Decode HTML entities |
251 | | - plain_text = ( |
252 | | - plain_text.replace("&lt;", "<") |
253 | | - .replace("&gt;", ">") |
254 | | - .replace("&amp;", "&") |
255 | | - .replace("&quot;", '"') |
256 | | - ) |
257 | | - |
258 | | - # Apply Pygments highlighting |
259 | | - highlighted = highlight_signature_with_pygments(plain_text) |
260 | | - |
261 | | - # Wrap in a single span with the same ID structure as original |
262 | | - # The signature code typically starts with <span id="cb1-1"> |
263 | | - highlighted_wrapped = f'<span id="cb1-1">{highlighted}</span>' |
264 | | - |
265 | | - return f"{pre_code}{highlighted_wrapped}{post_code}" |
266 | | - |
267 | | - return cb1_pattern.sub(replace_with_pygments, html_content) |
268 | | - |
269 | | - |
270 | 289 | def strip_directives_from_html(html_content): |
271 | 290 | """ |
272 | 291 | Remove Great Docs %directive lines from rendered HTML. |
@@ -384,12 +403,12 @@ def generate_seealso_html(seealso_items): |
384 | 403 | # Strip %directive lines from rendered HTML (safety net for docstring directives) |
385 | 404 | content = strip_directives_from_html(content) |
386 | 405 |
|
| 406 | + # Re-highlight the signature with Pygments for better syntax coloring |
| 407 | + content = highlight_signature_with_pygments(content) |
| 408 | + |
387 | 409 | # Format signatures with multiple arguments onto separate lines |
388 | 410 | content = format_signature_multiline(content) |
389 | 411 |
|
390 | | - # Apply Pygments-based syntax highlighting to signatures |
391 | | - content = apply_pygments_highlighting(content) |
392 | | - |
393 | 412 | # Convert back to lines for line-by-line processing |
394 | 413 | content = content.splitlines(keepends=True) |
395 | 414 |
|
|
0 commit comments