2222from __future__ import annotations
2323
2424from textwrap import dedent
25+ import xml .etree .ElementTree as etree
2526from . import Extension
2627from ..preprocessors import Preprocessor
28+ from ..blockprocessors import BlockProcessor
2729from .codehilite import CodeHilite , CodeHiliteExtension , parse_hl_lines
2830from .attr_list import get_attrs_and_remainder , AttrListExtension
2931from ..util import parseBoolValue
3335
3436if TYPE_CHECKING : # pragma: no cover
3537 from markdown import Markdown
38+ from ..blockparser import BlockParser
3639
3740
3841class FencedCodeExtension (Extension ):
@@ -44,13 +47,116 @@ def __init__(self, **kwargs):
4447 super ().__init__ (** kwargs )
4548
def extendMarkdown(self, md):
    """ Add fenced code block processors to the Markdown instance. """
    md.registerExtension(self)

    # The preprocessor works on the raw source lines, so it handles
    # top-level fenced code blocks before any block parsing happens.
    md.preprocessors.register(FencedBlockPreprocessor(md, self.getConfigs()), 'fenced_code_block', 25)

    # The block processor handles fenced code blocks in nested contexts
    # (e.g., blockquotes). Block processors with a higher priority are tried
    # first, so priority 75 runs after `CodeBlockProcessor` (80) and before
    # `HashHeaderProcessor` (70).
    md.parser.blockprocessors.register(
        FencedCodeBlockProcessor(md.parser, md, self.getConfigs()),
        'fenced_code_block',
        75
    )
5263
53- class FencedBlockPreprocessor (Preprocessor ):
64+
65+ class FencedCodeMixin :
66+ """
67+ Mixin class providing shared functionality for fenced code block processing.
68+
69+ This mixin contains common methods used by both FencedBlockPreprocessor and
70+ FencedCodeBlockProcessor to avoid code duplication.
71+ """
72+
73+ # List of options to convert to boolean values
74+ BOOL_OPTIONS = [
75+ 'linenums' ,
76+ 'guess_lang' ,
77+ 'noclasses' ,
78+ 'use_pygments'
79+ ]
80+
81+ def _check_for_deps (self ) -> None :
82+ """Check for dependent extensions (CodeHilite, AttrList)."""
83+ if not self .checked_for_deps :
84+ for ext in self .md .registeredExtensions :
85+ if isinstance (ext , CodeHiliteExtension ):
86+ self .codehilite_conf = ext .getConfigs ()
87+ if isinstance (ext , AttrListExtension ):
88+ self .use_attr_list = True
89+ self .checked_for_deps = True
90+
91+ def _handle_attrs (self , attrs : Iterable [tuple [str , str ]]) -> tuple [str , list [str ], dict [str , Any ]]:
92+ """Return tuple: `(id, [list, of, classes], {configs})`"""
93+ id_attr = ''
94+ classes = []
95+ configs = {}
96+ for k , v in attrs :
97+ if k == 'id' :
98+ id_attr = v
99+ elif k == '.' :
100+ classes .append (v )
101+ elif k == 'hl_lines' :
102+ configs [k ] = parse_hl_lines (v )
103+ elif k in self .BOOL_OPTIONS :
104+ configs [k ] = parseBoolValue (v , fail_on_errors = False , preserve_none = True )
105+ else :
106+ configs [k ] = v
107+ return id_attr , classes , configs
108+
109+ def _generate_html (
110+ self , code : str , lang : str | None , id_attr : str , classes : list [str ], config : dict [str , Any ]
111+ ) -> str :
112+ """Generate HTML for the fenced code block."""
113+ if self .codehilite_conf and self .codehilite_conf ['use_pygments' ] and config .get ('use_pygments' , True ):
114+ local_config = self .codehilite_conf .copy ()
115+ local_config .update (config )
116+ # Combine classes with `cssclass`. Ensure `cssclass` is at end
117+ # as Pygments appends a suffix under certain circumstances.
118+ # Ignore ID as Pygments does not offer an option to set it.
119+ if classes :
120+ local_config ['css_class' ] = '{} {}' .format (
121+ ' ' .join (classes ),
122+ local_config ['css_class' ]
123+ )
124+ highliter = CodeHilite (
125+ code ,
126+ lang = lang ,
127+ style = local_config .pop ('pygments_style' , 'default' ),
128+ ** local_config
129+ )
130+ return highliter .hilite (shebang = False )
131+ else :
132+ id_str = lang_str = class_str = kv_pairs = ''
133+ if lang :
134+ prefix = self .config .get ('lang_prefix' , 'language-' )
135+ lang_str = f' class="{ prefix } { _escape_attrib_html (lang )} "'
136+ if classes :
137+ class_str = f' class="{ _escape_attrib_html (" " .join (classes ))} "'
138+ if id_attr :
139+ id_str = f' id="{ _escape_attrib_html (id_attr )} "'
140+ if self .use_attr_list and config and not config .get ('use_pygments' , False ):
141+ # Only assign key/value pairs to code element if `attr_list` extension is enabled, key/value
142+ # pairs were defined on the code block, and the `use_pygments` key was not set to `True`. The
143+ # `use_pygments` key could be either set to `False` or not defined. It is omitted from output.
144+ kv_pairs = '' .join (
145+ f' { k } ="{ _escape_attrib_html (v )} "' for k , v in config .items () if k != 'use_pygments'
146+ )
147+ escaped_code = self ._escape (code )
148+ return f'<pre{ id_str } { class_str } ><code{ lang_str } { kv_pairs } >{ escaped_code } </code></pre>'
149+
150+ def _escape (self , txt : str ) -> str :
151+ """Basic HTML escaping."""
152+ txt = txt .replace ('&' , '&' )
153+ txt = txt .replace ('<' , '<' )
154+ txt = txt .replace ('>' , '>' )
155+ txt = txt .replace ('"' , '"' )
156+ return txt
157+
158+
159+ class FencedBlockPreprocessor (FencedCodeMixin , Preprocessor ):
54160 """ Find and extract fenced code blocks. """
55161
56162 FENCED_BLOCK_RE = re .compile (
@@ -72,39 +178,23 @@ def __init__(self, md: Markdown, config: dict[str, Any]):
72178 self .checked_for_deps = False
73179 self .codehilite_conf : dict [str , Any ] = {}
74180 self .use_attr_list = False
75- # List of options to convert to boolean values
76- self .bool_options = [
77- 'linenums' ,
78- 'guess_lang' ,
79- 'noclasses' ,
80- 'use_pygments'
81- ]
82181
83182 def run (self , lines : list [str ]) -> list [str ]:
84183 """ Match and store Fenced Code Blocks in the `HtmlStash`. """
85-
86- # Check for dependent extensions
87- if not self .checked_for_deps :
88- for ext in self .md .registeredExtensions :
89- if isinstance (ext , CodeHiliteExtension ):
90- self .codehilite_conf = ext .getConfigs ()
91- if isinstance (ext , AttrListExtension ):
92- self .use_attr_list = True
93-
94- self .checked_for_deps = True
184+ self ._check_for_deps ()
95185
96186 text = "\n " .join (lines )
97187 index = 0
98188 while 1 :
99189 m = self .FENCED_BLOCK_RE .search (text , index )
100190 if m :
101- lang , id , classes , config = None , '' , [], {}
191+ lang , id_attr , classes , config = None , '' , [], {}
102192 if m .group ('attrs' ):
103193 attrs , remainder = get_attrs_and_remainder (m .group ('attrs' ))
104194 if remainder : # Does not have correctly matching curly braces, so the syntax is invalid.
105195 index = m .end ('attrs' ) # Explicitly skip over this, to prevent an infinite loop.
106196 continue
107- id , classes , config = self .handle_attrs (attrs )
197+ id_attr , classes , config = self ._handle_attrs (attrs )
108198 if len (classes ):
109199 lang = classes .pop (0 )
110200 else :
@@ -114,45 +204,7 @@ def run(self, lines: list[str]) -> list[str]:
114204 # Support `hl_lines` outside of `attrs` for backward-compatibility
115205 config ['hl_lines' ] = parse_hl_lines (m .group ('hl_lines' ))
116206
117- # If `config` is not empty, then the `codehighlite` extension
118- # is enabled, so we call it to highlight the code
119- if self .codehilite_conf and self .codehilite_conf ['use_pygments' ] and config .get ('use_pygments' , True ):
120- local_config = self .codehilite_conf .copy ()
121- local_config .update (config )
122- # Combine classes with `cssclass`. Ensure `cssclass` is at end
123- # as Pygments appends a suffix under certain circumstances.
124- # Ignore ID as Pygments does not offer an option to set it.
125- if classes :
126- local_config ['css_class' ] = '{} {}' .format (
127- ' ' .join (classes ),
128- local_config ['css_class' ]
129- )
130- highliter = CodeHilite (
131- m .group ('code' ),
132- lang = lang ,
133- style = local_config .pop ('pygments_style' , 'default' ),
134- ** local_config
135- )
136-
137- code = highliter .hilite (shebang = False )
138- else :
139- id_attr = lang_attr = class_attr = kv_pairs = ''
140- if lang :
141- prefix = self .config .get ('lang_prefix' , 'language-' )
142- lang_attr = f' class="{ prefix } { _escape_attrib_html (lang )} "'
143- if classes :
144- class_attr = f' class="{ _escape_attrib_html (" " .join (classes ))} "'
145- if id :
146- id_attr = f' id="{ _escape_attrib_html (id )} "'
147- if self .use_attr_list and config and not config .get ('use_pygments' , False ):
148- # Only assign key/value pairs to code element if `attr_list` extension is enabled, key/value
149- # pairs were defined on the code block, and the `use_pygments` key was not set to `True`. The
150- # `use_pygments` key could be either set to `False` or not defined. It is omitted from output.
151- kv_pairs = '' .join (
152- f' { k } ="{ _escape_attrib_html (v )} "' for k , v in config .items () if k != 'use_pygments'
153- )
154- code = self ._escape (m .group ('code' ))
155- code = f'<pre{ id_attr } { class_attr } ><code{ lang_attr } { kv_pairs } >{ code } </code></pre>'
207+ code = self ._generate_html (m .group ('code' ), lang , id_attr , classes , config )
156208
157209 placeholder = self .md .htmlStash .store (code )
158210 text = f'{ text [:m .start ()]} \n { placeholder } \n { text [m .end ():]} '
@@ -162,31 +214,140 @@ def run(self, lines: list[str]) -> list[str]:
162214 break
163215 return text .split ("\n " )
164216
217+ # Keep handle_attrs as public method for backward compatibility
def handle_attrs(self, attrs: Iterable[tuple[str, str]]) -> tuple[str, list[str], dict[str, Any]]:
    """ Return tuple: `(id, [list, of, classes], {configs})`

    Public entry point that forwards to `_handle_attrs`.
    """
    return self._handle_attrs(attrs)
182221
183- def _escape (self , txt : str ) -> str :
184- """ basic html escaping """
185- txt = txt .replace ('&' , '&' )
186- txt = txt .replace ('<' , '<' )
187- txt = txt .replace ('>' , '>' )
188- txt = txt .replace ('"' , '"' )
189- return txt
222+
class FencedCodeBlockProcessor(FencedCodeMixin, BlockProcessor):
    """
    Process fenced code blocks in nested contexts (e.g., blockquotes).

    This BlockProcessor complements FencedBlockPreprocessor by handling
    fenced code blocks that are only revealed after block-level processing
    (such as stripping '>' from blockquotes).
    """

    # Pattern to detect start of fenced code block
    FENCED_START_RE = re.compile(
        r'^(?P<fence>(?:~{3,}|`{3,}))[ ]*'          # opening fence
        r'((\{(?P<attrs>[^\n]*)\})|'                # optional {attrs} or
        r'(\.?(?P<lang>[\w#.+-]*)[ ]*)?'            # optional (.)lang
        r'(hl_lines=(?P<quot>"|\')'                 # optional hl_lines
        r'(?P<hl_lines>.*?)(?P=quot)[ ]*)?)?$'
    )

    # Pattern to detect end of fenced code block
    FENCED_END_RE = re.compile(r'^(?P<fence>(?:~{3,}|`{3,}))[ ]*$')

    def __init__(self, parser: BlockParser, md: Markdown, config: dict[str, Any]):
        super().__init__(parser)
        self.md = md
        self.config = config
        self.checked_for_deps = False
        self.codehilite_conf: dict[str, Any] = {}
        self.use_attr_list = False

    def test(self, parent: etree.Element, block: str) -> bool:
        """Test if block starts with a fenced code opening."""
        return self.FENCED_START_RE.match(block.split('\n', 1)[0]) is not None

    def _find_closing_fence(self, lines: list[str], fence_char: str, fence_len: int) -> int | None:
        """Return the index of the first line in `lines` that closes the fence, or `None`."""
        for index, line in enumerate(lines):
            m_end = self.FENCED_END_RE.match(line)
            if not m_end:
                continue
            end_fence = m_end.group('fence')
            # Closing fence must use same char and be at least as long
            if end_fence[0] == fence_char and len(end_fence) >= fence_len:
                return index
        return None

    def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
        """Process the fenced code block.

        Consumes the opening block, plus as many following blocks as needed
        to reach the closing fence, and appends a `<p>` holding an `HtmlStash`
        placeholder for the rendered code to `parent`.

        Returns `True` when a fenced block was processed. Returns `False`
        when the block is not a valid fenced code block (bad attribute syntax
        or no closing fence); in that case `blocks` is left exactly as it was
        on entry.
        """
        self._check_for_deps()

        block = blocks.pop(0)
        lines = block.split('\n')
        m_start = self.FENCED_START_RE.match(lines[0])

        if not m_start:
            # Should not happen since test() passed, but be safe
            blocks.insert(0, block)
            return False

        opening_fence = m_start.group('fence')
        fence_char = opening_fence[0]
        fence_len = len(opening_fence)

        # Extract language/attrs from opening fence
        lang, id_attr, classes, config = None, '', [], {}
        if m_start.group('attrs'):
            attrs, remainder = get_attrs_and_remainder(m_start.group('attrs'))
            if remainder:
                # Invalid attrs syntax, don't process as fenced code
                blocks.insert(0, block)
                return False
            id_attr, classes, config = self._handle_attrs(attrs)
            if classes:
                lang = classes.pop(0)
        else:
            if m_start.group('lang'):
                lang = m_start.group('lang')
            if m_start.group('hl_lines'):
                config['hl_lines'] = parse_hl_lines(m_start.group('hl_lines'))

        # Find the closing fence, first in the rest of the opening block and
        # then in the blocks that follow it.
        code_lines: list[str] = []
        consumed: list[str] = []  # follow-up blocks popped while searching
        candidate = lines[1:]
        while True:
            end = self._find_closing_fence(candidate, fence_char, fence_len)
            if end is not None:
                code_lines.extend(candidate[:end])
                # Any content after the closing fence goes back on the queue.
                trailing = '\n'.join(candidate[end + 1:])
                if trailing.strip():
                    blocks.insert(0, trailing)
                break

            code_lines.extend(candidate)
            if not blocks:
                # No closing fence found, treat as regular content. Put back
                # every block taken off the queue, not only the opening one,
                # so no document content is lost.
                blocks.extend([block, *consumed])
                return False

            # Re-insert the blank line that separated the two blocks
            # (they were separated by \n\n).
            code_lines.append('')
            next_block = blocks.pop(0)
            consumed.append(next_block)
            candidate = next_block.split('\n')

        # Build code content. Every code line is newline-terminated so that
        # blank lines directly before the closing fence are preserved too.
        code_content = ''.join(f'{line}\n' for line in code_lines)

        # Generate HTML and store in HtmlStash
        html = self._generate_html(code_content, lang, id_attr, classes, config)
        placeholder = self.md.htmlStash.store(html)

        # Create placeholder element
        p = etree.SubElement(parent, 'p')
        p.text = placeholder

        return True
190351
191352
192353def makeExtension (** kwargs ): # pragma: no cover
0 commit comments