Skip to content

Commit 4339b68

Browse files
committed
Fix fenced code blocks not rendering inside blockquotes
The fenced code preprocessor only runs once before block parsing, so fenced code blocks inside blockquotes were not detected after the blockquote processor stripped the '>' prefixes. Add a new FencedCodeBlockProcessor (BlockProcessor) that handles fenced code blocks in nested contexts like blockquotes. Extract common functionality into FencedCodeMixin to avoid code duplication between the preprocessor and block processor.
1 parent 9933a0a commit 4339b68

File tree

3 files changed

+368
-82
lines changed

3 files changed

+368
-82
lines changed

docs/changelog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@ See the [Contributing Guide](contributing.md) for details.
1717
* Ensure nested elements inside inline comments are properly unescaped (#1571).
1818
* Make the docs build successfully with mkdocstrings-python 2.0 (#1575).
1919
* Fix infinite loop when multiple bogus or unclosed HTML comments appear in input (#1578).
20+
* Fix fenced code blocks not rendering inside blockquotes.
2021

2122
## [3.10.0] - 2025-11-03
2223

markdown/extensions/fenced_code.py

Lines changed: 243 additions & 82 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,10 @@
2222
from __future__ import annotations
2323

2424
from textwrap import dedent
25+
import xml.etree.ElementTree as etree
2526
from . import Extension
2627
from ..preprocessors import Preprocessor
28+
from ..blockprocessors import BlockProcessor
2729
from .codehilite import CodeHilite, CodeHiliteExtension, parse_hl_lines
2830
from .attr_list import get_attrs_and_remainder, AttrListExtension
2931
from ..util import parseBoolValue
@@ -33,6 +35,7 @@
3335

3436
if TYPE_CHECKING: # pragma: no cover
3537
from markdown import Markdown
38+
from ..blockparser import BlockParser
3639

3740

3841
class FencedCodeExtension(Extension):
@@ -44,13 +47,116 @@ def __init__(self, **kwargs):
4447
super().__init__(**kwargs)
4548

4649
def extendMarkdown(self, md):
47-
""" Add `FencedBlockPreprocessor` to the Markdown instance. """
50+
""" Add fenced code block processors to the Markdown instance. """
4851
md.registerExtension(self)
4952

53+
# Preprocessor handles top-level fenced code blocks efficiently
5054
md.preprocessors.register(FencedBlockPreprocessor(md, self.getConfigs()), 'fenced_code_block', 25)
5155

56+
# BlockProcessor handles fenced code blocks in nested contexts (e.g., blockquotes)
57+
# Priority 75 runs it after CodeBlockProcessor (80) and before HashHeaderProcessor (70)
58+
md.parser.blockprocessors.register(
59+
FencedCodeBlockProcessor(md.parser, md, self.getConfigs()),
60+
'fenced_code_block',
61+
75
62+
)
5263

53-
class FencedBlockPreprocessor(Preprocessor):
64+
65+
class FencedCodeMixin:
66+
"""
67+
Mixin class providing shared functionality for fenced code block processing.
68+
69+
This mixin contains common methods used by both FencedBlockPreprocessor and
70+
FencedCodeBlockProcessor to avoid code duplication.
71+
"""
72+
73+
# List of options to convert to boolean values
74+
BOOL_OPTIONS = [
75+
'linenums',
76+
'guess_lang',
77+
'noclasses',
78+
'use_pygments'
79+
]
80+
81+
def _check_for_deps(self) -> None:
82+
"""Check for dependent extensions (CodeHilite, AttrList)."""
83+
if not self.checked_for_deps:
84+
for ext in self.md.registeredExtensions:
85+
if isinstance(ext, CodeHiliteExtension):
86+
self.codehilite_conf = ext.getConfigs()
87+
if isinstance(ext, AttrListExtension):
88+
self.use_attr_list = True
89+
self.checked_for_deps = True
90+
91+
def _handle_attrs(self, attrs: Iterable[tuple[str, str]]) -> tuple[str, list[str], dict[str, Any]]:
92+
"""Return tuple: `(id, [list, of, classes], {configs})`"""
93+
id_attr = ''
94+
classes = []
95+
configs = {}
96+
for k, v in attrs:
97+
if k == 'id':
98+
id_attr = v
99+
elif k == '.':
100+
classes.append(v)
101+
elif k == 'hl_lines':
102+
configs[k] = parse_hl_lines(v)
103+
elif k in self.BOOL_OPTIONS:
104+
configs[k] = parseBoolValue(v, fail_on_errors=False, preserve_none=True)
105+
else:
106+
configs[k] = v
107+
return id_attr, classes, configs
108+
109+
def _generate_html(
110+
self, code: str, lang: str | None, id_attr: str, classes: list[str], config: dict[str, Any]
111+
) -> str:
112+
"""Generate HTML for the fenced code block."""
113+
if self.codehilite_conf and self.codehilite_conf['use_pygments'] and config.get('use_pygments', True):
114+
local_config = self.codehilite_conf.copy()
115+
local_config.update(config)
116+
# Combine classes with `cssclass`. Ensure `cssclass` is at end
117+
# as Pygments appends a suffix under certain circumstances.
118+
# Ignore ID as Pygments does not offer an option to set it.
119+
if classes:
120+
local_config['css_class'] = '{} {}'.format(
121+
' '.join(classes),
122+
local_config['css_class']
123+
)
124+
highliter = CodeHilite(
125+
code,
126+
lang=lang,
127+
style=local_config.pop('pygments_style', 'default'),
128+
**local_config
129+
)
130+
return highliter.hilite(shebang=False)
131+
else:
132+
id_str = lang_str = class_str = kv_pairs = ''
133+
if lang:
134+
prefix = self.config.get('lang_prefix', 'language-')
135+
lang_str = f' class="{prefix}{_escape_attrib_html(lang)}"'
136+
if classes:
137+
class_str = f' class="{_escape_attrib_html(" ".join(classes))}"'
138+
if id_attr:
139+
id_str = f' id="{_escape_attrib_html(id_attr)}"'
140+
if self.use_attr_list and config and not config.get('use_pygments', False):
141+
# Only assign key/value pairs to code element if `attr_list` extension is enabled, key/value
142+
# pairs were defined on the code block, and the `use_pygments` key was not set to `True`. The
143+
# `use_pygments` key could be either set to `False` or not defined. It is omitted from output.
144+
kv_pairs = ''.join(
145+
f' {k}="{_escape_attrib_html(v)}"' for k, v in config.items() if k != 'use_pygments'
146+
)
147+
escaped_code = self._escape(code)
148+
return f'<pre{id_str}{class_str}><code{lang_str}{kv_pairs}>{escaped_code}</code></pre>'
149+
150+
def _escape(self, txt: str) -> str:
151+
"""Basic HTML escaping."""
152+
txt = txt.replace('&', '&amp;')
153+
txt = txt.replace('<', '&lt;')
154+
txt = txt.replace('>', '&gt;')
155+
txt = txt.replace('"', '&quot;')
156+
return txt
157+
158+
159+
class FencedBlockPreprocessor(FencedCodeMixin, Preprocessor):
54160
""" Find and extract fenced code blocks. """
55161

56162
FENCED_BLOCK_RE = re.compile(
@@ -72,39 +178,23 @@ def __init__(self, md: Markdown, config: dict[str, Any]):
72178
self.checked_for_deps = False
73179
self.codehilite_conf: dict[str, Any] = {}
74180
self.use_attr_list = False
75-
# List of options to convert to boolean values
76-
self.bool_options = [
77-
'linenums',
78-
'guess_lang',
79-
'noclasses',
80-
'use_pygments'
81-
]
82181

83182
def run(self, lines: list[str]) -> list[str]:
84183
""" Match and store Fenced Code Blocks in the `HtmlStash`. """
85-
86-
# Check for dependent extensions
87-
if not self.checked_for_deps:
88-
for ext in self.md.registeredExtensions:
89-
if isinstance(ext, CodeHiliteExtension):
90-
self.codehilite_conf = ext.getConfigs()
91-
if isinstance(ext, AttrListExtension):
92-
self.use_attr_list = True
93-
94-
self.checked_for_deps = True
184+
self._check_for_deps()
95185

96186
text = "\n".join(lines)
97187
index = 0
98188
while 1:
99189
m = self.FENCED_BLOCK_RE.search(text, index)
100190
if m:
101-
lang, id, classes, config = None, '', [], {}
191+
lang, id_attr, classes, config = None, '', [], {}
102192
if m.group('attrs'):
103193
attrs, remainder = get_attrs_and_remainder(m.group('attrs'))
104194
if remainder: # Does not have correctly matching curly braces, so the syntax is invalid.
105195
index = m.end('attrs') # Explicitly skip over this, to prevent an infinite loop.
106196
continue
107-
id, classes, config = self.handle_attrs(attrs)
197+
id_attr, classes, config = self._handle_attrs(attrs)
108198
if len(classes):
109199
lang = classes.pop(0)
110200
else:
@@ -114,45 +204,7 @@ def run(self, lines: list[str]) -> list[str]:
114204
# Support `hl_lines` outside of `attrs` for backward-compatibility
115205
config['hl_lines'] = parse_hl_lines(m.group('hl_lines'))
116206

117-
# If `config` is not empty, then the `codehighlite` extension
118-
# is enabled, so we call it to highlight the code
119-
if self.codehilite_conf and self.codehilite_conf['use_pygments'] and config.get('use_pygments', True):
120-
local_config = self.codehilite_conf.copy()
121-
local_config.update(config)
122-
# Combine classes with `cssclass`. Ensure `cssclass` is at end
123-
# as Pygments appends a suffix under certain circumstances.
124-
# Ignore ID as Pygments does not offer an option to set it.
125-
if classes:
126-
local_config['css_class'] = '{} {}'.format(
127-
' '.join(classes),
128-
local_config['css_class']
129-
)
130-
highliter = CodeHilite(
131-
m.group('code'),
132-
lang=lang,
133-
style=local_config.pop('pygments_style', 'default'),
134-
**local_config
135-
)
136-
137-
code = highliter.hilite(shebang=False)
138-
else:
139-
id_attr = lang_attr = class_attr = kv_pairs = ''
140-
if lang:
141-
prefix = self.config.get('lang_prefix', 'language-')
142-
lang_attr = f' class="{prefix}{_escape_attrib_html(lang)}"'
143-
if classes:
144-
class_attr = f' class="{_escape_attrib_html(" ".join(classes))}"'
145-
if id:
146-
id_attr = f' id="{_escape_attrib_html(id)}"'
147-
if self.use_attr_list and config and not config.get('use_pygments', False):
148-
# Only assign key/value pairs to code element if `attr_list` extension is enabled, key/value
149-
# pairs were defined on the code block, and the `use_pygments` key was not set to `True`. The
150-
# `use_pygments` key could be either set to `False` or not defined. It is omitted from output.
151-
kv_pairs = ''.join(
152-
f' {k}="{_escape_attrib_html(v)}"' for k, v in config.items() if k != 'use_pygments'
153-
)
154-
code = self._escape(m.group('code'))
155-
code = f'<pre{id_attr}{class_attr}><code{lang_attr}{kv_pairs}>{code}</code></pre>'
207+
code = self._generate_html(m.group('code'), lang, id_attr, classes, config)
156208

157209
placeholder = self.md.htmlStash.store(code)
158210
text = f'{text[:m.start()]}\n{placeholder}\n{text[m.end():]}'
@@ -162,31 +214,140 @@ def run(self, lines: list[str]) -> list[str]:
162214
break
163215
return text.split("\n")
164216

217+
# Keep handle_attrs as public method for backward compatibility
165218
def handle_attrs(self, attrs: Iterable[tuple[str, str]]) -> tuple[str, list[str], dict[str, Any]]:
166219
""" Return tuple: `(id, [list, of, classes], {configs})` """
167-
id = ''
168-
classes = []
169-
configs = {}
170-
for k, v in attrs:
171-
if k == 'id':
172-
id = v
173-
elif k == '.':
174-
classes.append(v)
175-
elif k == 'hl_lines':
176-
configs[k] = parse_hl_lines(v)
177-
elif k in self.bool_options:
178-
configs[k] = parseBoolValue(v, fail_on_errors=False, preserve_none=True)
179-
else:
180-
configs[k] = v
181-
return id, classes, configs
220+
return self._handle_attrs(attrs)
182221

183-
def _escape(self, txt: str) -> str:
184-
""" basic html escaping """
185-
txt = txt.replace('&', '&amp;')
186-
txt = txt.replace('<', '&lt;')
187-
txt = txt.replace('>', '&gt;')
188-
txt = txt.replace('"', '&quot;')
189-
return txt
222+
223+
class FencedCodeBlockProcessor(FencedCodeMixin, BlockProcessor):
    """
    Process fenced code blocks in nested contexts (e.g., blockquotes).

    This BlockProcessor complements FencedBlockPreprocessor by handling
    fenced code blocks that are only revealed after block-level processing
    (such as stripping '>' from blockquotes).
    """

    # Pattern to detect start of fenced code block
    FENCED_START_RE = re.compile(
        r'^(?P<fence>(?:~{3,}|`{3,}))[ ]*'          # opening fence
        r'((\{(?P<attrs>[^\n]*)\})|'                # optional {attrs} or
        r'(\.?(?P<lang>[\w#.+-]*)[ ]*)?'            # optional (.)lang
        r'(hl_lines=(?P<quot>"|\')'                 # optional hl_lines
        r'(?P<hl_lines>.*?)(?P=quot)[ ]*)?)?$'
    )

    # Pattern to detect end of fenced code block
    FENCED_END_RE = re.compile(r'^(?P<fence>(?:~{3,}|`{3,}))[ ]*$')

    def __init__(self, parser: BlockParser, md: Markdown, config: dict[str, Any]):
        """Store the Markdown instance and extension config used by the mixin helpers."""
        super().__init__(parser)
        self.md = md
        self.config = config
        # State read and populated lazily by `FencedCodeMixin._check_for_deps`.
        self.checked_for_deps = False
        self.codehilite_conf: dict[str, Any] = {}
        self.use_attr_list = False

    def test(self, parent: etree.Element, block: str) -> bool:
        """Test if block starts with a fenced code opening."""
        return self.FENCED_START_RE.match(block.split('\n', 1)[0]) is not None

    def run(self, parent: etree.Element, blocks: list[str]) -> bool | None:
        """
        Convert the fenced code block at the head of `blocks` into stashed HTML.

        On success the blocks (or part of a block) that made up the fenced
        code are removed from `blocks`, any text following the closing fence
        is put back at the front of `blocks`, a `<p>` holding the stash
        placeholder is appended to `parent`, and `True` is returned.

        If the first block is not a valid fenced code opening, its `{attrs}`
        are malformed, or no closing fence exists, `blocks` is left exactly
        as it was and `False` is returned.
        """
        self._check_for_deps()

        # Nothing is removed from `blocks` until a closing fence is known to
        # exist, so every failure path below leaves `blocks` untouched.
        m_start = self.FENCED_START_RE.match(blocks[0].split('\n', 1)[0])
        if not m_start:
            # Should not happen since test() passed, but be safe
            return False

        # Extract language/attrs from opening fence
        lang, id_attr, classes, config = None, '', [], {}
        if m_start.group('attrs'):
            attrs, invalid = get_attrs_and_remainder(m_start.group('attrs'))
            if invalid:
                # Invalid attrs syntax, don't process as fenced code
                return False
            id_attr, classes, config = self._handle_attrs(attrs)
            if classes:
                lang = classes.pop(0)
        else:
            if m_start.group('lang'):
                lang = m_start.group('lang')
            if m_start.group('hl_lines'):
                config['hl_lines'] = parse_hl_lines(m_start.group('hl_lines'))

        found = self._collect_code(blocks, m_start.group('fence'))
        if found is None:
            # No closing fence found, treat as regular content
            return False
        code_lines, consumed, trailing = found

        # Drop the blocks that formed the fenced code; hand back whatever
        # followed the closing fence within its block.
        del blocks[:consumed]
        if trailing.strip():
            blocks.insert(0, trailing)

        # Terminate every code line with a newline (empty code stays empty).
        code_content = ''.join(f'{line}\n' for line in code_lines)

        # Generate HTML and store in HtmlStash
        html = self._generate_html(code_content, lang, id_attr, classes, config)
        placeholder = self.md.htmlStash.store(html)

        # Create placeholder element
        p = etree.SubElement(parent, 'p')
        p.text = placeholder

        return True

    def _is_closing_fence(self, line: str, opening_fence: str) -> bool:
        """Return True if `line` is a fence that closes `opening_fence`."""
        m_end = self.FENCED_END_RE.match(line)
        if not m_end:
            return False
        closing_fence = m_end.group('fence')
        # Closing fence must use same char and be at least as long
        return closing_fence[0] == opening_fence[0] and len(closing_fence) >= len(opening_fence)

    def _collect_code(self, blocks: list[str], opening_fence: str) -> tuple[list[str], int, str] | None:
        """
        Scan `blocks` (without modifying it) for the fence closing `opening_fence`.

        Returns `(code_lines, consumed, trailing)` where `consumed` is the
        number of leading blocks that belong to the fenced code and `trailing`
        is the text after the closing fence in the last of those blocks, or
        `None` when no closing fence is present.
        """
        code_lines: list[str] = []
        for index, block in enumerate(blocks):
            block_lines = block.split('\n')
            if index == 0:
                # Skip the opening fence line itself.
                block_lines = block_lines[1:]
            else:
                # Restore the blank line that separated this block from the
                # previous one (assumes blocks were split on a blank line,
                # i.e. '\n\n').
                code_lines.append('')
            for pos, line in enumerate(block_lines):
                if self._is_closing_fence(line, opening_fence):
                    return code_lines, index + 1, '\n'.join(block_lines[pos + 1:])
                code_lines.append(line)
        return None
190351

191352

192353
def makeExtension(**kwargs): # pragma: no cover

0 commit comments

Comments
 (0)