Skip to content

Commit 7c9ad59

Browse files
committed
🛠️ fix(parser): restore list ref defs and quote fences
1 parent 73782e8 commit 7c9ad59

2 files changed

Lines changed: 57 additions & 3 deletions

File tree

sdiff/parser.py

Lines changed: 43 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -72,7 +72,11 @@ def _convert_block_token(self, token):
7272
return [self._convert_paragraph_or_heading(token.get('children', []))]
7373
if token_type == 'block_html':
7474
return self._convert_block_html(token)
75-
if token_type in {'thematic_break', 'block_quote', 'block_code', 'fenced_code'}:
75+
if token_type == 'block_quote':
76+
return self._convert_block_quote(token)
77+
if token_type == 'block_code':
78+
return self._convert_block_code(token)
79+
if token_type == 'thematic_break':
7680
return self._convert_passthrough_block(token)
7781
return self._convert_passthrough_block(token)
7882

@@ -109,6 +113,37 @@ def _convert_passthrough_block(self, token):
109113
return [Paragraph([Text(mistune.escape(raw))])]
110114
return []
111115

116+
def _convert_block_quote(self, token):
    """Rebuild a block quote token as '>'-prefixed text inside a Paragraph.

    The token's children are rendered to plain text with
    ``_render_inline_children``. Every non-blank line is prefixed with
    ``'> '`` and every blank line becomes a bare ``'>'``.

    Returns an empty list when the token has no children or the rendered
    text is only whitespace; otherwise a one-element list holding a
    Paragraph with a single Text of the escaped, quoted lines.
    """
    nested = token.get('children', [])
    # Only render when there is something to render; an empty result falls
    # through to the same "nothing to emit" exit below.
    rendered = self._render_inline_children(nested) if nested else ''
    if not rendered.strip():
        return []
    prefixed = []
    for line in rendered.splitlines():
        prefixed.append(f'> {line}' if line.strip() else '>')
    return [Paragraph([Text(mistune.escape('\n'.join(prefixed)))])]
126+
127+
def _convert_block_code(self, token):
    """Render a code-block token as fenced source text inside a Paragraph.

    The token's ``raw`` value, with trailing newlines removed, is placed
    between an opening and a closing fence line. The token's ``marker`` is
    used as the fence when it is present.

    When the token has no marker, a backtick fence is used and lengthened
    until it no longer occurs anywhere in the code, so the code itself
    cannot terminate the block early.
    NOTE(review): marker-less tokens are presumably indented code blocks --
    confirm against the tokenizer.

    Returns a one-element list holding a Paragraph with a single escaped
    Text.
    """
    body = (token.get('raw') or '').rstrip('\n')
    fence = token.get('marker')
    if not fence:
        # No fence came from the source document, so pick one that is safe
        # for this particular content.
        fence = '```'
        while fence in body:
            fence += '`'
    fenced = f'{fence}\n{body}\n{fence}'
    return [Paragraph([Text(mistune.escape(fenced))])]
134+
135+
def _render_inline_children(self, children):
136+
parts = []
137+
for child in children:
138+
child_type = child.get('type')
139+
if child_type in {'paragraph', 'block_text'}:
140+
parts.append(self._flatten_inline_text(child.get('children', [])))
141+
else:
142+
raw = child.get('raw') or child.get('text') or ''
143+
if raw:
144+
parts.append(raw)
145+
return '\n'.join([part for part in parts if part is not None])
146+
112147
def _convert_list_item(self, token):
113148
item = ListItem()
114149
for child in token.get('children', []):
@@ -132,7 +167,8 @@ def flush_buffer():
132167
for token in tokens:
133168
token_type = token.get('type')
134169
if token_type in {'text', 'inline_html', 'block_html'}:
135-
buffer += token.get('raw', '')
170+
raw = token.get('raw', '')
171+
buffer += self._reference_definitions.get(raw, raw)
136172
elif token_type == 'codespan':
137173
buffer += f"`{token.get('raw') or token.get('text') or ''}`"
138174
elif token_type == 'softbreak':
@@ -180,7 +216,8 @@ def _flatten_inline_text(self, tokens: Iterable[dict]):
180216
for token in tokens:
181217
token_type = token.get('type')
182218
if token_type in {'text', 'inline_html', 'block_html'}:
183-
parts.append(token.get('raw') or token.get('text') or '')
219+
raw = token.get('raw') or token.get('text') or ''
220+
parts.append(self._reference_definitions.get(raw, raw))
184221
elif token_type == 'codespan':
185222
parts.append(f"`{token.get('raw') or token.get('text') or ''}`")
186223
elif token_type in _INLINE_MARKERS:
@@ -207,6 +244,9 @@ def _convert_paragraph_or_heading(self, inline_tokens: Iterable[dict]):
207244
return Paragraph(self._convert_inline_tokens(inline_tokens))
208245

209246
def _convert_list_block_nodes(self, inline_tokens: Iterable[dict]):
247+
ref_text = self._reference_definition_text(inline_tokens)
248+
if ref_text is not None:
249+
return [Text(ref_text)]
210250
heading = self._heading_from_inline(inline_tokens)
211251
if heading:
212252
return [heading]

tests/test_parser.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -95,6 +95,12 @@ def test_reference_definition_preserved(self):
9595
self.assertEqual('[API][id]', link.text)
9696
self.assertEqual('[id]: https://example.com', tree.nodes[1].nodes[0].text)
9797

98+
def test_reference_definition_inside_list_item_preserved(self):
    """A reference definition placed under a list item appears in that item's text."""
    source = '- item\n [id]: https://example.com'
    first_item = self._parse(source).nodes[0].nodes[0]
    self.assertIn('[id]: https://example.com', first_item.nodes[0].text)
103+
98104
def test_reference_links_with_whitespace_and_empty_id(self):
99105
data = 'See [API][] and [Ref] [id].\n\n[API]: https://example.com\n[id]: https://example.com'
100106
tree = self._parse(data)
@@ -122,6 +128,14 @@ def test_softbreak_preserves_space(self):
122128
actual = self._parse('hello\nworld')
123129
self.assertEqual('hello world', actual.nodes[0].nodes[0].text)
124130

131+
def test_block_quote_preserves_marker(self):
    """A one-line block quote keeps its leading '> ' marker in the parsed text."""
    tree = self._parse('> quote')
    first_text = tree.nodes[0].nodes[0].text
    self.assertEqual('> quote', first_text)
134+
135+
def test_fenced_code_preserves_fences(self):
    """A fenced code block keeps its opening and closing fences in the parsed text."""
    tree = self._parse('```\ncode\n```')
    first_text = tree.nodes[0].nodes[0].text
    self.assertEqual('```\ncode\n```', first_text)
138+
125139
def test_ordered_list_parses_as_ordered(self):
126140
tree = self._parse('1. one\n2. two')
127141
list_node = tree.nodes[0]

0 commit comments

Comments
 (0)