Skip to content

Commit 2c8cc22

Browse files
committed
🛠️ fix(parser): preserve link titles and wrap list parsers
1 parent 73f3144 commit 2c8cc22

2 files changed

Lines changed: 48 additions & 6 deletions

File tree

sdiff/parser.py

Lines changed: 27 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -123,13 +123,17 @@ def flush_buffer():
123123
elif token_type == 'link':
124124
flush_buffer()
125125
text = self._flatten_inline_text(token.get('children', []))
126-
url = token.get('attrs', {}).get('url', '')
127-
nodes.append(Link(f"[{text}]({url})"))
126+
attrs = token.get('attrs', {})
127+
url = attrs.get('url', '')
128+
title = attrs.get('title')
129+
nodes.append(Link(_format_link_markup(text, url, title)))
128130
elif token_type == 'image':
129131
flush_buffer()
130132
alt = token.get('attrs', {}).get('alt') or self._flatten_inline_text(token.get('children', []))
131-
url = token.get('attrs', {}).get('url', '')
132-
nodes.append(Image(f"![{alt}]({url})"))
133+
attrs = token.get('attrs', {})
134+
url = attrs.get('url', '')
135+
title = attrs.get('title')
136+
nodes.append(Image(_format_image_markup(alt, url, title)))
133137
else:
134138
flush_buffer()
135139
children = token.get('children', [])
@@ -290,6 +294,21 @@ def _append_text(nodes, text):
290294
nodes.append(Text(text))
291295

292296

297+
def _format_title(title: str) -> str:
298+
if title is None:
299+
return ''
300+
escaped = title.replace('"', '\\"')
301+
return f' "{escaped}"'
302+
303+
304+
def _format_link_markup(text: str, url: str, title: str | None) -> str:
    """Build inline link markup of the form ``[text](url)``.

    The suffix returned by ``_format_title`` for *title* is placed directly
    after *url*, inside the parentheses.
    """
    title_suffix = _format_title(title)
    return '[{}]({}{})'.format(text, url, title_suffix)
306+
307+
308+
def _format_image_markup(alt: str, url: str, title: str | None) -> str:
    """Build inline image markup of the form ``![alt](url)``.

    The suffix returned by ``_format_title`` for *title* is placed directly
    after *url*, inside the parentheses.
    """
    title_suffix = _format_title(title)
    return '![{}]({}{})'.format(alt, url, title_suffix)
310+
311+
293312
def _is_block_html(raw: str) -> bool:
294313
stripped = raw.lstrip()
295314
if stripped.startswith('<!--'):
@@ -363,4 +382,7 @@ def parse(text, parser_cls: type[MdParser] = MdParser):
363382
parser = parser_cls()
364383
if hasattr(parser, '_set_reference_definitions'):
365384
parser._set_reference_definitions(reference_definitions)
366-
return parser.parse(text)
385+
result = parser.parse(text)
386+
if isinstance(result, list):
387+
return Root(result)
388+
return result

tests/test_parser.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from unittest import TestCase
22
from sdiff import parser, MdParser, ZendeskHelpMdParser
3-
from sdiff.model import ZendeskHelpSteps
3+
from sdiff.model import Paragraph, Root, Text, ZendeskHelpSteps
44

55

66
class ParserTestCase(TestCase):
@@ -76,6 +76,14 @@ def test_link_label_with_codespan(self):
7676
actual = self._parse('[use `foo`](url)')
7777
self.assertEqual('[use `foo`](url)', actual.nodes[0].nodes[0].text)
7878

79+
def test_link_title_preserved(self):
    """The text of the first nested node equals the titled link source."""
    source = '[label](https://example.com "Title Here")'
    tree = self._parse(source)
    first_inline = tree.nodes[0].nodes[0]
    self.assertEqual('[label](https://example.com "Title Here")', first_inline.text)
82+
83+
def test_image_title_preserved(self):
    """The text of the first nested node equals the titled image source."""
    source = '![alt](https://img "Img Title")'
    tree = self._parse(source)
    first_inline = tree.nodes[0].nodes[0]
    self.assertEqual('![alt](https://img "Img Title")', first_inline.text)
86+
7987
def test_reference_definition_preserved(self):
8088
data = 'See [API][id].\n\n[id]: https://example.com'
8189
tree = self._parse(data)
@@ -215,3 +223,15 @@ def test_remove_ltr_rtl_marks(self):
215223
text = 'a\u200eb\u200f'
216224
actual = parser._remove_ltr_rtl_marks(text)
217225
self.assertEqual('ab', actual)
226+
227+
228+
class DummyParser:
    """Stand-in parser whose ``parse`` returns a plain list of nodes."""

    def parse(self, text):
        """Return a one-element list holding a paragraph built from *text*."""
        paragraph = Paragraph([Text(text)])
        return [paragraph]
231+
232+
233+
class TestParseWrapper(TestCase):
    """Checks how ``parser.parse`` handles a parser class that returns a list."""

    def test_wraps_list_parser_output(self):
        """Parsing with ``DummyParser`` yields a ``Root`` that prints as 'pt'."""
        wrapped = parser.parse('hello', parser_cls=DummyParser)
        self.assertIsInstance(wrapped, Root)
        printed = wrapped.print_all()
        self.assertEqual('pt', printed)

0 commit comments

Comments
 (0)