Skip to content

Commit de3a00d

Browse files
committed
More HTML fixes
- Have `parse_comment` handle bad `<!--` cases.
- `handle_comment` only needs to handle the `</` case for comments. Provide a more reliable way to calculate the index to check for this case.
- Ensure `override_comment_update` is reset, just in case.

Resolves #1586
1 parent 9933a0a commit de3a00d

2 files changed

Lines changed: 26 additions & 19 deletions

File tree

markdown/htmlparser.py

Lines changed: 18 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -126,6 +126,7 @@ def reset(self):
126126
self.cleandoc: list[str] = []
127127
self.lineno_start_cache = [0]
128128
self.override_comment_start = 0
129+
self.override_comment_update = False
129130

130131
super().reset()
131132

@@ -275,11 +276,11 @@ def handle_entityref(self, name: str):
275276

276277
def handle_comment(self, data: str):
277278
# Check if the comment is unclosed, if so, we need to override position
278-
i = self.line_offset + self.offset + len(data) + 4
279-
if self.rawdata[i:i + 3] != '-->':
279+
j = len(self.rawdata) - len(data)
280+
i = j - 2
281+
if self.rawdata[i:j] == '</':
280282
self.handle_data('<')
281-
pos = self.line_offset + self.offset
282-
self.override_comment_start = pos - 1 if self.rawdata[pos - 1:pos] == '<' else pos
283+
self.override_comment_start = i
283284
self.override_comment_update = True
284285
return
285286
self.handle_empty_tag('<!--{}-->'.format(data), is_block=True)
@@ -309,21 +310,19 @@ def parse_pi(self, i: int) -> int:
309310
self.handle_data('<?')
310311
return i + 2
311312

312-
if not hasattr(htmlparser, 'commentabruptclose'):
313-
# Internal -- parse comment, return length or -1 if not terminated
314-
# see https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
315-
def parse_comment(self, i, report=True):
316-
rawdata = self.rawdata
317-
assert rawdata.startswith('<!--', i), 'unexpected call to parse_comment()'
318-
match = commentclose.search(rawdata, i+4)
319-
if not match:
320-
match = commentabruptclose.match(rawdata, i+4)
321-
if not match:
322-
return -1
323-
if report:
324-
j = match.start()
325-
self.handle_comment(rawdata[i+4: j])
326-
return match.end()
313+
# Internal -- parse comment, return length or -1 if not terminated
314+
# see https://html.spec.whatwg.org/multipage/parsing.html#comment-start-state
315+
def parse_comment(self, i, report=True):
316+
rawdata = self.rawdata
317+
assert rawdata.startswith('<!--', i), 'unexpected call to parse_comment()'
318+
match = commentclose.search(rawdata, i+4)
319+
if not match:
320+
self.handle_data('<')
321+
return i + 1
322+
if report:
323+
j = match.start()
324+
self.handle_comment(rawdata[i+4: j])
325+
return match.end()
327326

328327
def parse_html_declaration(self, i: int) -> int:
329328
if self.at_line_start() or self.intail:

tests/test_syntax/blocks/test_html_blocks.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1698,3 +1698,11 @@ def test_multiple_unclosed_comments_no_hang(self):
16981698
'<!-- and <!--',
16991699
'<p>&lt;!-- and &lt;!--</p>'
17001700
)
1701+
1702+
def test_no_hang_issue_1586(self):
1703+
"""Test no hang condition for issue #1586."""
1704+
1705+
self.assertMarkdownRenders(
1706+
'Test `<!--[if mso]>` and `<!--[if !mso]>`',
1707+
'<p>Test <code>&lt;!--[if mso]&gt;</code> and <code>&lt;!--[if !mso]&gt;</code></p>'
1708+
)

0 commit comments

Comments
 (0)