Skip to content

Commit bb61471

Browse files
committed
feat: use bleach to enforce allowed tags
1 parent 80e4979 commit bb61471

2 files changed

Lines changed: 17 additions & 3 deletions

File tree

versions/templatetags/whats_new_extras.py

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
import re
22

3+
import bleach
34
from django import template
45
from django.utils.html import escape
56
from django.utils.safestring import mark_safe
@@ -11,6 +12,10 @@
1112
# same position — e.g. `a**b**c` stays a code span, the inner ** is not bolded.
1213
_INLINE_RE = re.compile(r"`([^`]+)`|\*\*([^*]+?)\*\*")
1314

15+
# The only tags inline_markdown is allowed to emit. bleach (already a project
16+
# dependency via wagtail-markdown) enforces this allowlist on the rendered output.
17+
_ALLOWED_TAGS = ["code", "strong"]
18+
1419

1520
def _replace_span(match):
1621
if match.group(1) is not None:
@@ -23,9 +28,12 @@ def inline_markdown(value):
2328
"""Render inline-code (`...`) and bold (**...**) markdown spans as HTML.
2429
2530
Scoped to what `WHATS_NEW_SYSTEM_PROMPT` permits in description bullets:
26-
code identifiers in single backticks and double-asterisk bold. Everything
27-
else in the input is HTML-escaped.
31+
code identifiers in single backticks and double-asterisk bold. The input is
32+
escaped first so raw markup becomes inert text, the two permitted spans are
33+
converted, then `bleach.clean` enforces the allowlist on the result.
2834
"""
2935
if not value:
3036
return ""
31-
return mark_safe(_INLINE_RE.sub(_replace_span, escape(value)))
37+
html = _INLINE_RE.sub(_replace_span, escape(value))
38+
cleaned = bleach.clean(html, tags=_ALLOWED_TAGS, attributes={}, strip=True)
39+
return mark_safe(cleaned)

versions/tests/test_templatetags.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,3 +62,9 @@ def test_inline_markdown_single_asterisks_left_alone():
6262
def test_inline_markdown_does_not_bold_inside_code_span():
6363
"""`**` inside a code span stays literal — a code span wins over bold."""
6464
assert inline_markdown("`a**b**c`") == "<code>a**b**c</code>"
65+
66+
67+
def test_inline_markdown_strips_disallowed_tags_via_bleach():
68+
"""bleach enforces the <code>/<strong> allowlist; an unescaped angle
69+
bracket inside a bold span is neutralized without dropping content."""
70+
assert inline_markdown("**a<b** test") == "<strong>a&lt;b</strong> test"

0 commit comments

Comments
 (0)