Skip to content

Commit cb537f1

Browse files
committed
chore: switch to mistletoe for markdown rendering
This gives us more control over the renderer and lets us drop django-markupfield, which is no longer actively maintained.
1 parent 425a5a2 commit cb537f1

10 files changed

Lines changed: 263 additions & 19 deletions

File tree

.github/workflows/test.yml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,11 +82,13 @@ jobs:
8282
run: |
8383
# First version with database backends tests
8484
git checkout 7ba420b18ec3c7d32fb71823335e068f15b96855
85+
uv pip install --system -r requirements.txt
8586
./manage.py migrate
8687
./manage.py shell -c 'customer = Customer.objects.create(user_id=-1); payment = Payment.objects.create(customer=customer, amount=1); Payment.objects.create(customer=customer, amount=1, repeat=payment)'
8788
EXISTING_PK=$(./manage.py dumpdata payments.Payment | jq '.[].pk' | tr -d - | sort)
8889
EXISTING_REPEAT=$(./manage.py dumpdata payments.Payment | jq '.[].fields.repeat' | tr -d - | sort)
8990
git checkout "$GITHUB_SHA"
91+
uv pip install --system -r requirements-dev.txt
9092
coverage run ./manage.py migrate
9193
NEW_PK=$(./manage.py dumpdata payments.Payment | jq '.[].pk' | tr -d - | sort)
9294
NEW_REPEAT=$(./manage.py dumpdata payments.Payment | jq '.[].fields.repeat' | tr -d - | sort)

pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -91,7 +91,6 @@ module = [
9191
"weblate_language_data.*",
9292
"appconf.*",
9393
"zammad_py.*",
94-
"markupfield.*",
9594
"weasyprint.*"
9695
]
9796

requirements.txt

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ django-appconf==1.2.0
66
django-compressor==4.6.0
77
django-countries==8.2.0
88
django-macros==0.4.0
9-
django-markupfield==2.0.1
109
django-redis==6.0.0
1110
django-stubs-ext==6.0.3
1211
django-vies==6.3.0
@@ -15,7 +14,7 @@ fiobank==4.1.1
1514
hiredis==3.3.1
1615
html2text==2025.4.15
1716
lxml==6.1.0
18-
Markdown==3.10.2
17+
mistletoe==1.5.1
1918
mysqlclient==2.2.8
2019
paramiko==4.0.0
2120
Pillow==12.2.0

weblate_web/markup.py

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
#
2+
# Copyright © Michal Čihař <michal@weblate.org>
3+
#
4+
# This file is part of Weblate <https://weblate.org/>
5+
#
6+
# This program is free software: you can redistribute it and/or modify
7+
# it under the terms of the GNU General Public License as published by
8+
# the Free Software Foundation, either version 3 of the License, or
9+
# (at your option) any later version.
10+
#
11+
# This program is distributed in the hope that it will be useful,
12+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
# GNU General Public License for more details.
15+
#
16+
# You should have received a copy of the GNU General Public License
17+
# along with this program. If not, see <https://www.gnu.org/licenses/>.
18+
#
19+
20+
from __future__ import annotations
21+
22+
import html
23+
24+
# pylint: disable=protected-access
25+
import re
26+
27+
import mistletoe
28+
from django.utils.html import linebreaks
29+
from mistletoe import span_token
30+
31+
32+
class SkipHtmlSpan(span_token.HtmlSpan):
    """Span token that matches raw HTML tags and drops them from the output.

    The token matches anything that looks like an HTML opening or closing
    tag and then blanks its own content, so the tag markup never reaches
    the rendered document.
    """

    # Alternation of mistletoe's private regex fragments for an opening tag
    # and a closing tag.
    pattern = re.compile(
        "{}|{}".format(
            span_token._open_tag,  # pylint: disable=protected-access
            span_token._closing_tag,  # pylint: disable=protected-access
        )
    )
    parse_inner = False
    content: str

    def __init__(self, match) -> None:
        """Build the token from *match* and discard the matched markup."""
        super().__init__(match)
        # Whatever the parent stored for the match is replaced by an empty
        # string; this is what makes the tag vanish when rendered.
        self.content = ""
44+
45+
46+
class PlainAutoLink(span_token.AutoLink):
    """Turn bare ``http://`` / ``https://`` URLs in running text into links.

    Unlike the angle-bracket form, the URL does not need to be wrapped in
    ``<...>``; it only has to start at a word boundary.
    """

    pattern = re.compile(
        # Start at a word boundary and open the captured group (the URL).
        r"\b("
        # Scheme followed by any run of permitted URL characters; this run
        # may contain punctuation such as ".", "!", "?" or ")".
        r"https?://[A-Za-z0-9.!#$%&'*+/=?^_`{|}()~:-]*"
        # The last character comes from a narrower set, so sentence
        # punctuation directly after a URL (e.g. ").") stays outside the link.
        r"[A-Za-z0-9/#%&=+_~:-]"
        # Close the group; the URL must be followed by a non-word character
        # or the end of the input.
        r")(?=\W|$)"
    )
53+
54+
55+
class SafeHtmlRenderer(mistletoe.HtmlRenderer):
    """Render Markdown to HTML while rejecting raw HTML and unsafe URLs.

    Links, autolinks and images are only turned into markup when their
    target passes :meth:`check_url` (or :meth:`check_email` for autolinks).
    Anything else is emitted as escaped literal Markdown so the reader still
    sees what was written, but nothing is clickable or loadable.
    """

    # Only absolute HTTP(S) URLs are accepted (scheme match is case-insensitive).
    _allowed_url_re = re.compile(r"^https?://", re.IGNORECASE)
    # A single e-mail address, optionally prefixed with "mailto:".
    _allowed_email_re = re.compile(
        r"^(mailto:)?[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$"
    )

    def __init__(self) -> None:
        """Register the custom span tokens with the base renderer."""
        # NOTE(review): process_html_tokens=False presumably turns off
        # mistletoe's own raw-HTML tokens so that SkipHtmlSpan handles tags
        # instead - confirm against the mistletoe documentation.
        super().__init__(SkipHtmlSpan, PlainAutoLink, process_html_tokens=False)

    def render_skip_html_span(self, token: SkipHtmlSpan) -> str:
        """Return the token content, which SkipHtmlSpan has already blanked."""
        return token.content

    def render_plain_auto_link(self, token: PlainAutoLink) -> str:
        """Render a bare URL with the same rules as a regular autolink."""
        return self.render_auto_link(token)

    def render_link(self, token: span_token.Link) -> str:
        """Render an inline link, or its escaped Markdown if the URL is rejected."""
        if self.check_url(token.target):
            return super().render_link(token)
        # render_to_plain() already returns HTML-escaped text (render_image
        # relies on that for the alt attribute), so only the raw target is
        # escaped here; escaping the whole string would double-escape the
        # label (e.g. "&" would end up as "&amp;amp;").
        label = self.render_to_plain(token)
        return f"[{label}]({self.escape_html_text(token.target)})"

    def render_auto_link(self, token: span_token.AutoLink | PlainAutoLink) -> str:
        """Render an autolink, or its escaped ``<target>`` form if rejected."""
        if self.check_url(token.target) or self.check_email(token.target):
            return super().render_auto_link(token)
        return self.escape_html_text(f"<{token.target}>")

    def render_image(self, token: span_token.Image) -> str:
        """Render an image, or its escaped Markdown if the source is rejected."""
        if self.check_url(token.src):
            title = f' title="{html.escape(token.title)}"' if token.title else ""
            return (
                f'<img src="{self.escape_url(token.src)}" '
                f'alt="{self.render_to_plain(token)}"{title} />'
            )
        # Same reasoning as in render_link(): the alt text is already escaped,
        # only the raw source needs escaping.
        alt_text = self.render_to_plain(token)
        return f"![{alt_text}]({self.escape_html_text(token.src)})"

    def check_url(self, url: str) -> bool:
        """Return True if *url* starts with ``http://`` or ``https://``."""
        return bool(self._allowed_url_re.match(url))

    def check_email(self, email: str) -> bool:
        """Return True if *email* is one address, optionally with ``mailto:``."""
        return bool(self._allowed_email_re.match(email))
96+
97+
98+
def render_markdown(text: str) -> str:
    """Render Markdown as safe HTML.

    The text is rendered with :class:`SafeHtmlRenderer`. If rendering raises
    for any reason, the failure is logged and the input is returned as
    escaped plain text with line breaks converted to HTML, so callers always
    get HTML that is safe to output.
    """
    try:
        # The document is built inside the ``with`` block, while the
        # renderer (and the tokens it registers) is active.
        with SafeHtmlRenderer() as renderer:
            return renderer.render(mistletoe.Document(text))
    except Exception:  # pylint: disable=broad-exception-caught
        # Best-effort fallback is kept, but the failure is no longer silent:
        # without a log entry the degraded output would go unnoticed.
        import logging  # pylint: disable=import-outside-toplevel

        logging.getLogger(__name__).exception(
            "Markdown rendering failed, falling back to escaped plain text"
        )
        return linebreaks(text, autoescape=True)

weblate_web/migrations/0001_squashed_0030_service_note.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
# Generated by Django 3.0.4 on 2020-03-13 12:24
22

33
import django.db.models.deletion
4-
import markupfield.fields
54
from django.conf import settings
65
from django.db import migrations, models
76

@@ -179,7 +178,7 @@ class Migration(migrations.Migration):
179178
("title", models.CharField(max_length=100)),
180179
("slug", models.SlugField(unique=True)),
181180
("timestamp", models.DateTimeField(db_index=True)),
182-
("body", markupfield.fields.MarkupField(rendered_field=True)),
181+
("body", models.TextField()),
183182
(
184183
"body_markup_type",
185184
models.CharField(
Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
#
2+
# Copyright © Michal Čihař <michal@weblate.org>
3+
#
4+
# This file is part of Weblate <https://weblate.org/>
5+
#
6+
# This program is free software: you can redistribute it and/or modify
7+
# it under the terms of the GNU General Public License as published by
8+
# the Free Software Foundation, either version 3 of the License, or
9+
# (at your option) any later version.
10+
#
11+
# This program is distributed in the hope that it will be useful,
12+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
13+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14+
# GNU General Public License for more details.
15+
#
16+
# You should have received a copy of the GNU General Public License
17+
# along with this program. If not, see <https://www.gnu.org/licenses/>.
18+
#
19+
20+
from __future__ import annotations
21+
22+
from django.db import migrations, models
23+
24+
25+
class Migration(migrations.Migration):
    """Declare ``Post.body_rendered`` explicitly and drop ``body_markup_type``."""

    dependencies = [("weblate_web", "0046_alter_service_site_url_lock")]

    operations = [
        # State-only changes: the model field is renamed from
        # "_body_rendered" to "body_rendered", but db_column keeps pointing
        # at the existing "_body_rendered" column, so no database operation
        # is listed for it.
        migrations.SeparateDatabaseAndState(
            state_operations=[
                migrations.RenameField("post", "_body_rendered", "body_rendered"),
                migrations.AlterField(
                    "post",
                    "body_rendered",
                    field=models.TextField(
                        editable=False, blank=True, db_column="_body_rendered"
                    ),
                ),
            ],
        ),
        # This one is a regular operation and removes the field for real.
        migrations.RemoveField("post", "body_markup_type"),
    ]

weblate_web/models.py

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,6 @@
4040
from django.utils.functional import cached_property
4141
from django.utils.translation import gettext as _
4242
from django.utils.translation import gettext_lazy, override, pgettext_lazy
43-
from markupfield.fields import MarkupField
4443
from PIL import Image as PILImage
4544

4645
from weblate_web.const import DEFAULT_UPCOMING_PAYMENT_NOTIFICATION_DAYS
@@ -55,6 +54,7 @@
5554
from weblate_web.zammad import create_dedicated_hosting_ticket
5655

5756
from .hetzner import create_storage_folder, create_storage_subaccount, generate_ssh_url
57+
from .markup import render_markdown
5858
from .packages import (
5959
DEDICATED_LIMIT,
6060
DEDICATED_PREFIX,
@@ -464,7 +464,10 @@ class Post(models.Model):
464464
User, editable=False, on_delete=models.deletion.SET_NULL, null=True
465465
)
466466
topic = models.CharField(max_length=100, db_index=True, choices=TOPICS, default="")
467-
body = MarkupField(default_markup_type="markdown")
467+
body = models.TextField()
468+
body_rendered = models.TextField(
469+
blank=True, db_column="_body_rendered", editable=False
470+
)
468471
summary = models.TextField(
469472
blank=True, help_text="Will be generated from first body paragraph if empty"
470473
)
@@ -493,23 +496,31 @@ def save( # type: ignore[override]
493496
using=None,
494497
update_fields=None,
495498
) -> None:
496-
# Need to save first as rendered value is available only then
497-
super().save(
498-
force_insert=force_insert,
499-
force_update=force_update,
500-
using=using,
501-
update_fields=update_fields,
502-
)
499+
self.body_rendered = render_markdown(self.body)
500+
generated_summary = False
503501
if not self.summary:
504502
h2t = html2text.HTML2Text()
505503
h2t.body_width = 0
506504
h2t.ignore_images = True
507505
h2t.ignore_links = True
508506
h2t.ignore_emphasis = True
509-
text = h2t.handle(self.body.rendered) # pylint: disable=no-member
507+
text = h2t.handle(self.body_rendered)
510508
self.summary = text.splitlines()[0]
511-
if self.summary:
512-
super().save(update_fields=["summary"])
509+
generated_summary = bool(self.summary)
510+
511+
if update_fields is not None:
512+
update_fields = set(update_fields)
513+
if "body" in update_fields:
514+
update_fields.add("body_rendered")
515+
if generated_summary:
516+
update_fields.add("summary")
517+
518+
super().save(
519+
force_insert=force_insert,
520+
force_update=force_update,
521+
using=using,
522+
update_fields=update_fields,
523+
)
513524

514525
def get_absolute_url(self):
515526
return reverse("post", kwargs={"slug": self.slug})

weblate_web/templates/weblate_web/post_detail.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ <h1 class="section-title bold" lang="en" dir="ltr">{{ object.title }}</h1>
4343
<br />
4444
{{ object.timestamp|naturalday }}
4545
</div>
46-
<div lang="en" dir="ltr" class="entry">{{ object.body }}</div>
46+
<div lang="en" dir="ltr" class="entry">{{ object.body_rendered|safe }}</div>
4747
</div>
4848
<div class="clear"></div>
4949
<div class="post-footer">

weblate_web/tests.py

Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1714,6 +1714,85 @@ def test_future(self) -> None:
17141714
response = self.client.get(future.get_absolute_url(), follow=True)
17151715
self.assertEqual(response.status_code, 404)
17161716

1717+
def test_archive_escapes_title_and_summary(self) -> None:
1718+
payload = '"><img src=x onerror=prompt(document.domain)>'
1719+
post = Post.objects.create(
1720+
title=payload, slug="xss-title", body="safe", timestamp=timezone.now()
1721+
)
1722+
post.summary = payload
1723+
post.save(update_fields=["summary"])
1724+
1725+
response = self.client.get("/news/", follow=True)
1726+
1727+
self.assertNotContains(response, "<img src=x onerror=prompt(document.domain)>")
1728+
self.assertContains(
1729+
response, "&lt;img src=x onerror=prompt(document.domain)&gt;"
1730+
)
1731+
1732+
def test_body_strips_raw_html(self) -> None:
1733+
payload = '"><img src=x onerror=prompt(document.domain)>'
1734+
post = self.create_post(title="xss-post", body=payload)
1735+
1736+
response = self.client.get(post.get_absolute_url(), follow=True)
1737+
1738+
self.assertNotContains(response, "src=x")
1739+
self.assertNotContains(response, "onerror")
1740+
self.assertContains(response, "&quot;&gt;")
1741+
1742+
def test_body_escapes_unsafe_link(self) -> None:
1743+
post = self.create_post(title="xss-link", body="[link](javascript:alert(1))")
1744+
1745+
response = self.client.get(post.get_absolute_url(), follow=True)
1746+
1747+
self.assertNotContains(response, 'href="javascript:alert(1)"')
1748+
self.assertContains(response, "[link](javascript:alert(1))")
1749+
1750+
def test_body_plain_autolink_boundaries(self) -> None:
1751+
post = self.create_post(
1752+
title="plain-autolink",
1753+
body="Links: https://example.com). and https://example.com/",
1754+
)
1755+
1756+
self.assertIn(
1757+
'<a href="https://example.com">https://example.com</a>).',
1758+
post.body_rendered,
1759+
)
1760+
self.assertIn(
1761+
'<a href="https://example.com/">https://example.com/</a>',
1762+
post.body_rendered,
1763+
)
1764+
1765+
def test_body_escapes_image_url(self) -> None:
1766+
post = self.create_post(
1767+
title="xss-image",
1768+
body='![logo](<https://example.com/" onerror="alert(1)>)',
1769+
)
1770+
1771+
self.assertNotIn('src="https://example.com/" onerror=', post.body_rendered)
1772+
self.assertNotIn(' onerror="alert(1)"', post.body_rendered)
1773+
self.assertIn('src="https://example.com/%22%20onerror=', post.body_rendered)
1774+
1775+
def test_body_escapes_image_alt(self) -> None:
1776+
post = self.create_post(
1777+
title="xss-image-alt",
1778+
body='![" onerror="alert(1)](https://example.com/logo.png)',
1779+
)
1780+
1781+
self.assertNotIn('alt="" onerror="alert(1)"', post.body_rendered)
1782+
self.assertNotIn(' onerror="alert(1)"', post.body_rendered)
1783+
self.assertIn('alt="&quot; onerror=&quot;alert(1)"', post.body_rendered)
1784+
1785+
def test_feed_uses_sanitized_body(self) -> None:
1786+
self.create_post(
1787+
title="xss-feed",
1788+
body='"><img src=x onerror=prompt(document.domain)>',
1789+
)
1790+
1791+
response = self.client.get("/feed/")
1792+
1793+
self.assertNotContains(response, "<img")
1794+
self.assertNotContains(response, "onerror")
1795+
17171796

17181797
class APITest(UserTestCase):
17191798
def test_hosted(self) -> None:

weblate_web/urls.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,7 @@ def item_title(self, item):
8888
return item.title
8989

9090
def item_description(self, item):
91-
return item.body.rendered
91+
return item.body_rendered
9292

9393
def item_pubdate(self, item):
9494
return item.timestamp

0 commit comments

Comments
 (0)