Skip to content

Commit 4e43342

Browse files
PttCodingManclaude
andcommitted
fix: harden security and async correctness
- media: extend SVG upload filter to also block <use>, <animate>, <set>; these elements can pull external SVG (xlink:href) or mutate href to javascript: at runtime, slipping past the existing script/foreign object check - media: add a comment in the unauthenticated /api/media/{filename} fallback explaining the asymmetry with /api/pages — this branch is the support path for images embedded in is_public pages and is intentionally not gated by ANONYMOUS_READ - auth_router: add a 0.5s asyncio.sleep on failed login on top of the existing in-memory counter; the counter resets on process restart, but the sleep cost compounds even across restarts so brute-force throughput stays bounded - notifications: clarify the webhook dispatch model — validate_webhook_url is already re-checked just before each httpx.post so DNS-rebinding rebinds are caught at dispatch (the prior comment understated this); the residual TOCTOU window can't be closed without breaking HTTPS cert verification, which is acceptable for an admin-only surface - export: run _inline_media_srcs + html rendering and zip compression on a thread; both do synchronous file I/O / CPU work that blocked the event loop for the duration of the export Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 53939d9 commit 4e43342

4 files changed

Lines changed: 46 additions & 11 deletions

File tree

backend/app/routers/auth_router.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import asyncio
12
import logging
23
import time
34
from collections import defaultdict
@@ -91,6 +92,11 @@ async def login(body: LoginRequest, request: Request, response: Response):
9192
logger.warning(
9293
"login failed for user=%r ip=%s", body.username, client_ip(request)
9394
)
95+
# The in-memory counter resets on process restart, so add a fixed
96+
# latency cost on top of bcrypt to keep brute-force throughput low
97+
# even in restart-loop scenarios. Tiny enough that a real user
98+
# mistyping their password barely notices.
99+
await asyncio.sleep(0.5)
94100
raise HTTPException(status_code=401, detail="Invalid credentials")
95101

96102
logger.info("login ok user=%s role=%s", user["username"], user["role"])

backend/app/routers/export.py

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import asyncio
12
import base64
23
import html as _html
34
import io
@@ -255,7 +256,12 @@ async def export_page(
255256
page = dict(rows[0])
256257
if await resolve_page_permission(db, user, page["id"]) == "none":
257258
raise HTTPException(status_code=404, detail="Page not found")
258-
html_content = _inline_media_srcs(md_to_simple_html(page["content_md"]))
259+
# Render + inline media in a thread: _inline_media_srcs reads files
260+
# synchronously and md_to_simple_html does CPU work. Pages with many
261+
# embedded images would otherwise stall the event loop.
262+
html_content = await asyncio.to_thread(
263+
lambda: _inline_media_srcs(md_to_simple_html(page["content_md"]))
264+
)
259265
# Escape title/slug: they're rendered into <title>, <h1>, and a meta
260266
# breadcrumb as raw text, so a page title containing e.g. `<script>` would
261267
# execute when the exported HTML is opened locally (file:// context).
@@ -316,12 +322,17 @@ async def export_site(
316322
"SELECT id, slug, title, content_md FROM pages WHERE deleted_at IS NULL ORDER BY title"
317323
)
318324

319-
buf = io.BytesIO()
320-
with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
321-
for filename, content in build_site_files(pages):
322-
zf.writestr(filename, content)
325+
# File reads (_inline_media_srcs), HTML rendering, and zip compression
326+
# are all blocking; do the whole bundle off the event loop.
327+
def _build_zip() -> io.BytesIO:
328+
out = io.BytesIO()
329+
with zipfile.ZipFile(out, "w", zipfile.ZIP_DEFLATED) as zf:
330+
for filename, content in build_site_files(pages):
331+
zf.writestr(filename, content)
332+
out.seek(0)
333+
return out
323334

324-
buf.seek(0)
335+
buf = await asyncio.to_thread(_build_zip)
325336
return StreamingResponse(
326337
buf,
327338
media_type="application/zip",

backend/app/routers/media.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,14 @@
2323
# still force `Content-Disposition: attachment` + `X-Content-Type-Options:
2424
# nosniff` when serving SVG, so a crafted file can never run as HTML in the
2525
# app's origin even if it slips through this filter.
26+
# - <script>, <foreignObject>, <iframe>: direct script injection
27+
# - <use>: can pull external SVG via xlink:href / href
28+
# - <animate>, <set>: can mutate href to javascript: at runtime
29+
# - on*= attributes: inline event handlers
30+
# - javascript: URI in any attribute (covers <a href="javascript:...">)
2631
_SVG_SCRIPT_RE = re.compile(
27-
rb"<\s*script|on[a-z]+\s*=|javascript:|<\s*foreignObject|<\s*iframe",
32+
rb"<\s*(?:script|use|animate|set|foreignObject|iframe)[\s>/]"
33+
rb"|on[a-z]+\s*=|javascript:",
2834
re.IGNORECASE,
2935
)
3036

@@ -291,7 +297,13 @@ async def get_media(filename: str, request: Request):
291297
user = anonymous_user()
292298
else:
293299
# Flag off → fall back to the legacy is_public path so existing
294-
# share-a-public-page deployments keep working.
300+
# share-a-public-page deployments keep working. This is the
301+
# asymmetric piece: /api/pages/{slug} returns 401 for the same
302+
# unauthenticated request, but a public page is served by
303+
# routers/public.py which embeds <img src="/api/media/...">,
304+
# and those tags must resolve without a session. Keep the two
305+
# paths aligned on what counts as "public": only media that
306+
# lives on a page with is_public=1.
295307
if not await _media_is_public(db, filename):
296308
raise HTTPException(status_code=404, detail="File not found")
297309
return _safe_media_response(filepath)

backend/app/services/notifications.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -151,9 +151,15 @@ async def _post_many(urls: list[str], payload: dict) -> None:
151151

152152

153153
async def _post_one(client: httpx.AsyncClient, url: str, payload: dict) -> None:
154-
# Re-check at dispatch time — the stored URL's hostname may have started
155-
# resolving to an internal address since it was saved. Silently drop
156-
# rather than contribute to the attacker's oracle.
154+
# Defence in depth against DNS rebinding. validate_webhook_url ran at
155+
# store-time, but TTL=0 lets an attacker flip the answer between then
156+
# and now; re-validate immediately before each request so the resolved
157+
# IP we accept is a fresh one. Doing this just before client.post
158+
# narrows (but cannot fully close) the TOCTOU window — httpx will
159+
# resolve the host again at connect time. Closing that final gap needs
160+
# socket-level peer validation, which breaks SNI/cert verification for
161+
# HTTPS webhooks; given webhooks are admin-only and the attack surface
162+
# is post-compromise pivot, the narrowed window is acceptable.
157163
try:
158164
validate_webhook_url(url)
159165
except ValueError as exc:

0 commit comments

Comments
 (0)