Skip to content

Commit c1a4271

Browse files
committed
security: close 2 new CodeQL alerts (ReDoS + HTTP response splitting)
py/polynomial-redos (wiki_export.py:142) The frontmatter key:value matcher r"^([A-Za-z_][\w-]*)\s*:\s*(.*)$" has two repetition steps whose ranges overlap on adversarial input like "A:" followed by N tabs, producing O(n²) backtracking. Replaced with str.partition(":") + a tight bounded key validator r"^[A-Za-z_][\w-]{0,62}$". No quantifier overlap, no backtracking hazard. 100 000-tab input now processes in sub-millisecond time. Added a 1000-char per-line sanity cap as defence in depth. py/http-response-splitting (http_viz_server.py:352 + http_standalone mirror) The Content-Disposition filename was built by literal f-string interpolation of a user-controlled path component. CRLF in the filename would enable header injection (Set-Cookie, cache poisoning, etc.). Sanitized to [A-Za-z0-9._-] only (re.sub r"[^\w.-]", "_") and capped at 200 chars before the header. The format suffix gets the same treatment (guard on our own constant map, but belt-and-braces against future changes).
1 parent efa501c commit c1a4271

3 files changed

Lines changed: 38 additions & 12 deletions

File tree

mcp_server/handlers/wiki_export.py

Lines changed: 19 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,16 @@ def _read_body(wiki_root: Path, rel_path: str | None, body: str | None) -> str:
118118
return content
119119

120120

121+
# Tight key validator — anchored, bounded length, no backtracking.
122+
# Replaces an earlier full-line regex that CodeQL flagged as
123+
# polynomial-redos (py/polynomial-redos): `\s*` + `.*$` combined on
124+
# an unbounded user-provided line could backtrack quadratically on
125+
# adversarial inputs such as "A:\t\t\t…". Using str.partition() for
126+
# the split eliminates the overlapping quantifiers entirely.
127+
_FM_KEY_RE = re.compile(r"^[A-Za-z_][\w-]{0,62}$")
128+
_FM_MAX_LINE = 1000 # sanity cap — valid frontmatter keys never approach this
129+
130+
121131
def _split_frontmatter(markdown: str) -> tuple[dict, str]:
122132
"""Parse a best-effort frontmatter block out of ``markdown``.
123133
@@ -137,16 +147,17 @@ def _split_frontmatter(markdown: str) -> tuple[dict, str]:
137147
body = markdown[end + 4 :].lstrip("\n")
138148
fields: dict[str, str] = {}
139149
for line in raw.splitlines():
140-
# Match `key: value` with value running to end-of-line; values
141-
# can contain any character including further colons.
142-
m = re.match(r"^([A-Za-z_][\w-]*)\s*:\s*(.*)$", line)
143-
if not m:
150+
if len(line) > _FM_MAX_LINE or ":" not in line:
151+
continue
152+
key, _, value = line.partition(":")
153+
key = key.strip()
154+
value = value.strip()
155+
if not _FM_KEY_RE.match(key):
144156
continue
145-
k, v = m.group(1), m.group(2).strip()
146157
# Strip surrounding quotes if present; otherwise keep as-is.
147-
if len(v) >= 2 and v[0] == v[-1] and v[0] in ("'", '"'):
148-
v = v[1:-1]
149-
fields[k] = v
158+
if len(value) >= 2 and value[0] == value[-1] and value[0] in ("'", '"'):
159+
value = value[1:-1]
160+
fields[key] = value
150161
return fields, body
151162

152163

mcp_server/server/http_standalone.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -446,12 +446,19 @@ def _serve_wiki_export(self):
446446
self.wfile.write(body)
447447
return
448448
data = base64.b64decode(result["content_base64"])
449-
filename = (rel_path.split("/")[-1] or "page").rsplit(".", 1)[0]
449+
# Sanitize — rel_path is user-controlled, mustn't
450+
# carry \r\n into the response header (CodeQL
451+
# py/http-response-splitting).
452+
import re as _re_safe
453+
454+
raw_name = (rel_path.split("/")[-1] or "page").rsplit(".", 1)[0]
455+
filename = _re_safe.sub(r"[^\w.-]", "_", raw_name)[:200] or "page"
456+
fmt_out = _re_safe.sub(r"[^\w]", "", result["format"])[:16]
450457
self.send_response(200)
451458
self.send_header("Content-Type", result["mime"])
452459
self.send_header(
453460
"Content-Disposition",
454-
f'attachment; filename="{filename}.{result["format"]}"',
461+
f'attachment; filename="{filename}.{fmt_out}"',
455462
)
456463
self.send_header("Content-Length", str(len(data)))
457464
self.send_header("Access-Control-Allow-Origin", "*")

mcp_server/server/http_viz_server.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -344,12 +344,20 @@ def _serve_wiki_export(self):
344344
send_json_response(self, result)
345345
return
346346
data = base64.b64decode(result["content_base64"])
347-
filename = (rel_path.split("/")[-1] or "page").rsplit(".", 1)[0]
347+
# Sanitize filename before it flows into the
348+
# Content-Disposition header — rel_path is user-
349+
# controlled via the ?path= query string. CodeQL
350+
# py/http-response-splitting: any \r\n in the value
351+
# would allow header injection. Allow only safe
352+
# filesystem chars and cap length.
353+
raw_name = (rel_path.split("/")[-1] or "page").rsplit(".", 1)[0]
354+
filename = re.sub(r"[^\w.-]", "_", raw_name)[:200] or "page"
355+
fmt_out = re.sub(r"[^\w]", "", result["format"])[:16]
348356
self.send_response(200)
349357
self.send_header("Content-Type", result["mime"])
350358
self.send_header(
351359
"Content-Disposition",
352-
f'attachment; filename="{filename}.{result["format"]}"',
360+
f'attachment; filename="{filename}.{fmt_out}"',
353361
)
354362
self.send_header("Content-Length", str(len(data)))
355363
self.send_header("Access-Control-Allow-Origin", "*")

0 commit comments

Comments
 (0)