Skip to content

Commit 1500ca3

Browse files
committed
fix: correct lenient query matching for +, fragments, and standalone {&var}
Three fixes to the two-phase query matching path: Replaced parse_qs with a manual &/= split using unquote(). parse_qs follows application/x-www-form-urlencoded semantics where + decodes to space, but RFC 6570 and RFC 3986 treat + as a literal sub-delim. A client sending ?q=C++ previously got 'C '; the path-portion decoder (unquote) already handled this correctly, so the two code paths disagreed. Fragment is now stripped before splitting on ?. A URI like logs://api?level=error#section1 previously returned level='error#section1' via the lenient path while the strict-regex path correctly stopped at #. _split_query_tail now requires the trailing tail to start with a {?...} expression. A standalone {&page} expands with an & prefix (no ?), so partition('?') found no split and the path regex failed. Such templates now fall through to strict regex which handles them correctly. Also extends the path-portion check to bail on {?...} expressions left in the path, not just literal ?.
1 parent 7891fd9 commit 1500ca3

File tree

2 files changed

+54
-11
lines changed

2 files changed

+54
-11
lines changed

src/mcp/shared/uri_template.py

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,7 @@
3939
from collections.abc import Mapping, Sequence
4040
from dataclasses import dataclass, field
4141
from typing import Literal, cast
42-
from urllib.parse import parse_qs, quote, unquote
42+
from urllib.parse import quote, unquote
4343

4444
__all__ = ["InvalidUriTemplate", "Operator", "UriTemplate", "Variable"]
4545

@@ -441,22 +441,24 @@ def match(self, uri: str, *, max_uri_length: int = DEFAULT_MAX_URI_LENGTH) -> di
441441
return None
442442

443443
if self._query_variables:
444-
# Two-phase: regex matches the path, parse_qs handles the
445-
# query. Query params may be partial, reordered, or include
446-
# extras; absent params stay absent so downstream defaults
447-
# can apply.
448-
path, _, query = uri.partition("?")
444+
# Two-phase: regex matches the path, the query is split and
445+
# decoded manually. Query params may be partial, reordered,
446+
# or include extras; absent params stay absent so downstream
447+
# defaults can apply. Fragment is stripped first since the
448+
# template's {?...} tail never describes a fragment.
449+
before_fragment, _, _ = uri.partition("#")
450+
path, _, query = before_fragment.partition("?")
449451
m = self._pattern.fullmatch(path)
450452
if m is None:
451453
return None
452454
result = _extract_path(m, self._path_variables)
453455
if result is None:
454456
return None
455457
if query:
456-
parsed = parse_qs(query, keep_blank_values=True)
458+
parsed = _parse_query(query)
457459
for var in self._query_variables:
458460
if var.name in parsed:
459-
result[var.name] = parsed[var.name][0]
461+
result[var.name] = parsed[var.name]
460462
return result
461463

462464
m = self._pattern.fullmatch(uri)
@@ -468,6 +470,26 @@ def __str__(self) -> str:
468470
return self.template
469471

470472

473+
def _parse_query(query: str) -> dict[str, str]:
474+
"""Parse a query string into a name→value mapping.
475+
476+
Unlike ``urllib.parse.parse_qs``, this follows RFC 3986 semantics:
477+
``+`` is a literal sub-delim, not a space. Form-urlencoding treats
478+
``+`` as space for HTML form submissions, but RFC 6570 and MCP
479+
resource URIs follow RFC 3986 where only ``%20`` encodes a space.
480+
481+
Duplicate keys keep the first value. Pairs without ``=`` are
482+
treated as empty-valued.
483+
"""
484+
result: dict[str, str] = {}
485+
for pair in query.split("&"):
486+
name, _, value = pair.partition("=")
487+
name = unquote(name)
488+
if name and name not in result:
489+
result[name] = unquote(value)
490+
return result
491+
492+
471493
def _extract_path(m: re.Match[str], variables: Sequence[Variable]) -> dict[str, str | list[str]] | None:
472494
"""Decode regex capture groups into a variable-name mapping.
473495
@@ -535,10 +557,22 @@ def _split_query_tail(parts: list[_Part]) -> tuple[list[_Part], list[Variable]]:
535557
if split == len(parts):
536558
return parts, []
537559

538-
# If the path portion contains a literal ?, the URI's ? won't align
539-
# with our template split. Fall back to strict regex.
560+
# The tail must start with a {?...} expression so that expand()
561+
# emits a ? the URI can split on. A standalone {&page} expands
562+
# with an & prefix, which partition("?") won't find.
563+
first = parts[split]
564+
assert isinstance(first, _Expression)
565+
if first.operator != "?":
566+
return parts, []
567+
568+
# If the path portion contains a literal ? or a {?...} expression,
569+
# the URI's ? split won't align with our template boundary. Fall
570+
# back to strict regex.
540571
for part in parts[:split]:
541-
if isinstance(part, str) and "?" in part:
572+
if isinstance(part, str):
573+
if "?" in part:
574+
return parts, []
575+
elif part.operator == "?":
542576
return parts, []
543577

544578
query_vars: list[Variable] = []

tests/shared/test_uri_template.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -437,8 +437,17 @@ def test_expand_rejects_invalid_value_types(value: object):
437437
("search{?q}", "search?q=mcp&utm=x&ref=y", {"q": "mcp"}),
438438
# URL-encoded query values are decoded
439439
("search{?q}", "search?q=hello%20world", {"q": "hello world"}),
440+
# + is a literal sub-delim per RFC 3986, not a space (form-encoding)
441+
("search{?q}", "search?q=C++", {"q": "C++"}),
442+
("search{?q}", "search?q=1.0+build.5", {"q": "1.0+build.5"}),
443+
# Fragment is stripped before query parsing
444+
("logs://{service}{?level}", "logs://api?level=error#section1", {"service": "api", "level": "error"}),
445+
("search{?q}", "search#frag", {}),
440446
# Multiple ?/& expressions collected together
441447
("api{?v}{&page,limit}", "api?limit=10&v=2", {"v": "2", "limit": "10"}),
448+
# Standalone {&var} falls through to strict regex (expands with
449+
# & prefix, no ? for lenient matching to split on)
450+
("api{&page}", "api&page=2", {"page": "2"}),
442451
# Level 3: query continuation with literal ? falls back to
443452
# strict regex (template-order, all-present required)
444453
("?a=1{&b}", "?a=1&b=2", {"b": "2"}),

0 commit comments

Comments
 (0)