Skip to content

Commit 25d12a4

Browse files
committed
feat: extract parameter descriptions from docstrings into JSON schema
Parses Google, NumPy, and Sphinx docstring styles with auto-detection. Injects descriptions into the Pydantic Field used for JSON schema generation, so tools get richer metadata for LLM consumption. Explicit Field(description=...) annotations always take precedence. Closes #226
1 parent d5b9155 commit 25d12a4

File tree

3 files changed

+302
-0
lines changed

3 files changed

+302
-0
lines changed
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
"""Extract parameter descriptions from function docstrings.
2+
3+
Auto-detects Google, NumPy, and Sphinx styles.
4+
"""
5+
6+
from __future__ import annotations
7+
8+
import re
9+
10+
# Google style: an "Args:" / "Arguments:" / "Parameters:" header line followed
# by the parameter entries.  Group 1 is the section body.  The body ends at the
# first blank line, at the next section header, or at the end of the docstring.
# A section header must be a keyword followed by a colon and nothing else on
# the line, so that a continuation line such as "Note that ..." or
# "Returns early if ..." does not cut the section short.
_GOOGLE_SECTION_RE = re.compile(
    r"(?:Args|Arguments|Parameters)\s*:\s*\n"
    r"(.*?)"
    r"(?:\n\s*\n|\n[ \t]*(?:Returns?|Raises?|Yields?|Notes?|Examples?)[ \t]*:[ \t]*(?=\n|\Z)|\Z)",
    re.DOTALL,
)
# One Google entry: group 1 = leading indent, group 2 = parameter name,
# group 3 = description text on the header line (an optional "(type)" between
# the name and the colon is skipped).
_GOOGLE_PARAM_RE = re.compile(r"^(\s+)(\w+)\s*(?:\([^)]*\))?\s*:\s*(.*)")

# NumPy style: "Parameters" underlined with dashes.  Group 1 is the section
# body, which ends at the next underlined heading of any name ("Returns",
# "Notes", "Examples", "Other Parameters", ...) or at the end of the docstring.
_NUMPY_SECTION_RE = re.compile(
    r"Parameters\s*\n\s*-{3,}\s*\n"
    r"(.*?)"
    r"(?:\n[ \t]*\S[^\n]*\n[ \t]*-{3,}[ \t]*(?=\n|\Z)|\Z)",
    re.DOTALL,
)
# One NumPy entry header: group 1 = parameter name.  The " : type" part is
# optional because numpydoc allows the type (and its colon) to be omitted.
_NUMPY_PARAM_RE = re.compile(r"^\s*(\w+)\s*(?::.*)?$")

# Sphinx/reST field: ":param [type] name: description".  Group 1 = name,
# group 2 = description (possibly empty, possibly spanning several lines).
# The description stops at a blank line, at the next ":field:" line, or at the
# end of the docstring, so trailing prose and following fields are not absorbed.
_SPHINX_PARAM_RE = re.compile(
    r":param[ \t]+(?:[^:\n]+?[ \t]+)?(\w+)[ \t]*:[ \t]*(.*?)(?=\n\s*\n|\n\s*:|\s*\Z)",
    re.DOTALL,
)

# A run of three or more dashes; used as a cheap hint that the docstring may be
# NumPy style.
_NUMPY_SEPARATOR_RE = re.compile(r"-{3,}")
32+
33+
34+
def parse_docstring_params(docstring: str | None) -> dict[str, str]:
    """Return a ``{parameter name: description}`` mapping parsed from *docstring*.

    The Google, NumPy and Sphinx parsers are tried in turn and the first
    non-empty result wins.  When the docstring contains a run of three or
    more dashes the NumPy parser is tried first; otherwise the Google parser
    is.  A missing or empty docstring, or one in which no parser finds
    anything, yields an empty dict.
    """
    if not docstring:
        return {}

    looks_like_numpy = _NUMPY_SEPARATOR_RE.search(docstring) is not None
    ordered = (
        (_parse_numpy, _parse_google, _parse_sphinx)
        if looks_like_numpy
        else (_parse_google, _parse_sphinx, _parse_numpy)
    )

    # The generator is lazy, so later parsers only run if earlier ones found nothing.
    attempts = (parse(docstring) for parse in ordered)
    return next((found for found in attempts if found), {})
49+
50+
51+
def _collect_indented_block(
    lines: list[str],
    param_re: re.Pattern[str],
    *,
    extract_desc_from_header: bool = True,
) -> dict[str, str]:
    """Walk *lines* and collect param→description pairs.

    A parameter header is any line matching *param_re* whose indent is
    ≤ the previous header's indent (the first matching line is always a
    header).  Non-blank lines indented deeper than the current header are
    joined with single spaces to form its description.  Blank lines are
    skipped.

    A line at header level (or shallower) that does *not* match *param_re*
    closes the current parameter, so the indented lines below an entry the
    pattern cannot parse (for example ``*args: ...``) are dropped rather
    than appended to the preceding parameter's description.

    Args:
        lines: The section body, one string per line.
        param_re: Pattern applied with ``match`` to every non-blank line.
            If it has two or more groups, group 2 is the parameter name,
            otherwise group 1 is.  Group 3, when present, is the
            description text found on the header line itself.
        extract_desc_from_header: When true and *param_re* provides group 3,
            that text starts the description.

    Returns:
        Mapping of parameter name to its whitespace-joined description
        (an empty string when no description text was found).
    """
    params: dict[str, str] = {}
    current_param: str | None = None
    desc_parts: list[str] = []
    # Indent of the most recent header; None until the first header is seen.
    header_indent: int | None = None

    for line in lines:
        text = line.strip()
        if not text:
            continue

        indent = len(line) - len(line.lstrip())
        m = param_re.match(line)

        if m is not None and (header_indent is None or indent <= header_indent):
            # New header: store whatever was collected for the previous one.
            if current_param is not None:
                params[current_param] = " ".join(desc_parts).strip()

            header_indent = indent
            group_count = m.lastindex or 0
            current_param = m.group(2) if group_count >= 2 else m.group(1)

            tail = ""
            if extract_desc_from_header and group_count >= 3:
                tail = (m.group(3) or "").strip()
            desc_parts = [tail] if tail else []
        elif current_param is not None and header_indent is not None:
            if indent > header_indent:
                # Deeper than the header: continuation of the description.
                desc_parts.append(text)
            else:
                # Header-level line we could not parse: end the current
                # parameter so following lines are not misattributed to it.
                params[current_param] = " ".join(desc_parts).strip()
                current_param = None
                desc_parts = []

    if current_param is not None:
        params[current_param] = " ".join(desc_parts).strip()
    return params
94+
95+
96+
def _parse_google(docstring: str) -> dict[str, str]:
    """Parse the Google-style argument section of *docstring*.

    Returns an empty dict when ``_GOOGLE_SECTION_RE`` finds no section;
    otherwise the section body is split into lines and handed to
    ``_collect_indented_block`` with text on the header line kept as the
    start of each description.
    """
    section = _GOOGLE_SECTION_RE.search(docstring)
    if section is None:
        return {}

    body_lines = section.group(1).split("\n")
    return _collect_indented_block(body_lines, _GOOGLE_PARAM_RE, extract_desc_from_header=True)
105+
106+
107+
def _parse_numpy(docstring: str) -> dict[str, str]:
    """Parse the NumPy-style ``Parameters`` section of *docstring*.

    Returns an empty dict when ``_NUMPY_SECTION_RE`` finds no section;
    otherwise the section body is split into lines and handed to
    ``_collect_indented_block``.  Text on the header line is not used as
    description, so descriptions come only from the indented lines below
    each header.
    """
    section = _NUMPY_SECTION_RE.search(docstring)
    if section is None:
        return {}

    body_lines = section.group(1).split("\n")
    return _collect_indented_block(body_lines, _NUMPY_PARAM_RE, extract_desc_from_header=False)
116+
117+
118+
def _parse_sphinx(docstring: str) -> dict[str, str]:
    """Parse Sphinx-style ``:param name:`` fields from *docstring*.

    Every match of ``_SPHINX_PARAM_RE`` contributes one entry: group 1 is
    the parameter name and group 2 the description, whose internal runs of
    whitespace (including line breaks) are collapsed to single spaces.
    If a name occurs more than once, the last occurrence wins.
    """
    found: dict[str, str] = {}
    for field in _SPHINX_PARAM_RE.finditer(docstring):
        name, raw_description = field.group(1), field.group(2)
        found[name] = " ".join(raw_description.split())
    return found

src/mcp/server/mcpserver/utilities/func_metadata.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
)
2323

2424
from mcp.server.mcpserver.exceptions import InvalidSignature
25+
from mcp.server.mcpserver.utilities.docstring_parser import parse_docstring_params
2526
from mcp.server.mcpserver.utilities.logging import get_logger
2627
from mcp.server.mcpserver.utilities.types import Audio, Image
2728
from mcp.types import CallToolResult, ContentBlock, TextContent
@@ -215,6 +216,7 @@ def func_metadata(
215216
# model_rebuild right before using it 🤷
216217
raise InvalidSignature(f"Unable to evaluate type annotations for callable {func.__name__!r}") from e
217218
params = sig.parameters
219+
docstring_params = parse_docstring_params(func.__doc__)
218220
dynamic_pydantic_model_params: dict[str, Any] = {}
219221
for param in params.values():
220222
if param.name.startswith("_"): # pragma: no cover
@@ -227,6 +229,11 @@ def func_metadata(
227229
field_kwargs: dict[str, Any] = {}
228230
field_metadata: list[Any] = []
229231

232+
# Add description from docstring if available and not already
233+
# provided via Annotated[..., Field(description=...)]
234+
if param.name in docstring_params and not _has_field_description(annotation):
235+
field_kwargs["description"] = docstring_params[param.name]
236+
230237
if param.annotation is inspect.Parameter.empty:
231238
field_metadata.append(WithJsonSchema({"title": param.name, "type": "string"}))
232239
# Check if the parameter name conflicts with BaseModel attributes
@@ -489,6 +496,20 @@ class DictModel(RootModel[dict_annotation]):
489496
return DictModel
490497

491498

499+
def _has_field_description(annotation: Any) -> bool:
    """Report whether *annotation* already carries a described ``Field``.

    Only ``Annotated[...]`` annotations are inspected: the result is true
    when any of their arguments is a ``FieldInfo`` whose ``description`` is
    not ``None``.  Callers use this to avoid replacing an explicit
    ``Field(description=...)`` with a docstring-derived description.
    """
    if get_origin(annotation) is not Annotated:
        return False
    return any(isinstance(meta, FieldInfo) and meta.description is not None for meta in get_args(annotation))
511+
512+
492513
def _convert_to_content(result: Any) -> Sequence[ContentBlock]:
493514
"""Convert a result to a sequence of content objects.
494515
Lines changed: 162 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,162 @@
1+
"""Tests for docstring parameter description parsing."""
2+
3+
from typing import Annotated
4+
5+
import pytest
6+
from pydantic import Field
7+
8+
from mcp.server.mcpserver.utilities.docstring_parser import parse_docstring_params
9+
from mcp.server.mcpserver.utilities.func_metadata import func_metadata
10+
11+
12+
class TestGoogleStyle:
    """Parsing of Google-style ``Args:`` sections."""

    def test_basic(self):
        """Plain ``name: description`` entries are extracted."""
        doc = """Do something.

        Args:
            name: The name of the thing.
            count: How many times.
        """
        expected = {"name": "The name of the thing.", "count": "How many times."}
        assert parse_docstring_params(doc) == expected

    def test_with_type_annotations(self):
        """A ``(type)`` between the name and the colon is ignored."""
        doc = """Do something.

        Args:
            name (str): The name of the thing.
            count (int): How many times.
        """
        expected = {"name": "The name of the thing.", "count": "How many times."}
        assert parse_docstring_params(doc) == expected

    def test_multiline_description(self):
        """Deeper-indented lines continue the current description."""
        doc = """Do something.

        Args:
            name: The name of the thing.
                This is a longer description
                that spans multiple lines.
            count: How many times.
        """
        parsed = parse_docstring_params(doc)
        assert "longer description" in parsed["name"]
        assert parsed["count"] == "How many times."

    @pytest.mark.parametrize("keyword", ["Args", "Arguments", "Parameters"])
    def test_section_keywords(self, keyword: str) -> None:
        """Each accepted section keyword introduces a parameter section."""
        doc = f"""Do something.

        {keyword}:
            name: The name.
        """
        expected = {"name": "The name."}
        assert parse_docstring_params(doc) == expected

    def test_stops_at_returns(self):
        """Content of a following ``Returns:`` section is not collected."""
        doc = """Do something.

        Args:
            name: The name.

        Returns:
            The result.
        """
        expected = {"name": "The name."}
        assert parse_docstring_params(doc) == expected
69+
70+
71+
class TestNumpyStyle:
    """Parsing of NumPy-style ``Parameters`` sections."""

    def test_basic(self):
        """``name : type`` headers with indented descriptions are extracted."""
        doc = """Do something.

        Parameters
        ----------
        name : str
            The name of the thing.
        count : int
            How many times.
        """
        expected = {"name": "The name of the thing.", "count": "How many times."}
        assert parse_docstring_params(doc) == expected

    def test_multiline(self):
        """Several indented lines are merged into one description."""
        doc = """Do something.

        Parameters
        ----------
        name : str
            The name of the thing.
            More details here.
        """
        parsed = parse_docstring_params(doc)
        assert "More details" in parsed["name"]
97+
98+
99+
class TestSphinxStyle:
    """Parsing of Sphinx-style ``:param name:`` fields."""

    def test_basic(self):
        """Consecutive ``:param`` fields each yield one entry."""
        doc = """Do something.

        :param name: The name of the thing.
        :param count: How many times.
        """
        expected = {"name": "The name of the thing.", "count": "How many times."}
        assert parse_docstring_params(doc) == expected

    def test_with_type(self):
        """A type written before the name is skipped."""
        doc = """Do something.

        :param str name: The name of the thing.
        """
        expected = {"name": "The name of the thing."}
        assert parse_docstring_params(doc) == expected
117+
118+
119+
class TestEdgeCases:
    """Inputs that contain no parameter documentation."""

    @pytest.mark.parametrize("doc", [None, "", "Just a description."])
    def test_returns_empty(self, doc: str | None) -> None:
        """Missing, empty, or parameter-free docstrings give an empty mapping."""
        parsed = parse_docstring_params(doc)
        assert parsed == {}
123+
124+
125+
class TestFuncMetadataIntegration:
    """Docstring descriptions as they surface in the generated JSON schema."""

    def test_descriptions_appear_in_schema(self):
        """Docstring descriptions show up on the matching schema properties."""

        def my_tool(name: str, count: int = 5) -> str:
            """A tool.

            Args:
                name: The name to process.
                count: Number of repetitions.
            """
            return name * count

        properties = func_metadata(my_tool).arg_model.model_json_schema()["properties"]
        assert properties["name"]["description"] == "The name to process."
        assert properties["count"]["description"] == "Number of repetitions."

    def test_explicit_field_takes_precedence(self):
        """An explicit ``Field(description=...)`` is kept over the docstring text."""

        def my_tool(
            name: Annotated[str, Field(description="Explicit")],
            count: int = 5,
        ) -> str:
            """A tool.

            Args:
                name: Should be ignored.
                count: From docstring.
            """
            return name * count

        properties = func_metadata(my_tool).arg_model.model_json_schema()["properties"]
        assert properties["name"]["description"] == "Explicit"
        assert properties["count"]["description"] == "From docstring."

    def test_no_docstring(self):
        """A function without a docstring still produces its properties."""

        def my_tool(name: str) -> str:
            return name

        properties = func_metadata(my_tool).arg_model.model_json_schema()["properties"]
        assert "name" in properties

0 commit comments

Comments
 (0)