# Source: modelcontextprotocol/python-sdk, src/mcp/server/mcpserver/utilities/docstring_parser.py (commit f0946aed7eb93d132786ff675676c7af94ba4df8)

"""Extract parameter descriptions from function docstrings.

Auto-detects Google, NumPy, and Sphinx styles.
"""

from __future__ import annotations

import re

# Google style: an ``Args:`` / ``Arguments:`` / ``Parameters:`` heading followed
# by the section body (group 1).  The body ends at the first blank line, at one
# of the listed follow-on section names, or at the end of the docstring.
_GOOGLE_SECTION_RE = re.compile(
    r"(?:Args|Arguments|Parameters)\s*:\s*\n"
    r"(.*?)"
    r"(?:\n\s*\n|\n\s*(?:Returns|Raises|Yields|Note|Example)|\Z)",
    re.DOTALL,
)
# One Google parameter header line: group 1 = leading indent, group 2 = name,
# group 3 = description text on the same line.  An optional ``(type)`` between
# the name and the colon is matched but not captured.
_GOOGLE_PARAM_RE = re.compile(r"^(\s+)(\w+)\s*(?:\([^)]*\))?\s*:\s*(.*)")

# NumPy style: a ``Parameters`` heading underlined with three or more dashes,
# followed by the section body (group 1).  The body ends just before the next
# listed heading that is itself underlined with dashes, or at the end of the
# docstring.
_NUMPY_SECTION_RE = re.compile(
    r"Parameters\s*\n\s*-{3,}\s*\n"
    r"(.*?)"
    r"(?:\n\s*(?:Returns|Raises|Yields|See Also|Note|Example)\s*\n\s*-{3,}|\Z)",
    re.DOTALL,
)
# One NumPy parameter header line (``name : type``): group 1 = name.  The text
# after the colon is matched but not captured.
_NUMPY_PARAM_RE = re.compile(r"^\s*(\w+)\s*:\s*.*")

# Sphinx style: ``:param name: description`` or ``:param type name: description``.
# Group 1 = name, group 2 = description (may span several lines).  The
# description stops before the next field (a line starting with ``:``), before
# a blank line, or at the end of the docstring.  The blank-line stop matters
# because ``$`` without MULTILINE only matches at the end of the string; without
# it the last ``:param`` would swallow any prose paragraph that follows the
# field list.
_SPHINX_PARAM_RE = re.compile(
    r":param\s+(?:\w+\s+)?(\w+)\s*:\s*(.+?)(?=\n\s*:|\n[ \t]*\n|$)",
    re.DOTALL,
)

# Three or more consecutive dashes anywhere in the text; used as a cheap hint
# that a docstring is probably NumPy style.
_NUMPY_SEPARATOR_RE = re.compile(r"-{3,}")


def parse_docstring_params(docstring: str | None) -> dict[str, str]:
    """Return a ``{parameter name: description}`` mapping parsed from *docstring*.

    Three style-specific parsers are tried in turn and the first non-empty
    result wins.  When the text contains a run of three or more dashes the
    NumPy parser goes first; otherwise the Google parser goes first and the
    NumPy parser last.

    Args:
        docstring: The raw docstring text, or ``None``.

    Returns:
        The parsed mapping, or an empty dict when *docstring* is empty/``None``
        or no parser finds any parameters.
    """
    if not docstring:
        return {}

    looks_like_numpy = _NUMPY_SEPARATOR_RE.search(docstring) is not None
    ordered_parsers = (
        (_parse_numpy, _parse_google, _parse_sphinx)
        if looks_like_numpy
        else (_parse_google, _parse_sphinx, _parse_numpy)
    )

    # The generator is lazy, so later parsers only run if earlier ones came
    # back empty.
    attempts = (parse(docstring) for parse in ordered_parsers)
    return next((found for found in attempts if found), {})


def _collect_indented_block(
    lines: list[str],
    param_re: re.Pattern[str],
    *,
    extract_desc_from_header: bool = True,
) -> dict[str, str]:
    """Walk *lines* and collect param→description pairs.

    A parameter header is any line matching *param_re* whose indent is
    ≤ the previous header's indent (the first matching line is always a
    header).  Lines indented deeper than the current header are joined, with
    single spaces, into that parameter's description.  Blank lines are
    skipped.

    A non-header line at or to the left of the header column closes the
    current parameter, so text that follows an unrecognised entry (for
    example a NumPy parameter written without ``: type``) is dropped instead
    of being attributed to the previous parameter.

    Args:
        lines: The section body, one string per line.
        param_re: Pattern matched against each line with ``match``.  If it has
            two or more groups the name is taken from group 2, otherwise from
            group 1; group 3, when present, is the header-line description.
        extract_desc_from_header: Whether to seed the description with
            group 3 of *param_re*.

    Returns:
        A dict mapping each parameter name to its whitespace-trimmed
        description (possibly an empty string).
    """
    params: dict[str, str] = {}
    current_param: str | None = None
    desc_parts: list[str] = []
    # Indent of the most recent header; None until the first header is seen.
    header_indent: int | None = None

    for line in lines:
        text = line.strip()
        if not text:
            continue

        indent = len(line) - len(line.lstrip())
        m = param_re.match(line)

        if m is not None and (header_indent is None or indent <= header_indent):
            # New header: store the parameter collected so far, then start over.
            if current_param is not None:
                params[current_param] = " ".join(desc_parts).strip()

            header_indent = indent
            group_count = m.lastindex or 0
            current_param = m.group(2) if group_count >= 2 else m.group(1)

            desc_parts = []
            if extract_desc_from_header and group_count >= 3:
                tail = m.group(3).strip()
                if tail:
                    desc_parts.append(tail)
        elif current_param is None:
            # Nothing to attach this line to.
            continue
        elif header_indent is not None and indent > header_indent:
            # Deeper than the header: continuation of the description, even if
            # the line happens to look like a header itself.
            desc_parts.append(text)
        else:
            # Unrecognised line at header level: close the current parameter so
            # that whatever is indented beneath this line is not misattributed.
            params[current_param] = " ".join(desc_parts).strip()
            current_param = None
            desc_parts = []

    if current_param is not None:
        params[current_param] = " ".join(desc_parts).strip()
    return params


def _parse_google(docstring: str) -> dict[str, str]:
    """Parse parameters from the section matched by ``_GOOGLE_SECTION_RE``.

    Returns an empty dict when the docstring has no such section.  The text
    that follows the colon on a parameter's header line is used as the start
    of its description.
    """
    section = _GOOGLE_SECTION_RE.search(docstring)
    if section is None:
        return {}

    body_lines = section.group(1).split("\n")
    return _collect_indented_block(body_lines, _GOOGLE_PARAM_RE, extract_desc_from_header=True)


def _parse_numpy(docstring: str) -> dict[str, str]:
    """Parse parameters from the section matched by ``_NUMPY_SECTION_RE``.

    Returns an empty dict when the docstring has no such section.  Nothing is
    taken from a parameter's header line for its description; only the more
    deeply indented lines beneath it are used.
    """
    section = _NUMPY_SECTION_RE.search(docstring)
    body_lines = section.group(1).split("\n") if section is not None else None
    if body_lines is None:
        return {}
    return _collect_indented_block(body_lines, _NUMPY_PARAM_RE, extract_desc_from_header=False)


def _parse_sphinx(docstring: str) -> dict[str, str]:
    """Collect every ``_SPHINX_PARAM_RE`` match in *docstring* as name→description.

    Each description has its runs of whitespace (including newlines) collapsed
    to single spaces.  If a name occurs more than once, the last match wins.
    """
    found: dict[str, str] = {}
    for hit in _SPHINX_PARAM_RE.finditer(docstring):
        name, raw_desc = hit.group(1), hit.group(2)
        # split() with no argument drops leading/trailing whitespace as well,
        # so the joined text needs no further stripping.
        found[name] = " ".join(raw_desc.split())
    return found