-
Notifications
You must be signed in to change notification settings - Fork 3.3k
feat: extract parameter descriptions from docstrings into tool JSON schemas #2295
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
a868f7c
8127f1e
735bc50
fadbe04
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,14 +1,18 @@ | ||
| import functools | ||
| import inspect | ||
| import json | ||
| from collections.abc import Awaitable, Callable, Sequence | ||
| import logging | ||
| import re | ||
| from collections.abc import Awaitable, Callable, Iterator, Sequence | ||
| from contextlib import contextmanager | ||
| from itertools import chain | ||
| from types import GenericAlias | ||
| from typing import Annotated, Any, cast, get_args, get_origin, get_type_hints | ||
| from typing import Annotated, Any, Literal, cast, get_args, get_origin, get_type_hints | ||
|
|
||
| import anyio | ||
| import anyio.to_thread | ||
| import pydantic_core | ||
| from griffe import Docstring, DocstringSectionKind | ||
| from pydantic import BaseModel, ConfigDict, Field, WithJsonSchema, create_model | ||
| from pydantic.fields import FieldInfo | ||
| from pydantic.json_schema import GenerateJsonSchema, JsonSchemaWarningKind | ||
|
|
@@ -167,6 +171,126 @@ def pre_parse_json(self, data: dict[str, Any]) -> dict[str, Any]: | |
| ) | ||
|
|
||
|
|
||
# The docstring conventions that style inference can report.
_DocstringStyle = Literal["google", "numpy", "sphinx"]

# Patterns to infer docstring style, adapted from pydantic-ai.
# Each entry is (pattern_template, replacement_keywords, style); ``{0}`` in a
# template is the placeholder that one replacement keyword is formatted into.
_DOCSTRING_STYLE_PATTERNS: list[tuple[str, list[str], _DocstringStyle]] = [
    # Sphinx: a ``:keyword ...:`` field at the start of a line.
    (
        r"\n[ \t]*:{0}([ \t]+\w+)*:([ \t]+.+)?\n",
        [
            "param", "parameter", "arg", "argument", "key", "keyword",
            "type", "var", "ivar", "cvar", "vartype",
            "returns", "return", "rtype",
            "raises", "raise", "except", "exception",
        ],
        "sphinx",
    ),
    # Google: a ``Keyword:`` heading followed by an indented line.
    (
        r"\n[ \t]*{0}:([ \t]+.+)?\n[ \t]+.+",
        [
            "args", "arguments", "params", "parameters",
            "keyword args", "keyword arguments",
            "raises", "exceptions", "returns", "yields", "receives",
            "examples", "attributes",
        ],
        "google",
    ),
    # NumPy: a keyword heading underlined with at least three dashes.
    (
        r"\n[ \t]*{0}\n[ \t]*---+\n",
        [
            "deprecated", "parameters", "other parameters",
            "returns", "yields", "receives", "raises", "warns", "attributes",
        ],
        "numpy",
    ),
]
|
|
||
|
|
||
def _infer_docstring_style(doc: str) -> _DocstringStyle:
    """Guess which docstring convention ``doc`` is written in.

    Each keyword of each entry in ``_DOCSTRING_STYLE_PATTERNS`` is formatted
    into that entry's regex template and searched for (case-insensitively, in
    multiline mode). The style of the first entry with any hit is returned.

    Args:
        doc: The raw docstring text to inspect.

    Returns:
        The detected style, or ``"google"`` when no pattern matches.
    """
    search_flags = re.IGNORECASE | re.MULTILINE
    for template, keywords, style in _DOCSTRING_STYLE_PATTERNS:
        for keyword in keywords:
            if re.search(template.format(keyword), doc, search_flags) is not None:
                return style
    # Nothing matched: fall back to the Google parser.
    return "google"
|
|
||
|
|
||
@contextmanager
def _suppress_griffe_logging() -> Iterator[None]:
    """Temporarily raise the level of griffe's loggers to ``ERROR``.

    Only griffe's own loggers are touched; the root logger (and therefore
    every other library's logging) is left alone. The previous levels are
    restored even if the wrapped block raises.

    Yields:
        None. The levels are lowered again when the ``with`` block exits.
    """
    # NOTE(review): griffe's logger is reported as "griffe" on current
    # releases and as "_griffe" elsewhere — both are covered here; confirm
    # against the griffe version actually pinned.
    griffe_loggers = [logging.getLogger(name) for name in ("griffe", "_griffe")]
    # Save each logger's *own* level rather than its effective level, so a
    # logger that was NOTSET (inheriting from its parent) goes back to NOTSET.
    saved_levels = [logger.level for logger in griffe_loggers]
    for logger in griffe_loggers:
        logger.setLevel(logging.ERROR)
    try:
        yield
    finally:
        for logger, level in zip(griffe_loggers, saved_levels):
            logger.setLevel(level)
|
|
||
|
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This sets the root logger level, which will suppress all logging globally (not just griffe's) for the duration of the context manager. It's also not thread-safe — concurrent code that logs during this window will silently lose messages. It should target the griffe logger specifically: @contextmanager
def _suppress_griffe_logging() -> Iterator[None]:
logger = logging.getLogger("_griffe")
old_level = logger.getEffectiveLevel()
logger.setLevel(logging.ERROR)
yield
logger.setLevel(old_level) There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Maybe on an old version? On the latest version it's: >>> import griffe
>>> griffe.logger._logger.name
'griffe' |
||
|
|
||
def _parse_docstring_params(func: Callable[..., Any]) -> dict[str, str]:
    """Extract per-parameter descriptions from ``func``'s docstring.

    The docstring style (Google, NumPy or Sphinx) is inferred from the text
    and the docstring is then parsed with griffe using that style.

    Args:
        func: The callable whose ``__doc__`` is read.

    Returns:
        A dict mapping parameter names to their descriptions, taken from the
        first parameters section found. Empty when there is no docstring or
        no parameters section.
    """
    raw_doc = func.__doc__
    if not raw_doc:
        return {}

    parsed = Docstring(raw_doc, lineno=1, parser=_infer_docstring_style(raw_doc))

    # Parsing runs inside the suppression context so griffe's log output
    # during parsing is muted.
    with _suppress_griffe_logging():
        sections = parsed.parse()

    params_section = next(
        (candidate for candidate in sections if candidate.kind == DocstringSectionKind.parameters),
        None,
    )
    if params_section is None:
        return {}
    return {entry.name: entry.description for entry in params_section.value}
|
|
||
|
|
||
def _annotation_has_description(annotation: Any) -> bool:
    """Tell whether ``annotation`` already carries a described ``Field``.

    Args:
        annotation: Any type annotation; only ``Annotated[...]`` forms can
            yield ``True``.

    Returns:
        ``True`` if any metadata item of the ``Annotated`` type is a
        ``FieldInfo`` whose ``description`` is not ``None``, else ``False``.
    """
    if get_origin(annotation) is not Annotated:
        return False
    # The first argument is the underlying type; the rest is the metadata.
    _, *metadata = get_args(annotation)
    return any(isinstance(item, FieldInfo) and item.description is not None for item in metadata)
|
|
||
|
|
||
| def func_metadata( | ||
| func: Callable[..., Any], | ||
| skip_names: Sequence[str] = (), | ||
|
|
@@ -215,6 +339,7 @@ def func_metadata( | |
| # model_rebuild right before using it 🤷 | ||
| raise InvalidSignature(f"Unable to evaluate type annotations for callable {func.__name__!r}") from e | ||
| params = sig.parameters | ||
| docstring_descriptions = _parse_docstring_params(func) | ||
| dynamic_pydantic_model_params: dict[str, Any] = {} | ||
| for param in params.values(): | ||
| if param.name.startswith("_"): # pragma: no cover | ||
|
|
@@ -229,6 +354,15 @@ def func_metadata( | |
|
|
||
| if param.annotation is inspect.Parameter.empty: | ||
| field_metadata.append(WithJsonSchema({"title": param.name, "type": "string"})) | ||
|
|
||
| # Add description from docstring if no explicit Field description exists | ||
| if param.name in docstring_descriptions: | ||
| has_explicit_desc = _annotation_has_description(annotation) or ( | ||
| isinstance(param.default, FieldInfo) and param.default.description is not None | ||
| ) | ||
| if not has_explicit_desc: | ||
| field_kwargs["description"] = docstring_descriptions[param.name] | ||
|
|
||
| # Check if the parameter name conflicts with BaseModel attributes | ||
| # This is necessary because Pydantic warns about shadowing parent attributes | ||
| if hasattr(BaseModel, field_name) and callable(getattr(BaseModel, field_name)): | ||
|
|
||
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
griffe pulls in transitive deps (colorama, etc.) and is a non-trivial addition to the dependency tree for what's ultimately a convenience feature (docstring → description). Most users who care about tool descriptions are already using Field(description=...) or Annotated. Worth considering:
mcp[docstrings]), with a graceful fallback when not installed Args: covers the vast majority of cases) Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
You can depend on
griffelib to avoid pulling CLI dependencies such as colorama. Anyway you seem to have opted for a no-deps approach which is totally fine to me 😄 Just wanted to provide details 🙂 By the way, most Griffe dependents missed that our
infer_docstring_style function is now public: https://mkdocstrings.github.io/griffe/reference/api/docstrings/parsers/#griffe.infer_docstring_style. You could also get inspiration from our own patterns.