-
Notifications
You must be signed in to change notification settings - Fork 6
fix: sanitize non-identifier field names in MCP/OpenAPI tool schemas #99
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from 1 commit
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -41,6 +41,7 @@ | |
| from pydantic import ( | ||
| AliasChoices, | ||
| BaseModel, | ||
| ConfigDict, | ||
| create_model, | ||
| Field, | ||
| ValidationError, | ||
|
|
@@ -1396,6 +1397,7 @@ def _create_function_with_signature( | |
| args_schema, "__agentrun_argument_aliases__", {} | ||
| ) | ||
| if alias_map: | ||
| existing_param_names = {p.name for p in parameters} | ||
| for alias, canonical in alias_map.items(): | ||
| canonical_field = args_schema.model_fields.get(canonical) | ||
| alias_annotation = ( | ||
|
|
@@ -1408,9 +1410,20 @@ def _create_function_with_signature( | |
| and alias_annotation is not None | ||
| ): | ||
| alias_annotation = Optional[alias_annotation] | ||
| # 防御性 sanitize: alias 同样要落到 inspect.Parameter 上, 非法字符 | ||
| # (如 ``x-access-id``)会触发 ValueError。当前 alias 仅由 | ||
| # ``_maybe_add_body_alias`` 写入 "query", 但未来可能扩展。 | ||
| alias_name = ( | ||
| alias | ||
| if alias.isidentifier() | ||
| else _sanitize_python_identifier(alias) | ||
| ) | ||
| if alias_name in existing_param_names: | ||
| continue | ||
| existing_param_names.add(alias_name) | ||
| parameters.append( | ||
| inspect.Parameter( | ||
| alias, | ||
| alias_name, | ||
| inspect.Parameter.KEYWORD_ONLY, | ||
| default=None, | ||
| annotation=alias_annotation, | ||
|
|
@@ -1425,7 +1438,9 @@ def impl(**kwargs): | |
| if args_schema is not None: | ||
| try: | ||
| parsed = args_schema(**normalized_kwargs) | ||
| payload = parsed.model_dump(mode="python", exclude_unset=True) | ||
| payload = parsed.model_dump( | ||
| mode="python", exclude_unset=True, by_alias=True | ||
| ) | ||
| except ValidationError as exc: | ||
| raise ValueError( | ||
| f"Invalid arguments for tool '{tool_name}': {exc}" | ||
|
|
@@ -1674,6 +1689,33 @@ def _build_openapi_schema( | |
| return schema, tuple(body_field_names), alias_map | ||
|
|
||
|
|
||
| _PY_KEYWORDS: Set[str] = set() | ||
|
|
||
|
|
||
| def _sanitize_python_identifier(name: str) -> str: | ||
| """将任意字符串转换为合法的 Python 标识符 | ||
|
|
||
| 用于把 JSON Schema 中含 ``-`` / ``.`` 等字符的字段名(例如 ``x-access-id``) | ||
| 映射成 Pydantic / ``inspect.Parameter`` 都能接受的字段名。原始名通过 alias | ||
| 继续保留在 JSON Schema 和实际调用中。 | ||
| """ | ||
| import keyword | ||
|
|
||
| if not _PY_KEYWORDS: | ||
| _PY_KEYWORDS.update(keyword.kwlist) | ||
|
|
||
| sanitized = re.sub(r"[^0-9a-zA-Z_]", "_", name) | ||
| sanitized = sanitized.lstrip("_") | ||
| if not sanitized: | ||
| sanitized = "field" | ||
| if sanitized[0].isdigit(): | ||
| # Pydantic 不允许字段名以下划线开头, 因此用字母前缀. | ||
|
Collaborator
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Fixed in e97f1ba — updated the comment to clarify it's the digit-leading branch and explain why we must use a letter prefix ( |
||
| sanitized = "field_" + sanitized | ||
| if sanitized in _PY_KEYWORDS: | ||
| sanitized = sanitized + "_" | ||
| return sanitized | ||
|
|
||
|
|
||
| def _json_schema_to_pydantic( | ||
| name: str, | ||
| schema: Optional[Dict[str, Any]], | ||
|
|
@@ -1688,40 +1730,74 @@ def _json_schema_to_pydantic( | |
|
|
||
| required_fields = set(schema.get("required", [])) | ||
| fields = {} | ||
| needs_populate_by_name = False | ||
| used_py_names: Set[str] = set() | ||
|
|
||
| for field_name, field_schema in properties.items(): | ||
| if not isinstance(field_schema, dict): | ||
| continue | ||
|
|
||
| # 把含非法字符(如 ``x-access-id``)或保留字(``class``)的字段名映射到 | ||
| # 合法的 Python 标识符, 通过 alias 保留原名以便 JSON Schema 输出和 | ||
| # 调用真实 MCP 工具时使用。 | ||
| import keyword as _kw | ||
|
|
||
| if field_name.isidentifier() and not _kw.iskeyword(field_name): | ||
| py_name = field_name | ||
| else: | ||
| py_name = _sanitize_python_identifier(field_name) | ||
| if py_name in used_py_names: | ||
| suffix = 2 | ||
| while f"{py_name}_{suffix}" in used_py_names: | ||
| suffix += 1 | ||
| py_name = f"{py_name}_{suffix}" | ||
| used_py_names.add(py_name) | ||
| if py_name != field_name: | ||
| needs_populate_by_name = True | ||
|
|
||
| # 映射类型 | ||
| field_type = _json_type_to_python(field_schema) | ||
| description = field_schema.get("description", "") | ||
| default = field_schema.get("default") | ||
| aliases = field_schema.get("x-aliases") | ||
| field_kwargs: Dict[str, Any] = {"description": description} | ||
|
|
||
| # 用 ``alias`` 同时作用于 JSON Schema 输出和 by_alias dump, | ||
| # 让 LLM/调用端看到的字段名仍是原始名(如 ``x-access-id``)。 | ||
| if py_name != field_name: | ||
| field_kwargs["alias"] = field_name | ||
| if aliases: | ||
| if not isinstance(aliases, (list, tuple)): | ||
| aliases = [aliases] | ||
| field_kwargs["validation_alias"] = AliasChoices( | ||
| field_name, *aliases | ||
| ) | ||
| alias_choices: List[str] = [field_name] | ||
| if py_name != field_name: | ||
| alias_choices.append(py_name) | ||
| for alias in aliases: | ||
| if alias and alias not in alias_choices: | ||
| alias_choices.append(alias) | ||
| field_kwargs["validation_alias"] = AliasChoices(*alias_choices) | ||
|
|
||
| # 构建字段定义 | ||
| if field_name in required_fields: | ||
| # 必填字段 | ||
| fields[field_name] = (field_type, Field(**field_kwargs)) | ||
| fields[py_name] = (field_type, Field(**field_kwargs)) | ||
| else: | ||
| # 可选字段 | ||
| from typing import Optional as TypingOptional | ||
|
|
||
| fields[field_name] = ( | ||
| fields[py_name] = ( | ||
| TypingOptional[field_type], | ||
| Field(default=default, **field_kwargs), | ||
| ) | ||
|
|
||
| # 创建模型,清理名称 | ||
| model_name = re.sub(r"[^0-9a-zA-Z]", "", name.title()) | ||
| return create_model(model_name or "Args", **fields) # type: ignore | ||
| model_kwargs: Dict[str, Any] = {} | ||
| if needs_populate_by_name: | ||
| model_kwargs["__config__"] = ConfigDict(populate_by_name=True) | ||
| return create_model( # type: ignore | ||
| model_name or "Args", **model_kwargs, **fields | ||
| ) | ||
|
|
||
|
|
||
| def _json_type_to_python(field_schema: Dict[str, Any]) -> type: | ||
|
|
||
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch — fixed in e97f1ba. When an alias is sanitized, we now also add the sanitized form to
__agentrun_argument_aliases__so that_normalize_tool_argumentscan translate it back to the canonical field. Added a regression test (test_call_via_sanitized_alias_name_routes_to_canonical) covering the end-to-end "call via sanitized alias → route to canonical → send original name to MCP" path.