|
7 | 7 | ) |
8 | 8 | from langchain_core.runnables import RunnableConfig |
9 | 9 | from langchain_core.tools import StructuredTool |
10 | | -from langchain_core.tools.base import _get_runnable_config_param |
| 10 | +from langchain_core.tools.base import ArgsSchema, _get_runnable_config_param |
| 11 | +from langchain_core.utils.pydantic import get_fields |
| 12 | +from pydantic import BaseModel |
11 | 13 |
|
12 | 14 |
|
13 | 15 | class BaseUiPathStructuredTool(StructuredTool): |
14 | 16 | """Base class for UiPath structured tools. |
15 | 17 |
|
16 | | - Extends LangChain's StructuredTool to override the _run and _arun methods. |
17 | | - The only difference is that the self reference variable is renamed, to avoid conflicts with payload keys. |
| 18 | + Extends LangChain's StructuredTool with two categories of override: |
18 | 19 |
|
19 | | - DO NOT CHANGE ANYTHING IN THESE METHODS. |
20 | | - There are tests that verify the implementations against the upstream LangChain implementations. |
| 20 | + 1. Upstream-pinned: _run, _arun. These mirror StructuredTool's implementation |
| 21 | + verbatim except the self parameter is renamed to avoid colliding with a |
| 22 | + payload key literally named 'self'. DO NOT CHANGE — bytecode-pin tests |
| 23 | + catch upstream drift and require re-syncing when LangChain changes. |
21 | 24 |
|
| 25 | + 2. Intentional divergence: _parse_input, tool_call_schema. These work around |
| 26 | + LangChain's handling of field aliases whose names shadow inherited |
| 27 | + BaseModel members (schema, copy, validate, dict, json). See PC-4332 and |
| 28 | + the docstrings on those methods. |
22 | 29 | """ |
23 | 30 |
|
24 | 31 | def _run( |
@@ -84,3 +91,87 @@ async def _arun( |
84 | 91 | return await super()._arun( |
85 | 92 | *args, config=config, run_manager=run_manager, **kwargs |
86 | 93 | ) |
| 94 | + |
| 95 | + def _parse_input( |
| 96 | + self, tool_input: str | dict[str, Any], tool_call_id: str | None |
| 97 | + ) -> str | dict[str, Any]: |
| 98 | + """Parse and validate tool input, resolving aliased fields by Python name. |
| 99 | +
|
| 100 | + Unlike _run/_arun, this method intentionally diverges from upstream. |
| 101 | +
|
| 102 | + Upstream StructuredTool._parse_input builds the kwargs dict via |
| 103 | + getattr(validated_instance, alias). For aliases that shadow inherited |
| 104 | + BaseModel members (e.g. 'schema', 'copy', 'validate', 'dict', 'json'), |
| 105 | + this returns the inherited method instead of the aliased field value. |
| 106 | + Fields produced by jsonschema-pydantic-converter for reserved JSON property |
| 107 | + names use exactly such aliases (schema -> schema_ with alias='schema'). |
| 108 | + """ |
| 109 | + parsed = super()._parse_input(tool_input, tool_call_id) |
| 110 | + if not isinstance(parsed, dict) or not isinstance(tool_input, dict): |
| 111 | + return parsed |
| 112 | + |
| 113 | + input_args = self.args_schema |
| 114 | + if not (isinstance(input_args, type) and issubclass(input_args, BaseModel)): |
| 115 | + return parsed |
| 116 | + |
| 117 | + fields = get_fields(input_args) |
| 118 | + alias_to_name = { |
| 119 | + field.alias: name |
| 120 | + for name, field in fields.items() |
| 121 | + if field.alias and field.alias != name |
| 122 | + } |
| 123 | + if not alias_to_name: |
| 124 | + return parsed |
| 125 | + |
| 126 | + result = input_args.model_validate(tool_input) |
| 127 | + for alias, python_name in alias_to_name.items(): |
| 128 | + if alias in parsed: |
| 129 | + parsed[alias] = getattr(result, python_name) |
| 130 | + return parsed |
| 131 | + |
| 132 | + @property |
| 133 | + def tool_call_schema(self) -> ArgsSchema: |
| 134 | + """Return the LLM-facing schema with reserved-name aliases preserved. |
| 135 | +
|
| 136 | + Unlike _run/_arun, this property intentionally diverges from upstream. |
| 137 | +
|
| 138 | + Upstream BaseTool.tool_call_schema rebuilds a subset Pydantic model via |
| 139 | + _create_subset_model_v2, which constructs a fresh FieldInfoV2 for each |
| 140 | + field copying only description/default/metadata -- aliases and the source |
| 141 | + model's ConfigDict (serialize_by_alias, populate_by_name) are dropped. |
| 142 | + For fields produced by jsonschema-pydantic-converter (schema_ aliased to |
| 143 | + 'schema'), that causes the LLM to see and emit the Python-safe name |
| 144 | + (schema_) instead of the user-facing property ('schema'). |
| 145 | + """ |
| 146 | + # Upstream builds a fresh subset class per property access (no caching |
| 147 | + # in BaseTool.tool_call_schema), so mutating field info and config here |
| 148 | + # is local to this call. If upstream ever adds caching this override |
| 149 | + # must be revisited to avoid cross-instance state leakage. |
| 150 | + subset = super().tool_call_schema |
| 151 | + source = self.args_schema |
| 152 | + if not ( |
| 153 | + isinstance(subset, type) |
| 154 | + and issubclass(subset, BaseModel) |
| 155 | + and isinstance(source, type) |
| 156 | + and issubclass(source, BaseModel) |
| 157 | + ): |
| 158 | + return subset |
| 159 | + |
| 160 | + changed = False |
| 161 | + for name, subset_field in subset.model_fields.items(): |
| 162 | + source_field = source.model_fields.get(name) |
| 163 | + if source_field is None or not source_field.alias: |
| 164 | + continue |
| 165 | + if source_field.alias == name: |
| 166 | + continue |
| 167 | + subset_field.alias = source_field.alias |
| 168 | + subset_field.validation_alias = source_field.validation_alias |
| 169 | + subset_field.serialization_alias = source_field.serialization_alias |
| 170 | + changed = True |
| 171 | + |
| 172 | + if changed: |
| 173 | + subset.model_config["serialize_by_alias"] = True |
| 174 | + subset.model_config["populate_by_name"] = True |
| 175 | + subset.model_rebuild(force=True) |
| 176 | + |
| 177 | + return subset |
0 commit comments