11"""Ixp extraction tool."""
22
3- from typing import Any
3+ import uuid
4+ from typing import Any , Optional
45
56from langchain .tools import BaseTool
67from langchain_core .messages import ToolCall , ToolMessage
78from langchain_core .tools import StructuredTool
89from langgraph .types import Command , interrupt
10+ from pydantic import BaseModel , Field
911from uipath .agent .models .agent import AgentIxpExtractionResourceConfig
1012from uipath .eval .mocks import mockable
11- from uipath .platform .attachments import Attachment
1213from uipath .platform .common import DocumentExtraction
1314from uipath .platform .documents import ExtractionResponseIXP
1415
@@ -26,6 +27,34 @@ class StructuredToolWithWrapper(StructuredToolWithOutputType, ToolWrapperMixin):
2627 pass
2728
2829
class ExtractionToolInputSchema(BaseModel):
    """Tool input model exposing `Attachment`'s fields without aliases.

    This model is handed to LangChain as the extraction tool's `args_schema`.
    `Attachment` itself is deliberately not used for that purpose: its fields
    are aliased (`id` -> `ID`, `full_name` -> `FullName`, ...), and LangChain
    handles aliased fields incorrectly in two places (see PR #796):

    1. `BaseTool._parse_input()` reads each field back with
       `getattr(model, key)`, using the alias as `key`. When an alias
       collides with a built-in model attribute (e.g. `schema`), the built-in
       is returned instead of the field value, which is why downstream
       `kwargs.get("id")` / `kwargs.get("full_name")` came back as `None`.
    2. `tool_call_schema` rebuilds a subset of the model by copying its
       fields one by one, dropping alias and serialization options on the
       way, so the rebuilt schema stops matching what the LLM emits.

    Until LangChain fixes both, an alias-free schema whose field names equal
    `Attachment`'s python attribute names avoids the problem. The fields
    below must stay in sync with `Attachment`; the test
    `test_extraction_tool_has_attachment_input_schema` enforces this.
    """

    # Names (including `id`, which shadows the builtin inside the class body)
    # are fixed by `Attachment`'s python-side attribute names and must not be
    # renamed independently.
    id: uuid.UUID
    full_name: str
    mime_type: str
    # Optional; defaults to None when the caller omits it.
    metadata: Optional[dict[str, Any]] = Field(default=None)
56+
57+
2958def create_ixp_extraction_tool (
3059 resource : AgentIxpExtractionResourceConfig ,
3160) -> StructuredTool :
@@ -38,27 +67,21 @@ def create_ixp_extraction_tool(
3867 @mockable (
3968 name = resource .name ,
4069 description = resource .description ,
41- input_schema = Attachment .model_json_schema (),
70+ input_schema = ExtractionToolInputSchema .model_json_schema (),
4271 output_schema = ExtractionResponseIXP .model_json_schema (),
4372 example_calls = resource .properties .example_calls ,
4473 )
4574 async def extraction_tool_fn (** kwargs : Any ) -> ExtractionResponseIXP :
4675 from uipath .platform import UiPath
4776
77+ attachment = ExtractionToolInputSchema .model_validate (kwargs )
4878 uipath = UiPath ()
4979
50- attachment_id = kwargs .get ("id" )
51- attachment_full_name = kwargs .get ("full_name" )
52-
53- # TODO: attachment_mime_type is currently not used anywhere (attachment_full_name will also be obsolete once attachments api is onboarded)
54- # should we use them somewhere else? otherwise input_schema should only contain the file id
55- # attachment_mime_type = kwargs.get("mime_type")
56-
5780 # TODO: current workaround. DocumentExtraction model should support attachment_id and use the
5881 # start_ixp_extraction_from_attachment sdk method once support is added
5982
6083 attachment_local_file_path = await uipath .attachments .download_async (
61- key = attachment_id , destination_path = attachment_full_name
84+ key = attachment . id , destination_path = attachment . full_name
6285 )
6386 document_extraction_response = interrupt (
6487 DocumentExtraction (
@@ -95,7 +118,7 @@ async def extraction_tool_wrapper(
95118 tool = StructuredToolWithWrapper (
96119 name = tool_name ,
97120 description = resource .description ,
98- args_schema = Attachment ,
121+ args_schema = ExtractionToolInputSchema ,
99122 coroutine = extraction_tool_fn ,
100123 output_type = ExtractionResponseIXP ,
101124 metadata = {
0 commit comments