Skip to content

Commit 9115760

Browse files
fix: ixp extraction tool
1 parent 87852f5 commit 9115760

4 files changed

Lines changed: 81 additions & 17 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "uipath-langchain"
3-
version = "0.10.11"
3+
version = "0.10.12"
44
description = "Python SDK that enables developers to build and deploy LangGraph agents to the UiPath Cloud Platform"
55
readme = { file = "README.md", content-type = "text/markdown" }
66
requires-python = ">=3.11"

src/uipath_langchain/agent/tools/extraction_tool.py

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,15 @@
11
"""Ixp extraction tool."""
22

3-
from typing import Any
3+
import uuid
4+
from typing import Any, Optional
45

56
from langchain.tools import BaseTool
67
from langchain_core.messages import ToolCall, ToolMessage
78
from langchain_core.tools import StructuredTool
89
from langgraph.types import Command, interrupt
10+
from pydantic import BaseModel, Field
911
from uipath.agent.models.agent import AgentIxpExtractionResourceConfig
1012
from uipath.eval.mocks import mockable
11-
from uipath.platform.attachments import Attachment
1213
from uipath.platform.common import DocumentExtraction
1314
from uipath.platform.documents import ExtractionResponseIXP
1415

@@ -26,6 +27,34 @@ class StructuredToolWithWrapper(StructuredToolWithOutputType, ToolWrapperMixin):
2627
pass
2728

2829

30+
class ExtractionToolInputSchema(BaseModel):
31+
"""Alias-free mirror of `Attachment` used as the tool's args_schema.
32+
33+
We don't use `Attachment` directly because its fields carry aliases
34+
(`id` -> `ID`, `full_name` -> `FullName`, ...) and LangChain mishandles
35+
aliased fields in two places (see PR #796):
36+
37+
1. `BaseTool._parse_input()` extracts each field with `getattr(model, key)`,
38+
where `key` is the alias. For aliases that collide with built-in model
39+
attributes (e.g. `schema`), this returns the built-in instead of the
40+
field value, so downstream `kwargs.get("id") / kwargs.get("full_name")`
41+
come back as `None`.
42+
2. `tool_call_schema` rebuilds a subset of the model by copying each field
43+
but drops alias and serialization options, so the rebuilt schema no
44+
longer matches what the LLM emits.
45+
46+
Until LangChain fixes both, exposing an alias-free schema with field
47+
names matching `Attachment`'s python names sidesteps the issue. Keep the
48+
fields here in sync with `Attachment` — the test
49+
`test_extraction_tool_has_attachment_input_schema` enforces this.
50+
"""
51+
52+
id: uuid.UUID
53+
full_name: str
54+
mime_type: str
55+
metadata: Optional[dict[str, Any]] = Field(None)
56+
57+
2958
def create_ixp_extraction_tool(
3059
resource: AgentIxpExtractionResourceConfig,
3160
) -> StructuredTool:
@@ -38,27 +67,21 @@ def create_ixp_extraction_tool(
3867
@mockable(
3968
name=resource.name,
4069
description=resource.description,
41-
input_schema=Attachment.model_json_schema(),
70+
input_schema=ExtractionToolInputSchema.model_json_schema(),
4271
output_schema=ExtractionResponseIXP.model_json_schema(),
4372
example_calls=resource.properties.example_calls,
4473
)
4574
async def extraction_tool_fn(**kwargs: Any) -> ExtractionResponseIXP:
4675
from uipath.platform import UiPath
4776

77+
attachment = ExtractionToolInputSchema.model_validate(kwargs)
4878
uipath = UiPath()
4979

50-
attachment_id = kwargs.get("id")
51-
attachment_full_name = kwargs.get("full_name")
52-
53-
# TODO: attachment_mime_type is currently not used anywhere (attachment_full_name will also be obsolete once attachments api is onboarded)
54-
# should we use them somewhere else? otherwise input_schema should only contain the file id
55-
# attachment_mime_type = kwargs.get("mime_type")
56-
5780
# TODO: current workaround. DocumentExtraction model should support attachment_id and use the
5881
# start_ixp_extraction_from_attachment sdk method once support is added
5982

6083
attachment_local_file_path = await uipath.attachments.download_async(
61-
key=attachment_id, destination_path=attachment_full_name
84+
key=attachment.id, destination_path=attachment.full_name
6285
)
6386
document_extraction_response = interrupt(
6487
DocumentExtraction(
@@ -95,7 +118,7 @@ async def extraction_tool_wrapper(
95118
tool = StructuredToolWithWrapper(
96119
name=tool_name,
97120
description=resource.description,
98-
args_schema=Attachment,
121+
args_schema=ExtractionToolInputSchema,
99122
coroutine=extraction_tool_fn,
100123
output_type=ExtractionResponseIXP,
101124
metadata={

tests/agent/tools/test_extraction_tool.py

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@
1111
from uipath.platform.attachments import Attachment
1212
from uipath.platform.documents import ExtractionResponseIXP
1313

14-
from uipath_langchain.agent.tools.extraction_tool import create_ixp_extraction_tool
14+
from uipath_langchain.agent.tools.extraction_tool import (
15+
ExtractionToolInputSchema,
16+
create_ixp_extraction_tool,
17+
)
1518

1619

1720
class TestExtractionToolMetadata:
@@ -76,10 +79,15 @@ def test_extraction_tool_has_correct_description(self, extraction_resource):
7679
assert tool.description == "Extract data from files"
7780

7881
def test_extraction_tool_has_attachment_input_schema(self, extraction_resource):
79-
"""Test that extraction tool uses Attachment as input schema."""
82+
"""Test that extraction tool's input schema mirrors Attachment fields."""
8083
tool = create_ixp_extraction_tool(extraction_resource)
8184

82-
assert tool.args_schema == Attachment
85+
schema_fields = tool.args_schema.model_fields
86+
attachment_fields = Attachment.model_fields
87+
88+
assert schema_fields.keys() == attachment_fields.keys()
89+
for name, attachment_field in attachment_fields.items():
90+
assert schema_fields[name].annotation == attachment_field.annotation
8391

8492
def test_extraction_tool_has_extraction_response_output_type(
8593
self, extraction_resource
@@ -235,6 +243,39 @@ async def test_extraction_tool_propagates_download_exception(
235243

236244
assert "Download failed" in str(exc_info.value)
237245

246+
@pytest.mark.asyncio
247+
@patch("uipath.platform.UiPath")
248+
@patch("uipath_langchain.agent.tools.extraction_tool.interrupt")
249+
async def test_extraction_tool_handles_alias_keyed_input(
250+
self, mock_interrupt, mock_uipath_class, extraction_resource
251+
):
252+
"""The tool is invoked with input keyed by python field names
253+
(id/full_name/mime_type), not by Attachment's aliases (ID/FullName/MimeType). download_async
254+
must still be called with the populated UUID, not key=None.
255+
"""
256+
mock_client = MagicMock()
257+
mock_uipath_class.return_value = mock_client
258+
mock_client.attachments.download_async = AsyncMock(
259+
return_value="/path/to/document.pdf"
260+
)
261+
mock_interrupt.return_value = {"extracted_data": {"field1": "value1"}}
262+
263+
tool = create_ixp_extraction_tool(extraction_resource)
264+
265+
attachment = ExtractionToolInputSchema(
266+
id=UUID("fa93f4ca-bd3f-473a-93e5-e6e5b5a8f27f"),
267+
full_name="document.pdf",
268+
mime_type="application/pdf",
269+
)
270+
aliased_input = attachment.model_dump()
271+
272+
await tool.ainvoke(aliased_input)
273+
274+
mock_client.attachments.download_async.assert_called_once_with(
275+
key=UUID("fa93f4ca-bd3f-473a-93e5-e6e5b5a8f27f"),
276+
destination_path="document.pdf",
277+
)
278+
238279

239280
class TestExtractionToolNameSanitization:
240281
"""Test that extraction tool names are properly sanitized."""

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)