Skip to content

Commit b95fbdb

Browse files
committed
feat: add tracing for PII file masking
1 parent 74784ac commit b95fbdb

7 files changed

Lines changed: 488 additions & 60 deletions

File tree

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "uipath-langchain"
3-
version = "0.10.8"
3+
version = "0.10.9"
44
description = "Python SDK that enables developers to build and deploy LangGraph agents to the UiPath Cloud Platform"
55
readme = { file = "README.md", content-type = "text/markdown" }
66
requires-python = ">=3.11"

src/uipath_langchain/agent/multimodal/types.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,3 +24,6 @@ class FileInfo:
2424
url: str
2525
name: str
2626
mime_type: str
27+
masked_attachment_url: str | None = None
28+
attachment_id: str | None = None
29+
masked_attachment_id: str | None = None

src/uipath_langchain/agent/tools/internal_tools/analyze_files_tool.py

Lines changed: 212 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import json
12
import logging
23
import mimetypes
34
import uuid
@@ -12,14 +13,21 @@
1213
HumanMessage,
1314
SystemMessage,
1415
)
15-
from langchain_core.runnables.config import var_child_runnable_config
16+
from langchain_core.runnables.config import RunnableConfig, var_child_runnable_config
1617
from langchain_core.tools import StructuredTool
18+
from opentelemetry import trace as otel_trace
1719
from uipath.agent.models.agent import (
1820
AgentInternalToolResourceConfig,
1921
)
22+
from uipath.core.tracing.span_utils import UiPathSpanUtils
2023
from uipath.eval.mocks import mockable
2124
from uipath.platform import UiPath
2225
from uipath.runtime.errors import UiPathErrorCategory
26+
from uipath.tracing import (
27+
AttachmentDirection,
28+
AttachmentProvider,
29+
SpanAttachment,
30+
)
2331

2432
from uipath_langchain.agent.exceptions import (
2533
AgentRuntimeError,
@@ -30,7 +38,10 @@
3038
build_file_content_blocks_for,
3139
)
3240
from uipath_langchain.agent.react.jsonschema_pydantic_converter import create_model
33-
from uipath_langchain.agent.tools.internal_tools.pii_masker import PiiMasker
41+
from uipath_langchain.agent.tools.internal_tools.pii_masker import (
42+
PiiMasker,
43+
_masked_name_for,
44+
)
3445
from uipath_langchain.agent.tools.structured_tool_with_argument_properties import (
3546
StructuredToolWithArgumentProperties,
3647
)
@@ -48,6 +59,168 @@
4859
"based on the extracted information."
4960
)
5061

62+
# Langchain config metadata key carrying the JSON-serialized SpanAttachment list
63+
# that should render on the llmCall span. The LLMOps callback in uipath-agents
64+
# reads this and stamps it on the llmCall span as the ``attachments`` attribute.
65+
LLM_CALL_ATTACHMENTS_METADATA_KEY = "uipath_llm_call_attachments"
66+
67+
68+
def _original_attachment_id(file: FileInfo) -> str:
69+
"""Return the id to use for the original file in trace attachments.
70+
71+
Prefers the orchestrator attachment UUID when present; falls back to a
72+
UUID derived from the file URL for files that did not come from
73+
orchestrator (defensive, should not happen in production paths).
74+
"""
75+
if file.attachment_id:
76+
return file.attachment_id
77+
return str(uuid.uuid5(uuid.NAMESPACE_URL, file.url))
78+
79+
80+
def _masked_attachment_id(masked_url: str) -> str:
81+
"""Derive a stable GUID from the masked URL for trace attachments.
82+
83+
The LLMOps traces endpoint validates ``Attachment.Id`` as ``System.Guid``.
84+
Masked files aren't orchestrator-tracked, so we synthesize a deterministic
85+
UUID from the redacted blob URL to satisfy the schema while keeping the id
86+
stable across re-runs.
87+
"""
88+
return str(uuid.uuid5(uuid.NAMESPACE_URL, masked_url))
89+
90+
91+
def _set_span_attachments(
92+
span: otel_trace.Span, attachments: list[SpanAttachment]
93+
) -> None:
94+
"""Write a :class:`SpanAttachment` list as a JSON string on the given OTel span."""
95+
if not attachments or span is None or not span.is_recording():
96+
return
97+
try:
98+
span.set_attribute(
99+
"attachments",
100+
json.dumps([att.model_dump(by_alias=True) for att in attachments]),
101+
)
102+
except Exception:
103+
logger.exception("Failed to emit trace attachments")
104+
105+
106+
def _llm_call_attachments_payload(files: list[FileInfo]) -> str | None:
107+
"""Build the JSON attachments payload for the llmCall span.
108+
109+
Each entry represents the file version actually sent to the model: the
110+
masked copy when PII masking ran (keyed by the orchestrator UUID from the
111+
re-upload when available, uuid5 fallback otherwise), else the original
112+
orchestrator attachment. Direction is ``IN`` because the file is an input
113+
to the LLM.
114+
"""
115+
if not files:
116+
return None
117+
attachments: list[SpanAttachment] = []
118+
for file in files:
119+
if file.masked_attachment_url:
120+
att_id = file.masked_attachment_id or _masked_attachment_id(
121+
file.masked_attachment_url
122+
)
123+
name = _masked_name_for(file.name)
124+
else:
125+
att_id = _original_attachment_id(file)
126+
name = file.name
127+
attachments.append(
128+
SpanAttachment(
129+
id=att_id,
130+
file_name=name,
131+
mime_type=file.mime_type,
132+
provider=AttachmentProvider.ORCHESTRATOR,
133+
direction=AttachmentDirection.IN,
134+
)
135+
)
136+
return json.dumps([att.model_dump(by_alias=True) for att in attachments])
137+
138+
139+
def _config_with_llm_call_attachments(
    config: RunnableConfig | None, files: list[FileInfo]
) -> RunnableConfig | None:
    """Attach the llmCall attachments payload to a runnable config.

    The payload is stored under ``metadata[LLM_CALL_ATTACHMENTS_METADATA_KEY]``.
    The LLMOps callback in ``uipath-agents`` reads it from there and stamps it
    as the ``attachments`` attribute on the llmCall span, so the file actually
    sent to the model (masked copy when PII masking ran, original otherwise)
    renders as a downloadable attachment in the trace UI.

    The incoming ``config`` and its ``metadata`` mapping are never mutated;
    shallow copies are modified instead.

    Returns:
        ``config`` unchanged when there is no payload to add, otherwise a new
        config whose metadata carries the payload.
    """
    serialized = _llm_call_attachments_payload(files)
    if serialized:
        updated = cast(RunnableConfig, dict(config) if config else {})
        merged_metadata = dict(updated.get("metadata") or {})
        merged_metadata[LLM_CALL_ATTACHMENTS_METADATA_KEY] = serialized
        updated["metadata"] = merged_metadata
        return updated
    return config
159+
160+
161+
def _emit_pii_masking_attachments(span: otel_trace.Span, files: list[FileInfo]) -> None:
162+
"""Emit originals (IN) and masked copies (OUT) on the given PII Masking span.
163+
164+
Originals are keyed by the orchestrator attachment UUID; masked copies are
165+
keyed by the real orchestrator UUID from the re-upload when available, or
166+
a uuid5 derived from the redacted URL as a fallback.
167+
"""
168+
if not files:
169+
return
170+
attachments: list[SpanAttachment] = []
171+
input_files: list[dict[str, Any]] = []
172+
output_files: list[dict[str, Any]] = []
173+
174+
for file in files:
175+
original_id = _original_attachment_id(file)
176+
attachments.append(
177+
SpanAttachment(
178+
id=original_id,
179+
file_name=file.name,
180+
mime_type=file.mime_type,
181+
provider=AttachmentProvider.ORCHESTRATOR,
182+
direction=AttachmentDirection.IN,
183+
)
184+
)
185+
input_files.append(
186+
{"id": original_id, "fileName": file.name, "mimeType": file.mime_type}
187+
)
188+
189+
if file.masked_attachment_url:
190+
# Prefer the real orchestrator UUID from the re-upload so the UI
191+
# can download the file; fall back to the synthesized uuid5.
192+
masked_id = file.masked_attachment_id or _masked_attachment_id(
193+
file.masked_attachment_url
194+
)
195+
masked_name = _masked_name_for(file.name)
196+
attachments.append(
197+
SpanAttachment(
198+
id=masked_id,
199+
file_name=masked_name,
200+
mime_type=file.mime_type,
201+
provider=AttachmentProvider.ORCHESTRATOR,
202+
direction=AttachmentDirection.OUT,
203+
)
204+
)
205+
output_files.append(
206+
{"id": masked_id, "fileName": masked_name, "mimeType": file.mime_type}
207+
)
208+
209+
_set_span_attachments(span, attachments)
210+
211+
if span is not None and span.is_recording():
212+
try:
213+
input_payload = json.dumps({"files": input_files})
214+
output_payload = json.dumps({"files": output_files})
215+
span.set_attribute("input", input_payload)
216+
span.set_attribute("input.value", input_payload)
217+
span.set_attribute("input.mime_type", "application/json")
218+
span.set_attribute("output", output_payload)
219+
span.set_attribute("output.value", output_payload)
220+
span.set_attribute("output.mime_type", "application/json")
221+
except Exception:
222+
logger.exception("Failed to set PII Masking input/output attributes")
223+
51224

52225
def create_analyze_file_tool(
53226
resource: AgentInternalToolResourceConfig, llm: BaseChatModel
@@ -95,16 +268,30 @@ async def tool_fn(**kwargs: Any):
95268

96269
masker: PiiMasker | None = None
97270
if client is not None and PiiMasker.is_policy_enabled(policy):
98-
masker = PiiMasker(client, policy)
99-
try:
100-
analysis_task, files = await masker.apply(analysis_task, files)
101-
except Exception as exc:
102-
raise AgentRuntimeError(
103-
code=AgentRuntimeErrorCode.UNEXPECTED_ERROR,
104-
title="PII masking failed",
105-
detail=f"PII detection raised: {exc!r}",
106-
category=UiPathErrorCategory.SYSTEM,
107-
) from exc
271+
# Reconcile OTel current span with the LangChain/LangGraph external
272+
# span provider so the new span is parented under the active tool
273+
# call span and shares its trace id.
274+
parent_ctx = UiPathSpanUtils.get_parent_context()
275+
tracer = otel_trace.get_tracer(__name__)
276+
with tracer.start_as_current_span(
277+
"PII Masking", context=parent_ctx
278+
) as pii_span:
279+
# Required for the LLMOps exporter's span filter to keep this span.
280+
pii_span.set_attribute("uipath.custom_instrumentation", True)
281+
pii_span.set_attribute("span_type", "piiMasking")
282+
pii_span.set_attribute("type", "piiMasking")
283+
masker = PiiMasker(client, policy)
284+
try:
285+
analysis_task, files = await masker.apply(analysis_task, files)
286+
_emit_pii_masking_attachments(pii_span, files)
287+
except Exception as exc:
288+
pii_span.record_exception(exc)
289+
raise AgentRuntimeError(
290+
code=AgentRuntimeErrorCode.UNEXPECTED_ERROR,
291+
title="PII masking failed",
292+
detail=f"PII detection raised: {exc!r}",
293+
category=UiPathErrorCategory.SYSTEM,
294+
) from exc
108295

109296
try:
110297
human_message = HumanMessage(content=analysis_task)
@@ -122,6 +309,7 @@ async def tool_fn(**kwargs: Any):
122309
cast(AnyMessage, human_message_with_files),
123310
]
124311
config = var_child_runnable_config.get(None)
312+
config = _config_with_llm_call_attachments(config, files)
125313
result = await non_streaming_llm.ainvoke(messages, config=config)
126314

127315
del messages, human_message_with_files, files
@@ -198,6 +386,7 @@ async def _resolve_job_attachment_arguments(
198386
url=blob_info.uri,
199387
name=blob_info.name,
200388
mime_type=mime_type,
389+
attachment_id=str(attachment_id),
201390
)
202391
file_infos.append(file_info)
203392

@@ -222,7 +411,17 @@ async def add_files_to_message(
222411

223412
file_content_blocks: list[DataContentBlock] = []
224413
for file in files:
225-
blocks = await build_file_content_blocks_for(file)
414+
# Prefer the redacted URL + pii_masked_ name for LLM content when PII masking ran.
415+
llm_file = (
416+
FileInfo(
417+
url=file.masked_attachment_url,
418+
name=_masked_name_for(file.name),
419+
mime_type=file.mime_type,
420+
)
421+
if file.masked_attachment_url
422+
else file
423+
)
424+
blocks = await build_file_content_blocks_for(llm_file)
226425
file_content_blocks.extend(blocks)
227426
return append_content_blocks_to_message(
228427
message, cast(list[ContentBlock], file_content_blocks)

0 commit comments

Comments
 (0)