Skip to content

Commit 9a5d2bb

Browse files
yashwagle1 and claude authored
move pii masking service to llmops (#1701)
Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 6e1cfe5 commit 9a5d2bb

11 files changed

Lines changed: 591 additions & 25 deletions

File tree

packages/uipath-platform/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "uipath-platform"
3-
version = "0.1.60"
3+
version = "0.1.61"
44
description = "HTTP client library for programmatic access to UiPath Platform"
55
readme = { file = "README.md", content-type = "text/markdown" }
66
requires-python = ">=3.11"

packages/uipath-platform/src/uipath/platform/_uipath.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
ProcessesService,
3636
QueuesService,
3737
)
38+
from .pii_detection import PiiDetectionService
3839
from .resource_catalog import ResourceCatalogService
3940
from .semantic_proxy import SemanticProxyService
4041

@@ -184,6 +185,10 @@ def orchestrator_setup(self) -> OrchestratorSetupService:
184185
def automation_ops(self) -> AutomationOpsService:
185186
return AutomationOpsService(self._config, self._execution_context)
186187

188+
@property
def pii_detection(self) -> PiiDetectionService:
    """Return a ``PiiDetectionService`` bound to this client.

    A new service object is constructed on each access from the client's
    stored config and execution context.
    """
    service = PiiDetectionService(self._config, self._execution_context)
    return service
191+
187192
@property
def semantic_proxy(self) -> SemanticProxyService:
    """Return a ``SemanticProxyService`` bound to this client.

    A new service object is constructed on each access from the client's
    stored config and execution context.
    """
    service = SemanticProxyService(self._config, self._execution_context)
    return service
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
"""PiiDetection service package.
2+
3+
Provides the ``PiiDetectionService`` client, Pydantic request/response models for
4+
the PII detection endpoint, and utilities for rehydrating masked text with
5+
original PII values after LLM processing.
6+
"""
7+
8+
from ._pii_detection_service import PiiDetectionService
9+
from .pii_detection import (
10+
PiiDetectionRequest,
11+
PiiDetectionResponse,
12+
PiiDocument,
13+
PiiDocumentResult,
14+
PiiEntity,
15+
PiiEntityThreshold,
16+
PiiFile,
17+
PiiFileResult,
18+
)
19+
from .pii_utilities import (
20+
rehydrate_from_pii_entities,
21+
rehydrate_from_pii_response,
22+
)
23+
24+
__all__ = [
25+
"PiiDetectionRequest",
26+
"PiiDetectionResponse",
27+
"PiiDetectionService",
28+
"PiiDocument",
29+
"PiiDocumentResult",
30+
"PiiEntity",
31+
"PiiEntityThreshold",
32+
"PiiFile",
33+
"PiiFileResult",
34+
"rehydrate_from_pii_entities",
35+
"rehydrate_from_pii_response",
36+
]
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
"""PiiDetection service for UiPath Platform.
2+
3+
Provides methods for detecting PII in documents and files.
4+
"""
5+
6+
from uipath.core.tracing import traced
7+
8+
from ..common._base_service import BaseService
9+
from ..common._config import UiPathApiConfig
10+
from ..common._execution_context import UiPathExecutionContext
11+
from ..common._models import Endpoint, RequestSpec
12+
from .pii_detection import PiiDetectionRequest, PiiDetectionResponse
13+
14+
_PII_DETECTION_ENDPOINT = Endpoint("llmopstenant_/api/pii-detection")
15+
16+
# PII detection over documents/files can be slow, so override the default
17+
# httpx client timeout (30s) with a longer per-request timeout.
18+
_PII_DETECTION_TIMEOUT = 290.0
19+
20+
21+
class PiiDetectionService(BaseService):
    """Client for the UiPath PII detection endpoint.

    Offers a blocking call and an awaitable call with the same contract: the
    request model is serialized, POSTed to the PII detection endpoint with
    ``scoped="tenant"``, and the JSON reply is validated into a
    ``PiiDetectionResponse``.
    """

    def __init__(
        self,
        config: UiPathApiConfig,
        execution_context: UiPathExecutionContext,
    ) -> None:
        super().__init__(config=config, execution_context=execution_context)

    @traced(name="pii_detection_detect_pii", run_type="uipath")
    def detect_pii(self, request: PiiDetectionRequest) -> PiiDetectionResponse:
        """Run PII detection over the documents and/or files in ``request``.

        Args:
            request: The PII detection request payload.

        Returns:
            The endpoint's reply parsed into a ``PiiDetectionResponse``.
        """
        call = self._pii_detection_spec(request)
        # The per-request timeout replaces the client default because
        # detection over documents/files can be slow.
        raw = self.request(
            call.method,
            url=call.endpoint,
            json=call.json,
            headers=call.headers,
            scoped="tenant",
            timeout=_PII_DETECTION_TIMEOUT,
        )
        payload = raw.json()
        return PiiDetectionResponse.model_validate(payload)

    @traced(name="pii_detection_detect_pii", run_type="uipath")
    async def detect_pii_async(
        self, request: PiiDetectionRequest
    ) -> PiiDetectionResponse:
        """Run PII detection over the documents and/or files in ``request`` (async).

        Args:
            request: The PII detection request payload.

        Returns:
            The endpoint's reply parsed into a ``PiiDetectionResponse``.
        """
        call = self._pii_detection_spec(request)
        # Same long per-request timeout as the synchronous variant.
        raw = await self.request_async(
            call.method,
            url=call.endpoint,
            json=call.json,
            headers=call.headers,
            scoped="tenant",
            timeout=_PII_DETECTION_TIMEOUT,
        )
        payload = raw.json()
        return PiiDetectionResponse.model_validate(payload)

    def _pii_detection_spec(self, request: PiiDetectionRequest) -> RequestSpec:
        """Build the POST spec shared by the sync and async calls.

        The JSON body is dumped with field aliases and with ``None``-valued
        fields left out.
        """
        body = request.model_dump(by_alias=True, exclude_none=True)
        return RequestSpec(
            method="POST",
            endpoint=_PII_DETECTION_ENDPOINT,
            json=body,
        )
Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
"""Public Pydantic models for the PiiDetection service."""
2+
3+
from typing import Optional
4+
5+
from pydantic import BaseModel, ConfigDict, Field
6+
7+
8+
class PiiDocument(BaseModel):
    """One text document submitted for PII scanning.

    All three fields are required strings.
    """

    # Caller-supplied identifier for this document.
    id: str = Field(...)
    # Role label attached to the document.
    role: str = Field(...)
    # The text to scan.
    document: str = Field(...)
14+
15+
16+
class PiiFile(BaseModel):
    """Reference to a file submitted for PII scanning.

    Fields serialize under camelCase aliases; ``populate_by_name`` lets the
    model also be constructed with the snake_case attribute names.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Wire name: ``fileName``.
    file_name: str = Field(..., alias="fileName")
    # Wire name: ``fileUrl``.
    file_url: str = Field(..., alias="fileUrl")
    # Wire name: ``fileType``.
    file_type: str = Field(..., alias="fileType")
24+
25+
26+
class PiiEntityThreshold(BaseModel):
    """Confidence threshold override for a single PII entity category.

    The wire format uses hyphenated keys; ``populate_by_name`` lets the model
    also be constructed with the Python attribute names.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Wire name: ``pii-entity-category``.
    category: str = Field(..., alias="pii-entity-category")
    # Wire name: ``pii-entity-confidence-threshold``.
    confidence_threshold: float = Field(
        ..., alias="pii-entity-confidence-threshold"
    )
33+
34+
35+
class PiiDetectionRequest(BaseModel):
    """Body sent to the PII detection endpoint.

    Every field is optional and defaults to ``None``. Aliased fields
    serialize under their camelCase names; ``populate_by_name`` lets the model
    also be constructed with the snake_case attribute names.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Text documents to scan.
    documents: Optional[list[PiiDocument]] = None
    # File references to scan.
    files: Optional[list[PiiFile]] = None
    # Wire name: ``languageCode``.
    language_code: Optional[str] = Field(None, alias="languageCode")
    # Wire name: ``confidenceThreshold``.
    confidence_threshold: Optional[float] = Field(None, alias="confidenceThreshold")
    # Per-category overrides; wire name: ``entityThresholds``.
    entity_thresholds: Optional[list[PiiEntityThreshold]] = Field(
        None, alias="entityThresholds"
    )
49+
50+
51+
class PiiEntity(BaseModel):
    """One PII entity reported by the detection endpoint.

    Aliased fields are read from camelCase keys; ``populate_by_name`` lets the
    model also be constructed with the snake_case attribute names.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Original PII value; wire name: ``piiText``.
    pii_text: str = Field(..., alias="piiText")
    # Placeholder that stands in for the value; wire name: ``replacementText``.
    replacement_text: str = Field(..., alias="replacementText")
    # Wire name: ``piiType``.
    pii_type: str = Field(..., alias="piiType")
    # Position reported by the service.
    # NOTE(review): unit and reference text are not visible here — confirm.
    offset: int = Field(...)
    # Wire name: ``confidenceScore``.
    confidence_score: float = Field(..., alias="confidenceScore")
61+
62+
63+
class PiiDocumentResult(BaseModel):
    """Detection outcome for one submitted document.

    Aliased fields are read from camelCase keys; ``populate_by_name`` lets the
    model also be constructed with the snake_case attribute names.
    """

    model_config = ConfigDict(populate_by_name=True)

    id: str = Field(...)
    role: str = Field(...)
    # Wire name: ``maskedDocument``.
    masked_document: str = Field(..., alias="maskedDocument")
    # Wire name: ``initialDocument``.
    initial_document: str = Field(..., alias="initialDocument")
    # Defaults to an empty list when the key is absent; wire name: ``piiEntities``.
    pii_entities: list[PiiEntity] = Field(default_factory=list, alias="piiEntities")
73+
74+
75+
class PiiFileResult(BaseModel):
    """Detection outcome for one submitted file.

    ``file_url`` (wire name ``fileUrl``) is the redacted URL.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Wire name: ``fileName``.
    file_name: str = Field(..., alias="fileName")
    # Wire name: ``fileUrl``.
    file_url: str = Field(..., alias="fileUrl")
    # Defaults to an empty list when the key is absent; wire name: ``piiEntities``.
    pii_entities: list[PiiEntity] = Field(default_factory=list, alias="piiEntities")
83+
84+
85+
class PiiDetectionResponse(BaseModel):
    """Body returned by the PII detection endpoint.

    Both collections default to an empty list when missing from the payload.
    """

    model_config = ConfigDict(populate_by_name=True)

    # Per-document results.
    response: list[PiiDocumentResult] = Field(default_factory=list)
    # Per-file results.
    files: list[PiiFileResult] = Field(default_factory=list)
Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
"""Utility methods for working with PII data.
2+
3+
Python port of UiPath.SemanticProxy.Client.PiiUtilities (C#).
4+
"""
5+
6+
import re
7+
from typing import Callable, Iterable
8+
9+
from .pii_detection import PiiDetectionResponse, PiiEntity
10+
11+
12+
def rehydrate_from_pii_entities(
    masked_text: str, pii_entities: Iterable[PiiEntity]
) -> str:
    """Rehydrate masked text by replacing PII placeholders with original values.

    Placeholders (e.g. ``[Person-1]``) are matched case-insensitively and replaced
    with the corresponding original PII text. Variants without the surrounding
    brackets (e.g. ``Person-1``) are replaced as well, in case the LLM stripped
    them in its output.

    All substitutions happen in a single pass over ``masked_text``, so text that
    has just been restored is never re-scanned: an original value that happens
    to contain another entity's placeholder is left intact.

    Args:
        masked_text: The masked text with PII placeholders.
        pii_entities: The PII entities containing the original values. Entities
            with an empty ``replacement_text`` or ``pii_text`` are ignored.

    Returns:
        The rehydrated text with original PII values. ``masked_text`` is
        returned unchanged when it is empty or no usable entity is supplied.
    """
    if not masked_text:
        return masked_text

    # An entity missing either side of the mapping cannot be rehydrated.
    entities = [e for e in pii_entities if e.replacement_text and e.pii_text]
    if not entities:
        return masked_text

    # Longest placeholder first; the sort is stable, so when two entities share
    # a placeholder the earlier one in the input keeps precedence.
    entities.sort(key=lambda e: len(e.replacement_text), reverse=True)

    originals: dict[str, str] = {}
    for entity in entities:
        placeholder = entity.replacement_text
        originals.setdefault(placeholder, entity.pii_text)
        if placeholder.startswith("[") and placeholder.endswith("]"):
            bare = placeholder[1:-1]
            # "[]" would yield an empty pattern that matches between every
            # character, so an empty bracket-less variant is skipped.
            if bare:
                originals.setdefault(bare, entity.pii_text)

    # Alternation tries branches left to right, so longer candidates go first
    # to keep e.g. "[Person-10]" from being consumed as "[Person-1]".
    candidates = sorted(originals, key=len, reverse=True)
    values = [originals[candidate] for candidate in candidates]
    pattern = re.compile(
        "|".join(f"({re.escape(candidate)})" for candidate in candidates),
        flags=re.IGNORECASE,
    )

    def restore(match: re.Match[str]) -> str:
        # Exactly one top-level group matches; its index selects the value.
        # A callable replacement is inserted verbatim, so backslashes and
        # ``\g<...>`` sequences in the PII text are not interpreted.
        index = match.lastindex
        return values[index - 1] if index else match.group(0)

    return pattern.sub(restore, masked_text)
66+
67+
68+
def _literal_replacer(replacement: str) -> Callable[[re.Match[str]], str]:
    """Wrap ``replacement`` in a callable suitable as the ``repl`` of ``re.sub``.

    ``re.sub`` interprets backslash escapes and group references in a string
    replacement but uses a callable's return value as-is, so the returned
    function always yields ``replacement`` unchanged, whatever was matched.
    """
    return lambda _match: replacement
75+
76+
77+
def rehydrate_from_pii_response(
    masked_text: str, response: PiiDetectionResponse
) -> str:
    """Rehydrate masked text with every PII entity carried by ``response``.

    Entities are gathered from ``response.response`` (document results) first
    and from ``response.files`` (file results) second, then handed to
    ``rehydrate_from_pii_entities`` so placeholders from either source are
    restored.

    Args:
        masked_text: The masked text with PII placeholders.
        response: The PII detection response containing entities to rehydrate.

    Returns:
        The rehydrated text with original PII values.
    """
    document_entities = [
        entity for result in response.response for entity in result.pii_entities
    ]
    file_entities = [
        entity for result in response.files for entity in result.pii_entities
    ]
    return rehydrate_from_pii_entities(masked_text, document_entities + file_entities)

packages/uipath-platform/src/uipath/platform/semantic_proxy/pii_utilities.py

Lines changed: 2 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,6 @@
33
Python port of UiPath.SemanticProxy.Client.PiiUtilities (C#).
44
"""
55

6-
import json
76
import re
87
from typing import Callable, Iterable
98

@@ -42,13 +41,12 @@ def rehydrate_from_pii_entities(
4241
for entity in entities:
4342
if not entity.replacement_text or not entity.pii_text:
4443
continue
45-
escaped_pii = _add_escape_characters(entity.pii_text)
4644
# Replace the full placeholder (with brackets) case-insensitively.
4745
# ``_literal_replacer`` bypasses regex backreference interpretation in the
4846
# replacement string.
4947
rehydrated = re.sub(
5048
re.escape(entity.replacement_text),
51-
_literal_replacer(escaped_pii),
49+
_literal_replacer(entity.pii_text),
5250
rehydrated,
5351
flags=re.IGNORECASE,
5452
)
@@ -59,7 +57,7 @@ def rehydrate_from_pii_entities(
5957
no_brackets = entity.replacement_text[1:-1]
6058
rehydrated = re.sub(
6159
re.escape(no_brackets),
62-
_literal_replacer(escaped_pii),
60+
_literal_replacer(entity.pii_text),
6361
rehydrated,
6462
flags=re.IGNORECASE,
6563
)
@@ -98,18 +96,3 @@ def rehydrate_from_pii_response(
9896
for file in response.files:
9997
entities.extend(file.pii_entities)
10098
return rehydrate_from_pii_entities(masked_text, entities)
101-
102-
103-
def _add_escape_characters(text: str) -> str:
104-
"""Escape special characters in text using JSON serialization.
105-
106-
Mirrors C# ``AddEscapeCharacters`` — serializes as JSON then strips the
107-
surrounding quotes to get the escaped content.
108-
"""
109-
if not text:
110-
return ""
111-
try:
112-
serialized = json.dumps(text, ensure_ascii=False)
113-
return serialized[1:-1]
114-
except (TypeError, ValueError):
115-
return text

0 commit comments

Comments
 (0)