Skip to content

Commit 9e04128

Browse files
viswa-uipathclaude
andcommitted
fix(governance): PR-118 review feedback + default mode DISABLED
Addresses all 7 Copilot review comments on PR #118 and switches the default enforcement mode so empty-policy tenants pay zero per-call audit overhead. PR-118 review comments: - policy_api_client docstring no longer claims "one retry on transient failures" — _get_once is and remains single-shot by design. - Policy fetch GET drops Content-Type: application/json (was sent via json_body=True). Strict origin servers can 415 on unexpected Content-Type for GETs; the helper's own docstring recommends omitting it on reads. - _extract_governable_text dumper loop now CONTINUES instead of BREAKS when model_dump() raises, so dict() is tried as documented ("fall through to other extractors"). - loader.get_policy_index distinguishes "prefetch did not complete in Xs" from "prefetch completed but produced no PolicyIndex" — prod triage can now tell a hung fetch from an auth / parse failure. - disabled_guardrails defensively re-checks mapped_to_uipath=True AND policy_enabled=False on every guardrail_fallback condition. Matches the function's docstring and protects against multi-condition rules or any future code path that bypasses the evaluator gate. - request_governance pre-checks UIPATH_ACCESS_TOKEN and skips when missing. Sending without a bearer guarantees a 401 per compensation call and pollutes logs; mirrors the org-id / tenant-id skip pattern already in place. - AuditManager.flush(timeout=...) now honors its timeout via a time.monotonic() poll loop and warns if drain doesn't complete. Previously called queue.Queue.join() with no timeout argument, allowing indefinite block — risky at process exit where _cleanup_audit_manager supplies a 2-second timeout that was being silently ignored. Default enforcement mode: - get_enforcement_mode default fallback flipped from AUDIT to DISABLED. The server-supplied mode (applied by the policy loader on every successful fetch) still wins; the env-var override still works. Empty-policy / failed-fetch / pre-fetch tenants now short-circuit at evaluator.py:332 with no _emit_audit call, no OTel spans, no AuditManager queue traffic. Previously these scenarios silently fell through to AUDIT and produced ~40 empty governance spans per turn for an N=10 LLM-call agent. Tests (245 passing, +7 new): - test_enforcement_mode_default.py pins the resolution order (programmatic > env > DISABLED default) and the invalid-env-falls-back-to-DISABLED behavior. - test_request_governance_skipped_when_token_missing pins the new bearer-token skip path. - _govern_env fixture now sets UIPATH_ACCESS_TOKEN; the headers test asserts the Authorization header is present (was a side-effect of the no-token test, which is now moved out). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent dfffa6c commit 9e04128

8 files changed

Lines changed: 225 additions & 46 deletions

File tree

src/uipath/runtime/governance/audit/base.py

Lines changed: 29 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -532,18 +532,40 @@ def emit_session_end(
532532
def flush(self, timeout: float = 5.0) -> None:
533533
"""Flush all pending events and sinks.
534534
535-
In async mode, waits for the event queue to drain before
536-
flushing individual sinks.
535+
In async mode, polls the queue until it drains or ``timeout``
536+
seconds elapse, whichever comes first. ``queue.Queue.join`` has
537+
no timeout argument — using it would block indefinitely on a
538+
wedged sink, which defeats the bounded-shutdown contract that
539+
:func:`_cleanup_audit_manager` relies on at process exit.
537540
538541
Args:
539542
timeout: Maximum seconds to wait for queue to drain (default 5.0)
540543
"""
541544
if self._async_mode:
542-
# Wait for queue to drain
543-
try:
544-
self._queue.join()
545-
except Exception:
546-
pass
545+
import time
546+
547+
deadline = time.monotonic() + max(0.0, timeout)
548+
poll_interval = min(0.05, timeout) if timeout > 0 else 0.0
549+
while time.monotonic() < deadline:
550+
try:
551+
if self._queue.unfinished_tasks == 0:
552+
break
553+
except Exception: # noqa: BLE001 - queue introspection is best-effort
554+
break
555+
time.sleep(poll_interval)
556+
else:
557+
# Loop didn't break — drain timed out. Log so a wedged
558+
# sink is surfaced rather than swallowed.
559+
try:
560+
pending = self._queue.unfinished_tasks
561+
except Exception: # noqa: BLE001
562+
pending = -1
563+
if pending:
564+
logger.warning(
565+
"Audit queue did not drain within %.2fs "
566+
"(unfinished tasks=%s); sink may be wedged",
567+
timeout, pending,
568+
)
547569

548570
with self._sinks_lock:
549571
sinks = list(self._sinks)

src/uipath/runtime/governance/config.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -37,19 +37,24 @@ def get_enforcement_mode() -> EnforcementMode:
3737
The mode is cached after first read. Resolution order:
3838
3939
1. A value previously set via :func:`set_enforcement_mode` (the
40-
policy loader calls this with the backend-supplied mode).
40+
policy loader calls this with the backend-supplied mode on every
41+
successful policy fetch — that's the canonical source).
4142
2. ``UIPATH_GOVERNANCE_MODE`` env var (developer override).
42-
3. Default :attr:`EnforcementMode.AUDIT` — log but never block.
43+
3. Default :attr:`EnforcementMode.DISABLED` — skip evaluation
44+
entirely until the server explicitly opts the tenant in. This
45+
keeps empty-policy / failed-fetch / pre-fetch scenarios free of
46+
per-call audit overhead; a tenant with policies wins the cache
47+
on the first ``set_enforcement_mode`` call from the loader.
4348
"""
4449
global _enforcement_mode
4550
if _enforcement_mode is not None:
4651
return _enforcement_mode
4752

48-
mode_str = os.getenv(ENV_ENFORCEMENT_MODE, "audit").lower()
53+
mode_str = os.getenv(ENV_ENFORCEMENT_MODE, "disabled").lower()
4954
try:
5055
_enforcement_mode = EnforcementMode(mode_str)
5156
except ValueError:
52-
_enforcement_mode = EnforcementMode.AUDIT
57+
_enforcement_mode = EnforcementMode.DISABLED
5358

5459
return _enforcement_mode
5560

src/uipath/runtime/governance/native/guardrail_compensation.py

Lines changed: 40 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import atexit
2929
import json
3030
import logging
31+
import os
3132
import threading
3233
import urllib.error
3334
import urllib.request
@@ -37,6 +38,7 @@
3738
from uipath.runtime.governance.native.backend_client import (
3839
BACKEND_REQUEST_TIMEOUT_SECONDS,
3940
COMPENSATION_MAX_WORKERS,
41+
ENV_ACCESS_TOKEN,
4042
ENV_ORGANIZATION_ID,
4143
ENV_TENANT_ID,
4244
GOVERN_API_PATH,
@@ -132,19 +134,31 @@ def disabled_guardrails(audit: Any, policy_index: Any) -> list[FiredRule]:
132134
continue
133135
for check in rule.checks:
134136
for cond in check.conditions:
135-
if cond.operator == "guardrail_fallback" and isinstance(
136-
cond.value, dict
137-
):
138-
validator = str(cond.value.get("validator", ""))
139-
if validator:
140-
out.append(
141-
{
142-
"ruleId": ev.rule_id,
143-
"ruleName": ev.rule_name,
144-
"packName": getattr(rule, "pack_name", "") or "",
145-
"validator": validator,
146-
}
147-
)
137+
if cond.operator != "guardrail_fallback":
138+
continue
139+
if not isinstance(cond.value, dict):
140+
continue
141+
# The ``guardrail_fallback`` operator at evaluation time
142+
# only matches when ``mapped_to_uipath=True`` AND
143+
# ``policy_enabled=False``. We re-check here defensively
144+
# so a future code path that bypasses the evaluator (or
145+
# a multi-condition rule that fired on a sibling check)
146+
# can't trigger a compensation call for a guardrail
147+
# that isn't actually disabled.
148+
if not bool(cond.value.get("mapped_to_uipath", False)):
149+
continue
150+
if bool(cond.value.get("policy_enabled", True)):
151+
continue
152+
validator = str(cond.value.get("validator", ""))
153+
if validator:
154+
out.append(
155+
{
156+
"ruleId": ev.rule_id,
157+
"ruleName": ev.rule_name,
158+
"packName": getattr(rule, "pack_name", "") or "",
159+
"validator": validator,
160+
}
161+
)
148162
return out
149163

150164

@@ -303,6 +317,19 @@ def request_governance(
303317
)
304318
return
305319

320+
# Bearer token is required by the backend; sending without one
321+
# produces a 401 per call and pollutes logs. Skip cleanly when the
322+
# token isn't present (e.g. local dev, missing host bootstrap)
323+
# rather than burning quota on guaranteed auth failures.
324+
if not os.environ.get(ENV_ACCESS_TOKEN):
325+
logger.warning(
326+
"Govern call skipped: %s is not set in the environment; "
327+
"compensation requires a bearer token. validators=[%s]",
328+
ENV_ACCESS_TOKEN,
329+
", ".join(validators),
330+
)
331+
return
332+
306333
try:
307334
payload = json.dumps(
308335
{

src/uipath/runtime/governance/native/loader.py

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -142,11 +142,21 @@ def get_policy_index() -> PolicyIndex:
142142
completed = event.wait(timeout=_PREFETCH_WAIT_SECONDS)
143143
if completed and _policy_index is not None:
144144
return _policy_index
145-
logger.warning(
146-
"Policy prefetch did not complete in %.1fs; "
147-
"agent will run without any policies",
148-
_PREFETCH_WAIT_SECONDS,
149-
)
145+
if not completed:
146+
logger.warning(
147+
"Policy prefetch did not complete in %.1fs; "
148+
"agent will run without any policies",
149+
_PREFETCH_WAIT_SECONDS,
150+
)
151+
else:
152+
# Distinguish from the timeout path so production triage
153+
# can tell "prefetch hung" from "prefetch returned empty"
154+
# (auth failure, server error, parse failure).
155+
logger.warning(
156+
"Policy prefetch completed but produced no PolicyIndex "
157+
"(see prior WARN for the root cause); agent will run "
158+
"without any policies"
159+
)
150160
_policy_index = PolicyIndex()
151161
return _policy_index
152162

src/uipath/runtime/governance/native/policy_api_client.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,12 @@
1818
is the YAML the evaluator compiles into a :class:`PolicyIndex`.
1919
2020
Failure mode is fail-open: when the organization id is unknown, the
21-
access token is missing, the backend errors (one retry on transient
22-
failures), or the body can't be parsed, the caller falls back to an
23-
empty PolicyIndex. Nothing in this module ever raises to the caller.
21+
access token is missing, the backend errors, or the body can't be
22+
parsed, the caller falls back to an empty PolicyIndex. The fetch is
23+
single-shot (no retry by design — see :func:`_get_once`) so a slow
24+
backend can't extend agent startup beyond
25+
:data:`BACKEND_REQUEST_TIMEOUT_SECONDS`. Nothing in this module ever
26+
raises to the caller.
2427
"""
2528

2629
from __future__ import annotations
@@ -147,7 +150,10 @@ def _fetch_policy_response_inner() -> PolicyResponse | None:
147150
)
148151
return None
149152

150-
headers = governance_request_headers(json_body=True)
153+
# Policy fetch is a GET; ``json_body=False`` so ``Content-Type`` is
154+
# omitted. Strict origin servers may 415 on unexpected Content-Type
155+
# for GETs (see :func:`governance_request_headers` docstring).
156+
headers = governance_request_headers(json_body=False)
151157
headers[TENANT_HEADER] = tenant_id
152158
logger.info("Policy fetch starting (org=%s, tenant=%s)", org_id, tenant_id)
153159

src/uipath/runtime/governance/wrapper.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,10 @@ def _extract_governable_text(
138138
return ""
139139

140140
# Pydantic / dataclass-like shapes are easier to walk via their
141-
# dict form than via attribute introspection.
141+
# dict form than via attribute introspection. If the first dumper
142+
# raises (e.g. ``model_dump`` blows up on a partial pydantic v1
143+
# model), fall through to the next one rather than abandoning the
144+
# whole pydantic/dataclass path.
142145
for dumper in ("model_dump", "dict"):
143146
fn = getattr(value, dumper, None)
144147
if callable(fn):
@@ -150,8 +153,8 @@ def _extract_governable_text(
150153
depth=depth + 1,
151154
latest_only=latest_only,
152155
)
153-
except Exception: # noqa: BLE001 - fall through to other extractors
154-
break
156+
except Exception: # noqa: BLE001 - try the next dumper
157+
continue
155158

156159
obj_id = id(value)
157160
if seen is None:
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
"""Tests for the default enforcement-mode resolution.
2+
3+
The default is :attr:`EnforcementMode.DISABLED` — until the policy
4+
loader successfully fetches a backend response and calls
5+
``set_enforcement_mode`` with the server-supplied value, governance
6+
short-circuits cheaply with no per-call audit overhead.
7+
8+
Resolution order (per :func:`get_enforcement_mode`):
9+
1. Previously-cached programmatic value (set via ``set_enforcement_mode``).
10+
2. ``UIPATH_GOVERNANCE_MODE`` env var.
11+
3. Default ``DISABLED``.
12+
"""
13+
14+
from __future__ import annotations
15+
16+
import pytest
17+
18+
from uipath.runtime.governance.config import (
19+
EnforcementMode,
20+
get_enforcement_mode,
21+
reset_enforcement_mode,
22+
set_enforcement_mode,
23+
)
24+
25+
26+
@pytest.fixture(autouse=True)
27+
def _isolate_mode(monkeypatch: pytest.MonkeyPatch):
28+
"""Each test starts from a clean module-state slate."""
29+
monkeypatch.delenv("UIPATH_GOVERNANCE_MODE", raising=False)
30+
reset_enforcement_mode()
31+
yield
32+
reset_enforcement_mode()
33+
34+
35+
def test_default_mode_is_disabled() -> None:
36+
"""No programmatic mode + no env var → DISABLED.
37+
38+
Replaces the prior AUDIT default. Empty-policy / failed-fetch /
39+
pre-fetch tenants pay zero audit overhead until the backend
40+
explicitly enables governance on the next policy fetch.
41+
"""
42+
assert get_enforcement_mode() is EnforcementMode.DISABLED
43+
44+
45+
def test_env_var_audit_wins_over_default(monkeypatch: pytest.MonkeyPatch) -> None:
46+
"""Developer override via env var still works."""
47+
monkeypatch.setenv("UIPATH_GOVERNANCE_MODE", "audit")
48+
reset_enforcement_mode() # clear cached default
49+
assert get_enforcement_mode() is EnforcementMode.AUDIT
50+
51+
52+
def test_env_var_enforce_wins_over_default(monkeypatch: pytest.MonkeyPatch) -> None:
53+
monkeypatch.setenv("UIPATH_GOVERNANCE_MODE", "enforce")
54+
reset_enforcement_mode()
55+
assert get_enforcement_mode() is EnforcementMode.ENFORCE
56+
57+
58+
def test_invalid_env_var_falls_back_to_disabled(
59+
monkeypatch: pytest.MonkeyPatch,
60+
) -> None:
61+
monkeypatch.setenv("UIPATH_GOVERNANCE_MODE", "garbage-value")
62+
reset_enforcement_mode()
63+
assert get_enforcement_mode() is EnforcementMode.DISABLED
64+
65+
66+
def test_programmatic_set_wins_over_env_and_default(
67+
monkeypatch: pytest.MonkeyPatch,
68+
) -> None:
69+
"""The policy loader's ``set_enforcement_mode`` call is canonical."""
70+
monkeypatch.setenv("UIPATH_GOVERNANCE_MODE", "audit")
71+
set_enforcement_mode(EnforcementMode.ENFORCE)
72+
assert get_enforcement_mode() is EnforcementMode.ENFORCE
73+
74+
75+
def test_reset_returns_to_default() -> None:
76+
"""``reset_enforcement_mode`` clears the cache so the default re-applies."""
77+
set_enforcement_mode(EnforcementMode.ENFORCE)
78+
assert get_enforcement_mode() is EnforcementMode.ENFORCE
79+
reset_enforcement_mode()
80+
assert get_enforcement_mode() is EnforcementMode.DISABLED
81+
82+
83+
def test_disabled_mode_is_cached_after_first_read() -> None:
84+
"""First call computes; subsequent calls hit the cache."""
85+
assert get_enforcement_mode() is EnforcementMode.DISABLED
86+
# A second call returns the same instance — the cache survives.
87+
assert get_enforcement_mode() is EnforcementMode.DISABLED

tests/test_guardrail_compensation.py

Lines changed: 29 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -89,15 +89,17 @@ def _reset_enforcement_mode():
8989

9090
@pytest.fixture
9191
def _govern_env(monkeypatch):
92-
"""Provide the org/tenant env vars that request_governance requires.
92+
"""Provide the env vars that request_governance requires.
9393
94-
The compensating call now mirrors the policy fetch — it skips when
95-
``UIPATH_ORGANIZATION_ID`` / ``UIPATH_TENANT_ID`` are missing.
96-
Tests that need the network path to actually fire must opt into
97-
this fixture.
94+
The compensating call mirrors the policy fetch — it skips when
95+
``UIPATH_ORGANIZATION_ID`` / ``UIPATH_TENANT_ID`` /
96+
``UIPATH_ACCESS_TOKEN`` are missing (sending without a bearer
97+
token would generate a guaranteed 401 per call). Tests that need
98+
the network path to actually fire must opt into this fixture.
9899
"""
99100
monkeypatch.setenv("UIPATH_ORGANIZATION_ID", "appsdev")
100101
monkeypatch.setenv("UIPATH_TENANT_ID", "tenant-xyz")
102+
monkeypatch.setenv("UIPATH_ACCESS_TOKEN", "test-token")
101103
yield
102104

103105

@@ -215,9 +217,8 @@ def test_request_governance_posts_expected_payload_and_returns_none(
215217
}
216218

217219

218-
def test_request_governance_sends_shared_headers(monkeypatch, _govern_env):
219-
"""Headers must come from the shared helper — UA + Accept + Content-Type."""
220-
monkeypatch.delenv("UIPATH_ACCESS_TOKEN", raising=False)
220+
def test_request_governance_sends_shared_headers(_govern_env):
221+
"""Headers must come from the shared helper — UA + Accept + Content-Type + Auth."""
221222
with patch.object(
222223
guardrail_compensation.urllib.request,
223224
"urlopen",
@@ -232,8 +233,8 @@ def test_request_governance_sends_shared_headers(monkeypatch, _govern_env):
232233
assert request_arg.get_header("Accept") == "application/json"
233234
assert request_arg.get_header("Content-type") == "application/json"
234235
assert request_arg.get_header("User-agent") == USER_AGENT
235-
# No token in env → no Authorization header.
236-
assert request_arg.get_header("Authorization") is None
236+
# Bearer is required (see ``test_request_governance_skipped_when_token_missing``).
237+
assert request_arg.get_header("Authorization") == "Bearer test-token"
237238
# Tenant header must travel on the compensating POST (same as the
238239
# policy GET) — the agenticgovernance ingress validates it.
239240
assert request_arg.get_header("X-uipath-internal-tenantid") == "tenant-xyz"
@@ -252,6 +253,24 @@ def test_request_governance_includes_bearer_token_when_set(monkeypatch, _govern_
252253
assert request_arg.get_header("Authorization") == "Bearer the-token"
253254

254255

256+
def test_request_governance_skipped_when_token_missing(monkeypatch):
257+
"""Missing bearer → skip cleanly instead of sending a guaranteed-401 request.
258+
259+
Sending without a token would produce a 401 per compensation event
260+
and pollute logs. Mirrors the org-id / tenant-id skip paths above.
261+
"""
262+
monkeypatch.setenv("UIPATH_ORGANIZATION_ID", "appsdev")
263+
monkeypatch.setenv("UIPATH_TENANT_ID", "tenant-xyz")
264+
monkeypatch.delenv("UIPATH_ACCESS_TOKEN", raising=False)
265+
with patch.object(
266+
guardrail_compensation.urllib.request, "urlopen"
267+
) as mock_urlopen:
268+
request_governance(_rules("x"), {}, "before_model", "t", "ts", "a", "r")
269+
assert not mock_urlopen.called, (
270+
"request_governance must NOT POST when bearer token is missing"
271+
)
272+
273+
255274
def test_request_governance_skipped_when_org_id_missing(monkeypatch):
256275
"""Without an org id, we cannot build the URL — skip the call entirely."""
257276
monkeypatch.delenv("UIPATH_ORGANIZATION_ID", raising=False)

0 commit comments

Comments
 (0)