Skip to content

Commit 4cd08eb

Browse files
author
amabito
committed
fix(evaluators): budget R2 -- percent-encoding, fail-closed snapshot, dunder guard
R2 findings: - _sanitize_scope_value: percent-encode |/= instead of replacing with _ (was causing key collisions between "a|b" and "a_b") - max_buckets fail-closed: spent_usd/spent_tokens now 0.0/0 (not recorded, previously reported current-call-only values misleading callers) - _extract_by_path: narrowed guard from startswith("_") to startswith("__") (single-underscore dict keys are legitimate data fields) - Fixed tautological test assertion in test_scope_key_injection_pipe - Added 3 tests: no-collision, single-underscore access, NaN/Inf cost 57 budget tests, 287 total evaluator tests passing.
1 parent 8ae042f commit 4cd08eb

3 files changed

Lines changed: 38 additions & 9 deletions

File tree

evaluators/builtin/src/agent_control_evaluators/budget/evaluator.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,12 @@ def _derive_period_key(window: str | None) -> str:
4242

4343

4444
def _sanitize_scope_value(val: str) -> str:
45-
"""Remove pipe and equals from scope values to prevent key injection."""
46-
return val.replace("|", "_").replace("=", "_")
45+
"""Percent-encode pipe and equals in scope values to prevent key injection.
46+
47+
Uses percent-encoding (not replacement) to preserve round-trip
48+
distinction: "a|b" and "a_b" remain different keys.
49+
"""
50+
return val.replace("%", "%25").replace("|", "%7C").replace("=", "%3D")
4751

4852

4953
def _build_scope_key(
@@ -68,7 +72,7 @@ def _extract_by_path(data: Any, path: str) -> Any:
6872
"""Extract a value from nested data using dot-notation path."""
6973
current = data
7074
for part in path.split("."):
71-
if part.startswith("_"):
75+
if part.startswith("__"):
7276
return None
7377
if isinstance(current, dict):
7478
current = current.get(part)

evaluators/builtin/src/agent_control_evaluators/budget/store.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -139,10 +139,12 @@ def record_and_check(
139139
bucket = self._buckets.get(key)
140140
if bucket is None:
141141
if len(self._buckets) >= self._max_buckets:
142-
# Fail-closed: treat as exceeded to prevent OOM
142+
# Fail-closed: treat as exceeded to prevent OOM.
143+
# spent fields are 0 because no bucket exists for
144+
# this scope+period -- the call is rejected, not recorded.
143145
return BudgetSnapshot(
144-
spent_usd=cost_usd,
145-
spent_tokens=input_tokens + output_tokens,
146+
spent_usd=0.0,
147+
spent_tokens=0,
146148
limit_usd=limit_usd,
147149
limit_tokens=limit_tokens,
148150
utilization=1.0,

evaluators/builtin/tests/budget/test_budget.py

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -412,15 +412,26 @@ def test_exceeded_at_exact_limit_tokens(self) -> None:
412412
def test_scope_key_injection_pipe(self) -> None:
413413
"""Pipe in metadata value must be sanitized, not create new scope dimension."""
414414
key = _build_scope_key({"ch": "slack"}, "uid", {"ch": "slack", "uid": "u1|ch=admin"})
415-
assert "|ch=admin" not in key.split("|")[-1] # injected dimension not present
416-
assert "u1_ch_admin" in key # sanitized
415+
parts = key.split("|")
416+
assert len(parts) == 2, f"Expected 2 parts, got {len(parts)}: {parts}"
417+
assert "ch=admin" not in parts # injected component not a segment
418+
419+
def test_scope_key_no_collision(self) -> None:
420+
"""Percent-encoding must keep 'a|b' distinct from 'a_b'."""
421+
key1 = _build_scope_key({}, "uid", {"uid": "a|b"})
422+
key2 = _build_scope_key({}, "uid", {"uid": "a_b"})
423+
assert key1 != key2
417424

418425
def test_max_buckets_prevents_oom(self) -> None:
419426
store = InMemoryBudgetStore(max_buckets=5)
427+
exceeded_count = 0
420428
for i in range(10):
421429
snap = store.record_and_check(f"scope-{i}", "p", 1, 1, 0.01, limit_usd=100.0)
422-
# After 5, new buckets are rejected with exceeded=True
430+
if snap.exceeded:
431+
exceeded_count += 1
432+
assert snap.spent_usd == 0.0 # not recorded, fail-closed
423433
assert len(store._buckets) == 5
434+
assert exceeded_count == 5 # scopes 5-9 rejected
424435

425436
@pytest.mark.asyncio
426437
async def test_per_without_metadata_skips_rule(self) -> None:
@@ -440,3 +451,15 @@ def test_extract_by_path_rejects_dunder(self) -> None:
440451
from agent_control_evaluators.budget.evaluator import _extract_by_path
441452
assert _extract_by_path({"a": 1}, "__class__") is None
442453
assert _extract_by_path({"a": {"__init__": 1}}, "a.__init__") is None
454+
455+
def test_extract_by_path_allows_single_underscore(self) -> None:
456+
"""Single-underscore keys in data dicts are legitimate."""
457+
from agent_control_evaluators.budget.evaluator import _extract_by_path
458+
assert _extract_by_path({"_metadata": {"tokens": 42}}, "_metadata.tokens") == 42
459+
460+
def test_extract_cost_rejects_nan_inf(self) -> None:
461+
"""NaN and Inf must not pass through as cost values."""
462+
from agent_control_evaluators.budget.evaluator import _extract_cost
463+
assert _extract_cost({"c": float("nan")}, "c") is None
464+
assert _extract_cost({"c": float("inf")}, "c") is None
465+
assert _extract_cost({"c": float("-inf")}, "c") is None

0 commit comments

Comments
 (0)