Skip to content

Commit 29ebae3

Browse files
committed
Fix tenant isolation and test harness leaks
1 parent 6484891 commit 29ebae3

16 files changed

Lines changed: 483 additions & 108 deletions

benchmarks/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -13,6 +13,7 @@
1313
run_replay_determinism_session,
1414
run_safety_escalation_session,
1515
)
16+
from .seed_enrichment import SESSION_ENRICHMENT, validate_session_enrichment
1617

1718
__all__ = [
1819
"SeedSession",
@@ -26,4 +27,6 @@
2627
"run_looping_behavior_session",
2728
"run_failure_cluster_session",
2829
"run_replay_determinism_session",
30+
"SESSION_ENRICHMENT",
31+
"validate_session_enrichment",
2932
]

benchmarks/seed_enrichment.py

Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
"""Curated enrichment metadata for demo benchmark sessions."""
2+
3+
from __future__ import annotations
4+
5+
SESSION_ENRICHMENT = {
6+
"seed-prompt-injection": {
7+
"total_tokens": 856,
8+
"total_cost_usd": 0.0042,
9+
"retention_tier": "summarized",
10+
"fix_note": "Added input sanitization and prompt boundary checks",
11+
"errors": 0,
12+
"behavior_alerts": 1,
13+
},
14+
"seed-evidence-grounding": {
15+
"total_tokens": 140,
16+
"total_cost_usd": 0.0021,
17+
"retention_tier": "summarized",
18+
"fix_note": None,
19+
"errors": 0,
20+
"behavior_alerts": 0,
21+
},
22+
"seed-multi-agent-dialogue": {
23+
"total_tokens": 412,
24+
"total_cost_usd": 0.0038,
25+
"retention_tier": "summarized",
26+
"fix_note": None,
27+
"errors": 0,
28+
"behavior_alerts": 0,
29+
},
30+
"seed-prompt-policy-shift": {
31+
"total_tokens": 164,
32+
"total_cost_usd": 0.0028,
33+
"retention_tier": "summarized",
34+
"fix_note": "Added policy consistency checks across turns",
35+
"errors": 0,
36+
"behavior_alerts": 1,
37+
},
38+
"seed-safety-escalation": {
39+
"total_tokens": 1987,
40+
"total_cost_usd": 0.0142,
41+
"retention_tier": "full",
42+
"fix_note": "Added output validation after tool call",
43+
"errors": 1,
44+
"behavior_alerts": 1,
45+
},
46+
"seed-looping-behavior": {
47+
"total_tokens": 1245,
48+
"total_cost_usd": 0.0089,
49+
"retention_tier": "summarized",
50+
"fix_note": "Added max iteration limit with circuit breaker",
51+
"errors": 0,
52+
"behavior_alerts": 2,
53+
},
54+
"seed-failure-cluster": {
55+
"total_tokens": 1567,
56+
"total_cost_usd": 0.0112,
57+
"retention_tier": "full",
58+
"fix_note": "Added pre-call validation and error recovery",
59+
"errors": 0,
60+
"behavior_alerts": 1,
61+
},
62+
"seed-replay-determinism": {
63+
"total_tokens": 289,
64+
"total_cost_usd": 0.0031,
65+
"retention_tier": "summarized",
66+
"fix_note": None,
67+
"errors": 0,
68+
"behavior_alerts": 0,
69+
},
70+
}
71+
72+
73+
def validate_session_enrichment(session_id: str, enrichment: dict[str, object]) -> None:
    """Validate curated enrichment metrics for demo seed sessions.

    Args:
        session_id: Seed session identifier; used only to build error messages.
        enrichment: Enrichment mapping; only ``total_tokens`` and
            ``total_cost_usd`` are inspected.

    Raises:
        ValueError: If ``total_tokens`` is not a positive ``int`` or
            ``total_cost_usd`` is not a positive, finite number.
    """
    total_tokens = enrichment.get("total_tokens")
    total_cost_usd = enrichment.get("total_cost_usd")

    # bool is a subclass of int, so True would otherwise be accepted as "1 token".
    tokens_is_int = isinstance(total_tokens, int) and not isinstance(total_tokens, bool)
    if not tokens_is_int or total_tokens <= 0:
        raise ValueError(f"Seed enrichment for {session_id} must define positive total_tokens")

    cost_is_number = isinstance(total_cost_usd, (int, float)) and not isinstance(total_cost_usd, bool)
    # The chained comparison is False for NaN (all NaN comparisons are False)
    # and for +inf, both of which slipped through a plain ``<= 0`` check.
    if not cost_is_number or not 0 < total_cost_usd < float("inf"):
        raise ValueError(f"Seed enrichment for {session_id} must define positive total_cost_usd")

collector/server.py

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -160,6 +160,19 @@ def _get_redaction_pipeline() -> RedactionPipeline:
160160
return RedactionPipeline.from_config()
161161

162162

163+
def _resolve_session_id(requested_id: str | None) -> str:
164+
if requested_id is None:
165+
return str(uuid.uuid4())
166+
167+
if get_config().mode != "local":
168+
raise HTTPException(
169+
status_code=status.HTTP_400_BAD_REQUEST,
170+
detail="Explicit session IDs are only supported in local mode",
171+
)
172+
173+
return requested_id
174+
175+
163176
async def _persist_event_if_configured(
164177
event: TraceEvent,
165178
tenant_id: str = "local",
@@ -271,7 +284,7 @@ async def _create_session(
271284
) -> SessionResponse:
272285
deps = dependencies or _resolve_dependencies()
273286
session = Session(
274-
id=session_data.id or str(uuid.uuid4()),
287+
id=_resolve_session_id(session_data.id),
275288
agent_name=session_data.agent_name,
276289
framework=session_data.framework,
277290
config=session_data.config,

scripts/seed_demo_sessions.py

Lines changed: 6 additions & 83 deletions
Original file line number | Diff line number | Diff line change
@@ -16,96 +16,19 @@
1616

1717
from agent_debugger_sdk.core.context import configure_event_pipeline
1818
from agent_debugger_sdk.core.events import Checkpoint, Session, TraceEvent
19-
from benchmarks import DEFAULT_SEED_SESSION_IDS, iter_seed_scenarios
19+
from benchmarks import (
20+
DEFAULT_SEED_SESSION_IDS,
21+
SESSION_ENRICHMENT,
22+
iter_seed_scenarios,
23+
validate_session_enrichment,
24+
)
2025
from collector.buffer import get_event_buffer
2126
from collector.server import configure_storage
2227
from storage import Base, TraceRepository
2328
from storage.models import AnomalyAlertModel
2429

2530
DATABASE_URL = os.environ.get("AGENT_DEBUGGER_DB_URL", "sqlite+aiosqlite:///./data/agent_debugger.db")
2631

27-
# Session enrichment data: realistic values for demo sessions
28-
# Note: failure_count is computed in API layer (services.py) as errors count
29-
# behavior_alert_count is computed in API layer from AnomalyAlertModel records
30-
def validate_session_enrichment(session_id: str, enrichment: dict[str, object]) -> None:
31-
"""Validate curated enrichment metrics for demo seed sessions."""
32-
total_tokens = enrichment.get("total_tokens")
33-
total_cost_usd = enrichment.get("total_cost_usd")
34-
35-
if not isinstance(total_tokens, int) or total_tokens <= 0:
36-
raise ValueError(f"Seed enrichment for {session_id} must define positive total_tokens")
37-
38-
if not isinstance(total_cost_usd, (int, float)) or float(total_cost_usd) <= 0:
39-
raise ValueError(f"Seed enrichment for {session_id} must define positive total_cost_usd")
40-
41-
42-
SESSION_ENRICHMENT = {
43-
"seed-prompt-injection": {
44-
"total_tokens": 856,
45-
"total_cost_usd": 0.0042,
46-
"retention_tier": "summarized",
47-
"fix_note": "Added input sanitization and prompt boundary checks",
48-
"errors": 0,
49-
"behavior_alerts": 1,
50-
},
51-
"seed-evidence-grounding": {
52-
"total_tokens": 140,
53-
"total_cost_usd": 0.0021,
54-
"retention_tier": "summarized",
55-
"fix_note": None,
56-
"errors": 0,
57-
"behavior_alerts": 0,
58-
},
59-
"seed-multi-agent-dialogue": {
60-
"total_tokens": 412,
61-
"total_cost_usd": 0.0038,
62-
"retention_tier": "summarized",
63-
"fix_note": None,
64-
"errors": 0,
65-
"behavior_alerts": 0,
66-
},
67-
"seed-prompt-policy-shift": {
68-
"total_tokens": 164,
69-
"total_cost_usd": 0.0028,
70-
"retention_tier": "summarized",
71-
"fix_note": "Added policy consistency checks across turns",
72-
"errors": 0,
73-
"behavior_alerts": 1,
74-
},
75-
"seed-safety-escalation": {
76-
"total_tokens": 1987,
77-
"total_cost_usd": 0.0142,
78-
"retention_tier": "full",
79-
"fix_note": "Added output validation after tool call",
80-
"errors": 1,
81-
"behavior_alerts": 1,
82-
},
83-
"seed-looping-behavior": {
84-
"total_tokens": 1245,
85-
"total_cost_usd": 0.0089,
86-
"retention_tier": "summarized",
87-
"fix_note": "Added max iteration limit with circuit breaker",
88-
"errors": 0,
89-
"behavior_alerts": 2,
90-
},
91-
"seed-failure-cluster": {
92-
"total_tokens": 1567,
93-
"total_cost_usd": 0.0112,
94-
"retention_tier": "full",
95-
"fix_note": "Added pre-call validation and error recovery",
96-
"errors": 0,
97-
"behavior_alerts": 1,
98-
},
99-
"seed-replay-determinism": {
100-
"total_tokens": 289,
101-
"total_cost_usd": 0.0031,
102-
"retention_tier": "summarized",
103-
"fix_note": None,
104-
"errors": 0,
105-
"behavior_alerts": 0,
106-
},
107-
}
108-
10932

11033
def validate_session_metrics(total_tokens: int, total_cost_usd: float, *, context: str) -> None:
11134
"""Validate curated session metrics before persisting demo seed data."""

storage/repositories/session_repo.py

Lines changed: 8 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4,12 +4,12 @@
44

55
from typing import Any
66

7-
from sqlalchemy import func, select
7+
from sqlalchemy import delete, func, select
88
from sqlalchemy.ext.asyncio import AsyncSession
99

1010
from agent_debugger_sdk.core.events import Session
1111
from storage.converters import orm_to_session
12-
from storage.models import SessionModel
12+
from storage.models import AnomalyAlertModel, SessionModel
1313

1414

1515
class SessionRepository:
@@ -195,5 +195,11 @@ async def delete_session(self, session_id: str) -> bool:
195195
if db_session is None:
196196
return False
197197

198+
await self.session.execute(
199+
delete(AnomalyAlertModel).where(
200+
AnomalyAlertModel.session_id == session_id,
201+
AnomalyAlertModel.tenant_id == self.tenant_id,
202+
)
203+
)
198204
await self.session.delete(db_session)
199205
return True

0 commit comments

Comments
 (0)