Skip to content

Commit e097858

Browse files
acailicclaude
andcommitted
fix: address code review findings — critical, major, and minor issues
Fix 19 findings from comprehensive code review across SDK, API, storage, and frontend: - Fix cosine_similarity division by zero for near-zero vectors - Fix unsafe datetime parsing with explicit error handling - Fix SSE silent parse failures with user-visible notifications - Fix N+1 queries in event_type filtering and entity extraction - Fix timezone inconsistency in pattern_repo (now UTC) - Add session count limit and logging for entity extraction - Fix session store dead code in jumpToSearchResult - Add max-size limit to request deduplication Map - Change HindsightConfig.enabled default to False (opt-in) - Add FileExporter path traversal validation - Extract hardcoded limits to config constants - Make SSE timeout configurable via env var - Fix async dependency injection in analytics routes - Add migration downgrade idempotency check - Remove duplicate step-backward button in SessionReplay Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 05d4240 commit e097858

19 files changed

Lines changed: 313 additions & 81 deletions

File tree

agent_debugger_sdk/adapters/hindsight.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ class HindsightConfig:
3434
bank_id: str = "agent_debugger"
3535
api_key: str | None = None
3636
timeout_seconds: float = 30.0
37-
enabled: bool = True
37+
enabled: bool = False
3838

3939
# Memory type mappings
4040
experience_memory_type: str = "failure_experience"

agent_debugger_sdk/core/exporters/file.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,12 +142,25 @@ def __init__(self, base_dir: str | Path | None = None):
142142
Args:
143143
base_dir: Base directory for storing exported data.
144144
Defaults to ~/.peaky-peek/memory
145+
146+
Raises:
147+
ValueError: If the provided base_dir path is outside the intended directory.
145148
"""
146149
if base_dir is None:
147150
home = Path.home()
148151
base_dir = home / ".peaky-peek" / "memory"
149152

150-
self.base_dir = Path(base_dir)
153+
# Resolve and validate the path
154+
resolved_path = Path(base_dir).resolve()
155+
156+
# Reject paths with directory traversal components
157+
if ".." in Path(base_dir).parts or "~" in Path(base_dir).parts:
158+
raise ValueError(
159+
f"Invalid base_dir: {base_dir}. "
160+
"Path must not contain directory traversal components."
161+
)
162+
163+
self.base_dir = resolved_path
151164
self.sessions_dir = self.base_dir / "sessions"
152165
self.patterns_dir = self.base_dir / "patterns"
153166
self.entities_dir = self.base_dir / "entities"

api/analytics_routes.py

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,11 @@
55
from datetime import datetime, timedelta, timezone
66
from typing import Any, Literal
77

8-
from fastapi import APIRouter, Query
8+
from fastapi import APIRouter, Depends, Query
99
from pydantic import BaseModel, ConfigDict, Field
1010

1111
from api.analytics_db import get_aggregates, get_daily_breakdown, record_event
12+
from api.dependencies import get_repository
1213

1314
router = APIRouter(tags=["analytics"])
1415

@@ -248,17 +249,13 @@ async def get_patterns(
248249
status: str | None = Query(default="active", description="Filter by status"),
249250
hours: int | None = Query(default=None, description="Only return patterns from last N hours"),
250251
limit: int = Query(default=50, ge=1, le=200, description="Maximum results to return"),
252+
repo=Depends(get_repository),
251253
) -> PatternsListResponse:
252254
"""Get detected patterns across sessions.
253255
254256
Returns patterns matching the specified filters, ordered by detection time.
255257
Supports filtering by agent, pattern type, severity, status, and time range.
256258
"""
257-
from api.dependencies import get_repository
258-
from storage import TraceRepository
259-
260-
repo: TraceRepository = get_repository()
261-
262259
# Use the pattern repository through the session
263260
from storage.repositories.pattern_repo import PatternRepository
264261

@@ -310,16 +307,14 @@ async def get_patterns(
310307
@router.get("/api/analytics/patterns/{pattern_id}", response_model=PatternDetailResponse)
311308
async def get_pattern_detail(
312309
pattern_id: str,
310+
repo=Depends(get_repository),
313311
) -> PatternDetailResponse:
314312
"""Get detailed information about a specific pattern.
315313
316314
Returns full pattern details including affected sessions list.
317315
"""
318-
from api.dependencies import get_repository
319-
from storage import TraceRepository
320316
from storage.repositories.pattern_repo import PatternRepository
321317

322-
repo: TraceRepository = get_repository()
323318
pattern_repo = PatternRepository(repo.session, repo.tenant_id)
324319

325320
pattern = await pattern_repo.get_pattern(pattern_id)
@@ -352,6 +347,7 @@ async def get_pattern_detail(
352347
async def get_health_report(
353348
agent_name: str | None = Query(default=None, description="Filter by agent name"),
354349
hours: int | None = Query(default=24, ge=1, le=168, description="Look at patterns from last N hours"),
350+
repo=Depends(get_repository),
355351
) -> HealthReportSchema:
356352
"""Generate agent health report from detected patterns.
357353
@@ -363,12 +359,9 @@ async def get_health_report(
363359
- Top issues with recommendations
364360
- Actionable next steps
365361
"""
366-
from api.dependencies import get_repository
367362
from collector.patterns import generate_health_report
368-
from storage import TraceRepository
369363
from storage.repositories.pattern_repo import PatternRepository
370364

371-
repo: TraceRepository = get_repository()
372365
pattern_repo = PatternRepository(repo.session, repo.tenant_id)
373366

374367
# Get recent patterns

api/replay_routes.py

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -48,9 +48,13 @@ async def replay_session(
4848

4949
# FastAPI resolves Query defaults in HTTP calls but not in direct/unit-test calls.
5050
# Extract the default value when the raw Query object is passed through.
51-
if hasattr(collapse_threshold, "default"):
51+
# Check for the Query class by examining if the value has a 'default' attribute
52+
# and is not already a primitive type.
53+
from fastapi import params
54+
55+
if isinstance(collapse_threshold, params.Query):
5256
collapse_threshold = float(collapse_threshold.default)
53-
if hasattr(stop_at_breakpoint, "default"):
57+
if isinstance(stop_at_breakpoint, params.Query):
5458
stop_at_breakpoint = bool(stop_at_breakpoint.default)
5559

5660
# Record analytics event (fire-and-forget)

api/search_routes.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -164,21 +164,21 @@ async def search_sessions_nl(
164164
if request.min_errors is not None:
165165
filters_applied["min_errors"] = request.min_errors
166166

167-
# Parse datetime filters
167+
# Parse datetime filters with explicit error handling
168168
started_after = None
169169
started_before = None
170170
if request.started_after:
171171
try:
172172
started_after = datetime.fromisoformat(request.started_after.replace("Z", "+00:00"))
173173
filters_applied["started_after"] = request.started_after
174-
except ValueError:
175-
pass # Invalid datetime, ignore filter
174+
except ValueError as e:
175+
raise ValueError(f"Invalid started_after datetime '{request.started_after}': {e}")
176176
if request.started_before:
177177
try:
178178
started_before = datetime.fromisoformat(request.started_before.replace("Z", "+00:00"))
179179
filters_applied["started_before"] = request.started_before
180-
except ValueError:
181-
pass # Invalid datetime, ignore filter
180+
except ValueError as e:
181+
raise ValueError(f"Invalid started_before datetime '{request.started_before}': {e}")
182182

183183
# Perform search with all filters
184184
sessions = await repo.search_sessions(

api/services.py

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import asyncio
66
import json
77
import logging
8+
import os
89
from typing import Any
910

1011
from sqlalchemy import String, cast, or_, select
@@ -24,6 +25,7 @@
2425
logger = logging.getLogger(__name__)
2526
SESSION_ANALYSIS_CAP = 100
2627
FAILURE_SIMILARITY_THRESHOLD = 0.5
28+
DEFAULT_SSE_TIMEOUT = int(os.getenv("AGENT_DEBUGGER_SSE_TIMEOUT", "300"))
2729

2830

2931
def normalize_session(
@@ -213,13 +215,16 @@ async def enrich_sessions_for_listing(
213215
if sort_by != "replay_value":
214216
return [normalize_session(session) for session in sessions]
215217

216-
capped_sessions = sessions[:SESSION_ANALYSIS_CAP]
217218
if len(sessions) > SESSION_ANALYSIS_CAP:
218219
logger.warning(
219-
"Replay-value enrichment capped at %s sessions for one response page",
220+
"Replay-value enrichment capped at %s sessions for one response page (has %s sessions). "
221+
"Sessions beyond the cap will be returned without replay_value enrichment.",
220222
SESSION_ANALYSIS_CAP,
223+
len(sessions),
221224
)
222225

226+
capped_sessions = sessions[:SESSION_ANALYSIS_CAP]
227+
223228
# Parallelize session analysis for better performance
224229
# Note: We still analyze sessions to get enrichment data like representative_event_id,
225230
# but the replay_value itself may be cached from a previous analysis
@@ -333,15 +338,18 @@ async def event_generator(
333338
session_id: str,
334339
*,
335340
buffer: EventBuffer | None = None,
336-
max_connection_time: int = 300,
341+
max_connection_time: int | None = None,
337342
):
338343
"""Generate SSE events for a session.
339344
340345
Args:
341346
session_id: Session ID to stream events for
342347
buffer: Optional event buffer (uses default if None)
343-
max_connection_time: Maximum connection time in seconds (default 300)
348+
max_connection_time: Maximum connection time in seconds (default from
349+
AGENT_DEBUGGER_SSE_TIMEOUT env var, 300 if not set)
344350
"""
351+
if max_connection_time is None:
352+
max_connection_time = DEFAULT_SSE_TIMEOUT
345353
import time
346354

347355
buf = buffer or get_event_buffer()

api/session_routes.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
from storage import TraceRepository
3838

3939
router = APIRouter(tags=["sessions"])
40+
DEFAULT_EXPORT_CHECKPOINTS_LIMIT = 1000
4041

4142

4243
def _normalize_datetime(value: datetime | None) -> datetime | None:
@@ -212,7 +213,7 @@ async def export_session(
212213
session = await require_session(repo, session_id)
213214
events = await repo.list_events(session_id, limit=MAX_TRACES_PER_REQUEST)
214215
# Add limit to checkpoint loading to prevent unbounded queries
215-
checkpoints = await repo.list_checkpoints(session_id, limit=1000)
216+
checkpoints = await repo.list_checkpoints(session_id, limit=DEFAULT_EXPORT_CHECKPOINTS_LIMIT)
216217

217218
export_data = {
218219
"export_version": "1.0",
Lines changed: 175 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,175 @@
1+
# Code Review Report: Full Recent Changes (Last 20 Commits)
2+
3+
**Date:** 2026-04-04
4+
**Reviewers:** 4 parallel code-reviewer agents (SDK, API, Storage, Frontend)
5+
**Scope:** ~14,800 lines across 149 files
6+
**Status:** COMPLETE
7+
8+
---
9+
10+
## Executive Summary
11+
12+
| Domain | Files | Lines | Critical | Major | Minor | Nitpick | Grade |
13+
|--------|-------|-------|----------|-------|-------|---------|-------|
14+
| SDK + Exporters + Adapters | 21 | ~1,620 | 0 | 2 | 3 | 2 | **Conditional** |
15+
| API + Services + Schemas | 10 | ~945 | 1 | 3 | 4 | 4 | **B+** |
16+
| Storage + Search + Collector | 16 | ~2,171 | 1 | 3 | 1 | 0 | **7.5/10** |
17+
| Frontend + Integration | 38 | ~4,641 | 2 | 3 | 3 | 2 | **Pass** |
18+
| **TOTAL** | **85** | **~9,377** | **4** | **11** | **11** | **8** | |
19+
20+
**Verdict:** The codebase is in good shape overall. Security posture is strong (tenant isolation, SQL injection protection, input validation). The 4 critical and 11 major findings should be addressed before the next release. No data loss or security vulnerability was found.
21+
22+
---
23+
24+
## Critical Findings (4)
25+
26+
| # | Domain | File | Line(s) | Issue | Fix |
27+
|---|--------|------|---------|-------|-----|
28+
| C1 | API | `api/search_routes.py` | 171-179 | Unsafe datetime parsing with silent failure on invalid ISO strings | Use Pydantic datetime types or explicit error handling for `started_after`/`started_before` |
29+
| C2 | Frontend | `frontend/src/App.tsx` | 285-287 | SSE JSON parse failure silently swallows events without alerting user | Add user-visible notification after N consecutive parse failures |
30+
| C3 | Frontend | `frontend/src/components/SimilarFailuresPanel.tsx` | 66 | `ignore` flag doesn't account for rapid sessionId changes — stale state updates | Add sessionId to dependency array or use AbortController |
31+
| C4 | Storage | `storage/search.py` | 317-339 | Division by zero risk in cosine_similarity — near-zero vectors not protected | Add explicit check: `if magnitude < 1e-10: return 0.0` before division |
32+
33+
---
34+
35+
## Major Findings (11)
36+
37+
| # | Domain | File | Line(s) | Issue | Fix |
38+
|---|--------|------|---------|-------|-----|
39+
| M1 | API | `api/entity_routes.py` | 72 | N+1 query: `extract_entities_from_all_sessions()` loads ALL events across ALL sessions | Implement caching or pre-computed entity tables |
40+
| M2 | API | `api/services.py` | 226 | Parallel session analysis cap at 100 may silently drop enrichment data | Log warning BEFORE truncation |
41+
| M3 | API | `api/services.py` | 477-485 | Similar failures query uses OR clause with multiple ILIKE without index | Add index on `(tenant_id, event_type)`, consider full-text search |
42+
| M4 | Frontend | `frontend/src/stores/sessionStore.ts` | 231-234 | `jumpToSearchResult` dead code — sets replayMode but doesn't call inspectEvent | Remove conditional or add missing call |
43+
| M5 | Frontend | `frontend/src/api/client.ts` | 19 | Request deduplication Map grows unbounded | Add TTL or max-size limit |
44+
| M6 | Frontend | `frontend/src/components/DecisionTree.tsx` | 296-619 | Heavy D3 rendering without debouncing | Add render debouncing for large trees |
45+
| M7 | Storage | `storage/repositories/pattern_repo.py` | 70 | Naive datetime without timezone — `datetime.now()` inconsistent with UTC elsewhere | Use `datetime.now(timezone.utc)` |
46+
| M8 | Storage | `storage/search.py` | 283-318 | N+1 query in event_type filtering — separate query per session | Move to single JOIN or EXISTS clause |
47+
| M9 | Storage | `storage/search.py` | 265-279 | Inefficient JSON tag filtering with LIKE — false positives possible | Use proper JSON operators |
48+
| M10 | SDK | `agent_debugger_sdk/core/exporters/*` | N/A | **No test coverage** for 1,100+ lines of new code (file.py, insights.py, pipeline.py, hindsight.py) | Add unit tests before merge |
49+
| M11 | SDK | `agent_debugger_sdk/core/context/session_manager.py` | 28-74 | No thread safety — SessionManager can be called from concurrent contexts | Add asyncio.Lock or document as single-threaded only |
50+
51+
---
52+
53+
## Minor Findings (11)
54+
55+
| # | Domain | File | Issue |
56+
|---|--------|------|-------|
57+
| m1 | API | `api/analytics_routes.py:260` | `get_repository()` called without `await` on dependency |
58+
| m2 | API | `api/session_routes.py:215` | Hardcoded `limit=1000` without config constant |
59+
| m3 | API | `api/replay_routes.py:51-54` | Fragile workaround for FastAPI Query default extraction |
60+
| m4 | API | `api/services.py:332-384` | SSE 300s timeout not configurable |
61+
| m5 | Frontend | `frontend/src/components/WhyButton.tsx:68-87` | Doesn't differentiate timeout vs network error for retry |
62+
| m6 | Frontend | `frontend/src/App.tsx:602-631` | Global keyboard shortcuts may conflict with browser/input |
63+
| m7 | Frontend | `frontend/src/components/SessionReplay.tsx:183-193` | Duplicate step-backward button |
64+
| m8 | SDK | `agent_debugger_sdk/adapters/hindsight.py:68-71` | `HindsightConfig.enabled` defaults to `True` (should be opt-in) |
65+
| m9 | SDK | `agent_debugger_sdk/core/exporters/file.py:139-158` | File paths from `base_dir` not validated — path traversal risk |
66+
| m10 | SDK | `agent_debugger_sdk/cli.py:101-117` | `run_demo()` suppresses all process output (DEVNULL) |
67+
| m11 | Storage | `storage/migrations/versions/005_add_patterns.py:22-27` | Idempotency check only in upgrade, not downgrade |
68+
69+
---
70+
71+
## Security Assessment
72+
73+
| Check | Status | Notes |
74+
|-------|--------|-------|
75+
| SQL Injection | PASS | All queries use SQLAlchemy ORM with parameterization |
76+
| Tenant Isolation | PASS | All new queries properly scoped with `tenant_id` |
77+
| Input Validation | PASS | Pydantic models with Field() constraints, regex validation |
78+
| CORS | PASS | Configurable via env var, defaults to `*` (local-first tool) |
79+
| Localhost Protection | PASS | New collector/server.py localhost check |
80+
| Path Traversal | WARN | File exporter `base_dir` not validated (m9) |
81+
| CLI Input | WARN | No input validation documented (m10) |
82+
83+
---
84+
85+
## Type Drift Analysis
86+
87+
**No breaking type drift detected between frontend and backend.**
88+
89+
| Schema | Status |
90+
|--------|--------|
91+
| SessionSchema / Session | Match |
92+
| TraceEventSchema / TraceEvent | Match (all new fields aligned) |
93+
| CheckpointSchema / Checkpoint | Match |
94+
| ReplayResponse | Match |
95+
| SimilarFailuresResponse | Match |
96+
| SearchResponse | Match |
97+
| AnalyticsResponse | Match |
98+
| EntityItem | Backend-only (frontend types not yet added — expected) |
99+
100+
---
101+
102+
## Performance Concerns
103+
104+
| Priority | Area | Issue |
105+
|----------|------|-------|
106+
| HIGH | `api/entity_routes.py:72` | O(all_events) entity extraction on every request |
107+
| HIGH | `storage/search.py:283-318` | N+1 query in event_type filtering |
108+
| MEDIUM | `api/services.py:477-485` | ILIKE OR clause without index support |
109+
| MEDIUM | `frontend/src/api/client.ts:19` | Unbounded request deduplication Map |
110+
| MEDIUM | `frontend/src/components/DecisionTree.tsx` | D3 rendering without debouncing |
111+
112+
**Recommended indexes:** `(tenant_id, event_type)`, `(tenant_id, started_at)`
113+
114+
---
115+
116+
## Test Coverage Assessment
117+
118+
### Well-covered areas
119+
- Pattern detection (613 lines of tests)
120+
- NL search (421 lines of tests)
121+
- Entity extraction (339 lines of tests)
122+
- API contracts, collector regressions, session routes
123+
124+
### Gaps
125+
- **SDK exporters**: Zero tests for 1,100+ lines of new code (file.py, insights.py, pipeline.py, hindsight.py)
126+
- **DecisionTree component**: Complex D3 logic untested
127+
- **SSE reconnection logic**: Not tested
128+
- **WhyButton error states**: Incomplete coverage
129+
130+
---
131+
132+
## Recommended Actions
133+
134+
### Must Fix (Before Release)
135+
136+
| # | Issue | Effort | Domain |
137+
|---|-------|--------|--------|
138+
| 1 | Add test coverage for SDK exporters (M10) | 2-3 hours | SDK |
139+
| 2 | Fix cosine_similarity division by zero (C4) | 5 min | Storage |
140+
| 3 | Fix unsafe datetime parsing in search (C1) | 15 min | API |
141+
| 4 | Fix SSE silent parse failure (C2) | 15 min | Frontend |
142+
| 5 | Fix SimilarFailuresPanel stale state (C3) | 15 min | Frontend |
143+
144+
### Should Fix (Near-term)
145+
146+
| # | Issue | Effort | Domain |
147+
|---|-------|--------|--------|
148+
| 6 | Fix N+1 entity extraction query (M1) | 30 min | API |
149+
| 7 | Fix N+1 event_type filtering (M8) | 30 min | Storage |
150+
| 8 | Add database indexes for search (M3) | 15 min | Storage |
151+
| 9 | Fix timezone inconsistency in pattern_repo (M7) | 5 min | Storage |
152+
| 10 | Add thread safety or document limitation (M11) | 30 min | SDK |
153+
| 11 | Add TTL to request deduplication Map (M5) | 15 min | Frontend |
154+
| 12 | Debounce DecisionTree D3 rendering (M6) | 30 min | Frontend |
155+
156+
### Consider (Backlog)
157+
158+
| # | Issue | Effort | Domain |
159+
|---|-------|--------|--------|
160+
| 13 | Opt-in default for HindsightConfig (m8) | 2 min | SDK |
161+
| 14 | Validate FileExporter base_dir (m9) | 15 min | SDK |
162+
| 15 | Extract hardcoded limits to config (m2, m4) | 15 min | API |
163+
| 16 | Use proper JSON operators for tag filtering (M9) | 30 min | Storage |
164+
| 17 | Fix sessionStore dead code (M4) | 10 min | Frontend |
165+
| 18 | Remove duplicate step-backward button (m7) | 2 min | Frontend |
166+
167+
---
168+
169+
## Cross-Domain Observations
170+
171+
1. **Consistent architecture** — Repository pattern, tenant isolation, and separation of concerns are well-maintained across all domains.
172+
2. **Good error handling culture** — Most code paths handle errors gracefully, with proper wrapping and logging.
173+
3. **Test quality is high where it exists** — Tests are thorough with good edge case coverage.
174+
4. **Main risk is missing tests** — The SDK exporters represent 1,100+ lines of untested new code.
175+
5. **Performance will degrade at scale** — Several N+1 query patterns need attention before production workloads.

0 commit comments

Comments
 (0)