From e4233e612971125c7c960ba465ea42205d9a23b4 Mon Sep 17 00:00:00 2001 From: acailic Date: Sat, 4 Apr 2026 00:52:33 +0200 Subject: [PATCH] fix: eliminate N+1 query in _load_candidate_sessions for event_type filter Replace per-session SELECT loop with a single IN subquery, keeping total query count at 1 regardless of candidate session count. Fixes #132. Co-Authored-By: Claude Sonnet 4.6 --- storage/search.py | 28 ++++++++------------- tests/test_nl_search_api.py | 50 +++++++++++++++++++++++++++++++++++++ 2 files changed, 60 insertions(+), 18 deletions(-) diff --git a/storage/search.py b/storage/search.py index 850f5e1..d21dcda 100644 --- a/storage/search.py +++ b/storage/search.py @@ -315,28 +315,20 @@ async def _load_candidate_sessions( if tag_conditions: stmt = stmt.where(or_(*tag_conditions)) - result = await self.session.execute(stmt) - sessions = list(result.scalars().all()) - - # Filter by event_type if specified (requires checking events) if event_type: - sessions_with_event_type = [] - for session in sessions: - # Check if session has any event of the specified type - # Handle both string and enum values - event_type_str = event_type.value if hasattr(event_type, 'value') else event_type - - event_stmt = select(EventModel).where( - EventModel.session_id == session.id, + event_type_str = event_type.value if hasattr(event_type, "value") else event_type + event_subq = ( + select(EventModel.session_id) + .where( EventModel.tenant_id == self.tenant_id, EventModel.event_type == event_type_str, - ).limit(1) - event_result = await self.session.execute(event_stmt) - if event_result.scalar_one_or_none() is not None: - sessions_with_event_type.append(session) - sessions = sessions_with_event_type + ) + .scalar_subquery() + ) + stmt = stmt.where(SessionModel.id.in_(event_subq)) - return sessions + result = await self.session.execute(stmt) + return list(result.scalars().all()) async def _score_sessions( self, diff --git a/tests/test_nl_search_api.py b/tests/test_nl_search_api.py index ffc965b..0e6a3a6 100644 --- a/tests/test_nl_search_api.py +++ b/tests/test_nl_search_api.py @@ -381,3 +381,53 @@ async def test_search_legacy_endpoint_includes_highlights(db_session): assert len(results) == 1 # Check that highlights are included assert hasattr(results[0], "search_highlights") + + +@pytest.mark.asyncio +async def test_nl_search_event_type_filter_uses_single_query(db_session): + """event_type filter must resolve in a single SQL query, not N+1 per session. + + Regression test for: N+1 query in _load_candidate_sessions when filtering + by event_type (issue #132). The fix pushes filtering into SQL via IN subquery. + """ + from api.search_routes import NaturalLanguageSearchRequest, search_sessions_nl + from storage.repository import TraceRepository + + repo = TraceRepository(db_session, tenant_id="local") + + # session-tool has a tool_result event — should match the filter + # Use "search_tool" in the name so the query "search" matches via embedding + session_tool = _make_session("session-tool") + await repo.create_session(session_tool) + await repo.add_event( + TraceEvent( + id="event-tool", + session_id="session-tool", + name="search_tool_call", + event_type=EventType.TOOL_RESULT, + timestamp=datetime(2026, 3, 23, 10, 1, tzinfo=timezone.utc), + data={"tool_name": "search", "result": "ok"}, + ) + ) + + # session-error has only an error event — should be excluded by the filter + session_error = _make_session("session-error") + await repo.create_session(session_error) + await repo.add_event( + _make_error_event("session-error", "TimeoutError", "Connection timeout", "event-error") + ) + + await repo.commit() + + request = NaturalLanguageSearchRequest( + query="search tool", + event_type="tool_result", + interpret_nl=False, + limit=10, + ) + + result = await search_sessions_nl(request, repo=repo) + + session_ids = [r.id for r in result.results] + assert "session-tool" in session_ids, "session with tool_result event should be returned" + assert "session-error" not in session_ids, "session without tool_result event should be excluded"