Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 10 additions & 18 deletions storage/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,28 +315,20 @@ async def _load_candidate_sessions(
if tag_conditions:
stmt = stmt.where(or_(*tag_conditions))

result = await self.session.execute(stmt)
sessions = list(result.scalars().all())

# Filter by event_type if specified (requires checking events)
if event_type:
sessions_with_event_type = []
for session in sessions:
# Check if session has any event of the specified type
# Handle both string and enum values
event_type_str = event_type.value if hasattr(event_type, 'value') else event_type

event_stmt = select(EventModel).where(
EventModel.session_id == session.id,
event_type_str = event_type.value if hasattr(event_type, "value") else event_type
event_subq = (
select(EventModel.session_id)
.where(
EventModel.tenant_id == self.tenant_id,
EventModel.event_type == event_type_str,
).limit(1)
event_result = await self.session.execute(event_stmt)
if event_result.scalar_one_or_none() is not None:
sessions_with_event_type.append(session)
sessions = sessions_with_event_type
)
.scalar_subquery()
)
stmt = stmt.where(SessionModel.id.in_(event_subq))

return sessions
result = await self.session.execute(stmt)
return list(result.scalars().all())

async def _score_sessions(
self,
Expand Down
50 changes: 50 additions & 0 deletions tests/test_nl_search_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -381,3 +381,53 @@ async def test_search_legacy_endpoint_includes_highlights(db_session):
assert len(results) == 1
# Check that highlights are included
assert hasattr(results[0], "search_highlights")


@pytest.mark.asyncio
async def test_nl_search_event_type_filter_uses_single_query(db_session):
"""event_type filter must resolve in a single SQL query, not N+1 per session.

Regression test for: N+1 query in _load_candidate_sessions when filtering
by event_type (issue #132). The fix pushes filtering into SQL via IN subquery.
Comment on lines +387 to +391

Copilot AI Apr 3, 2026

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test is named/documented as enforcing a single SQL query, but it never asserts query count. Also, because it calls search_sessions_nl, the search path still issues per-session event-loading queries during scoring/highlights, so a true “single query” assertion would be unreliable here. Consider either (a) renaming/updating the docstring to only assert correct filtering behavior, or (b) moving the regression to a unit-level test that targets _load_candidate_sessions directly and counts execute/cursor calls for that method only.

Suggested change
async def test_nl_search_event_type_filter_uses_single_query(db_session):
"""event_type filter must resolve in a single SQL query, not N+1 per session.
Regression test for: N+1 query in _load_candidate_sessions when filtering
by event_type (issue #132). The fix pushes filtering into SQL via IN subquery.
async def test_nl_search_event_type_filter_returns_only_matching_sessions(db_session):
"""Regression test for correct event_type filtering in NL search results.
This test verifies that sessions with a matching event_type are returned and
sessions without that event_type are excluded when searching via
search_sessions_nl. It does not assert SQL query counts.

Copilot uses AI. Check for mistakes.
"""
from api.search_routes import NaturalLanguageSearchRequest, search_sessions_nl
from storage.repository import TraceRepository

repo = TraceRepository(db_session, tenant_id="local")

# session-tool has a tool_result event — should match the filter
# Use "search_tool" in the name so the query "search" matches via embedding
session_tool = _make_session("session-tool")
await repo.create_session(session_tool)
await repo.add_event(
TraceEvent(
id="event-tool",
session_id="session-tool",
name="search_tool_call",
event_type=EventType.TOOL_RESULT,
timestamp=datetime(2026, 3, 23, 10, 1, tzinfo=timezone.utc),
data={"tool_name": "search", "result": "ok"},
)
)

# session-error has only an error event — should be excluded by the filter
session_error = _make_session("session-error")
await repo.create_session(session_error)
await repo.add_event(
_make_error_event("session-error", "TimeoutError", "Connection timeout", "event-error")
)

await repo.commit()

request = NaturalLanguageSearchRequest(
query="search tool",
event_type="tool_result",
interpret_nl=False,
limit=10,
)

result = await search_sessions_nl(request, repo=repo)

session_ids = [r.id for r in result.results]
assert "session-tool" in session_ids, "session with tool_result event should be returned"
assert "session-error" not in session_ids, "session without tool_result event should be excluded"
Loading