acailic · acailic · Apr 3, 2026 · Copilot · Apr 3, 2026
diff --git a/storage/search.py b/storage/search.py
@@ -315,28 +315,20 @@ async def _load_candidate_sessions(
             if tag_conditions:
                 stmt = stmt.where(or_(*tag_conditions))
 
-        result = await self.session.execute(stmt)
-        sessions = list(result.scalars().all())
-
-        # Filter by event_type if specified (requires checking events)
         if event_type:
-            sessions_with_event_type = []
-            for session in sessions:
-                # Check if session has any event of the specified type
-                # Handle both string and enum values
-                event_type_str = event_type.value if hasattr(event_type, 'value') else event_type
-
-                event_stmt = select(EventModel).where(
-                    EventModel.session_id == session.id,
+            event_type_str = event_type.value if hasattr(event_type, "value") else event_type
+            event_subq = (
+                select(EventModel.session_id)
+                .where(
                     EventModel.tenant_id == self.tenant_id,
                     EventModel.event_type == event_type_str,
-                ).limit(1)
-                event_result = await self.session.execute(event_stmt)
-                if event_result.scalar_one_or_none() is not None:
-                    sessions_with_event_type.append(session)
-            sessions = sessions_with_event_type
+                )
+                .scalar_subquery()
+            )
+            stmt = stmt.where(SessionModel.id.in_(event_subq))
 
-        return sessions
+        result = await self.session.execute(stmt)
+        return list(result.scalars().all())
 
     async def _score_sessions(
         self,

diff --git a/tests/test_nl_search_api.py b/tests/test_nl_search_api.py
@@ -381,3 +381,53 @@ async def test_search_legacy_endpoint_includes_highlights(db_session):
     assert len(results) == 1
     # Check that highlights are included
     assert hasattr(results[0], "search_highlights")
+
+
+@pytest.mark.asyncio
+async def test_nl_search_event_type_filter_uses_single_query(db_session):
+    """event_type filter must resolve in a single SQL query, not N+1 per session.
+
+    Regression test for: N+1 query in _load_candidate_sessions when filtering
+    by event_type (issue #132). The fix pushes filtering into SQL via IN subquery.
-async def test_nl_search_event_type_filter_uses_single_query(db_session):
-    """event_type filter must resolve in a single SQL query, not N+1 per session.
-
-    Regression test for: N+1 query in _load_candidate_sessions when filtering
-    by event_type (issue #132). The fix pushes filtering into SQL via IN subquery.
+async def test_nl_search_event_type_filter_returns_only_matching_sessions(db_session):
+    """Regression test for correct event_type filtering in NL search results.
+
+    This test verifies that sessions with a matching event_type are returned and
+    sessions without that event_type are excluded when searching via
+    search_sessions_nl. It does not assert SQL query counts.
-async def test_nl_search_event_type_filter_uses_single_query(db_session):
-    """event_type filter must resolve in a single SQL query, not N+1 per session.
-
-    Regression test for: N+1 query in _load_candidate_sessions when filtering
-    by event_type (issue #132). The fix pushes filtering into SQL via IN subquery.
+async def test_nl_search_event_type_filter_returns_only_matching_sessions(db_session):
+    """Regression test for correct event_type filtering in NL search results.
+
+    This test verifies that sessions with a matching event_type are returned and
+    sessions without that event_type are excluded when searching via
+    search_sessions_nl. It does not assert SQL query counts.
+    """
+    from api.search_routes import NaturalLanguageSearchRequest, search_sessions_nl
+    from storage.repository import TraceRepository
+
+    repo = TraceRepository(db_session, tenant_id="local")
+
+    # session-tool has a tool_result event — should match the filter
+    # Use "search_tool" in the name so the query "search" matches via embedding
+    session_tool = _make_session("session-tool")
+    await repo.create_session(session_tool)
+    await repo.add_event(
+        TraceEvent(
+            id="event-tool",
+            session_id="session-tool",
+            name="search_tool_call",
+            event_type=EventType.TOOL_RESULT,
+            timestamp=datetime(2026, 3, 23, 10, 1, tzinfo=timezone.utc),
+            data={"tool_name": "search", "result": "ok"},
+        )
+    )
+
+    # session-error has only an error event — should be excluded by the filter
+    session_error = _make_session("session-error")
+    await repo.create_session(session_error)
+    await repo.add_event(
+        _make_error_event("session-error", "TimeoutError", "Connection timeout", "event-error")
+    )
+
+    await repo.commit()
+
+    request = NaturalLanguageSearchRequest(
+        query="search tool",
+        event_type="tool_result",
+        interpret_nl=False,
+        limit=10,
+    )
+
+    result = await search_sessions_nl(request, repo=repo)
+
+    session_ids = [r.id for r in result.results]
+    assert "session-tool" in session_ids, "session with tool_result event should be returned"
+    assert "session-error" not in session_ids, "session without tool_result event should be excluded"