Address CodeRabbit review feedback for chat context and compaction.

dpage · claude · dpage · commit 11ea048a2fee · 2026-03-12T10:53:55.000Z
- Track tool-use turns as groups instead of one-to-one pairs, so
  multi-tool assistant messages don't leave orphaned results.
- Add fallback to shrink the recent window when protected messages
  alone exceed the token budget, preventing compaction no-ops.
- Fix low-value test fixtures to keep transient messages short so
  they actually classify as low-importance.
- Guard Clear button against in-flight stream race conditions by
  adding a clearedRef flag and cancelling active streams.
- Assert that conversation history is actually passed through to
  chat_with_database in the "With History" test.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
diff --git a/web/pgadmin/llm/compaction.py b/web/pgadmin/llm/compaction.py
@@ -182,34 +182,41 @@ def _classify_message(message: Message) -> float:
     return CLASS_CONTEXTUAL
 
 
-def _find_tool_pair_indices(messages: list[Message]) -> dict[int, int]:
-    """Find indices of tool_call/tool_result pairs that must stay together.
+def _find_tool_pair_indices(
+    messages: list[Message]
+) -> dict[int, frozenset[int]]:
+    """Find indices of tool_call/tool_result groups that must stay together.
 
-    Returns a mapping where both the assistant message index and the
-    tool result message index map to each other, so removing one
-    implies removing both.
+    An assistant message may contain multiple tool_calls, each with a
+    corresponding tool result message. All messages in such a group
+    must be dropped or kept together.
+
+    Returns a mapping where every index in a group maps to the full
+    set of indices in that group.
 
     Args:
         messages: The message list.
 
     Returns:
-        Dict mapping index -> paired index.
+        Dict mapping index -> frozenset of all indices in the group.
     """
-    pairs = {}
+    groups: dict[int, frozenset[int]] = {}
 
     for i, msg in enumerate(messages):
         if msg.role == Role.ASSISTANT and msg.tool_calls:
-            # Find the corresponding tool result(s)
             tool_call_ids = {tc.id for tc in msg.tool_calls}
+            group_indices = {i}
             for j in range(i + 1, len(messages)):
                 if messages[j].role == Role.TOOL:
                     for tr in messages[j].tool_results:
                         if tr.tool_call_id in tool_call_ids:
-                            pairs[i] = j
-                            pairs[j] = i
+                            group_indices.add(j)
                             break
+            group = frozenset(group_indices)
+            for idx in group:
+                groups[idx] = group
 
-    return pairs
+    return groups
 
 
 def compact_history(
@@ -257,8 +264,21 @@ def compact_history(
     for i in range(recent_start, total):
         protected.add(i)
 
-    # Find tool pairs
-    tool_pairs = _find_tool_pair_indices(messages)
+    # If protected messages alone exceed the budget, shrink the
+    # recent window until we have room for compaction candidates.
+    while recent_window > 0:
+        protected_tokens = sum(
+            estimate_message_tokens(messages[i], provider)
+            for i in protected
+        )
+        if protected_tokens <= max_tokens:
+            break
+        recent_window -= 1
+        recent_start = max(1, total - recent_window)
+        protected = {0} | set(range(recent_start, total))
+
+    # Find tool groups
+    tool_groups = _find_tool_pair_indices(messages)
 
     # Classify and score all non-protected messages
     candidates = []
@@ -276,7 +296,7 @@ def compact_history(
         if current_tokens <= max_tokens:
             break
 
-        # Skip if already dropped (as part of a pair)
+        # Skip if already dropped (as part of a group)
         if idx in dropped:
             continue
 
@@ -288,12 +308,14 @@ def compact_history(
         saved = estimate_message_tokens(messages[idx], provider)
         dropped.add(idx)
 
-        # If this is part of a tool pair, drop the partner too
-        if idx in tool_pairs:
-            partner = tool_pairs[idx]
-            if partner not in protected:
-                saved += estimate_message_tokens(messages[partner], provider)
-                dropped.add(partner)
+        # If this is part of a tool group, drop all partners too
+        if idx in tool_groups:
+            for partner in tool_groups[idx]:
+                if partner != idx and partner not in protected:
+                    saved += estimate_message_tokens(
+                        messages[partner], provider
+                    )
+                    dropped.add(partner)
 
         current_tokens -= saved
 
@@ -308,13 +330,13 @@ def compact_history(
             saved = estimate_message_tokens(messages[idx], provider)
             dropped.add(idx)
 
-            if idx in tool_pairs:
-                partner = tool_pairs[idx]
-                if partner not in protected:
-                    saved += estimate_message_tokens(
-                        messages[partner], provider
-                    )
-                    dropped.add(partner)
+            if idx in tool_groups:
+                for partner in tool_groups[idx]:
+                    if partner != idx and partner not in protected:
+                        saved += estimate_message_tokens(
+                            messages[partner], provider
+                        )
+                        dropped.add(partner)
 
             current_tokens -= saved
 
diff --git a/web/pgadmin/llm/tests/test_compaction.py b/web/pgadmin/llm/tests/test_compaction.py
@@ -137,15 +137,16 @@ def test_preserves_first_and_recent(self):
 
     def test_drops_low_value(self):
         """Low-value messages should be dropped first."""
-        # Use longer messages to ensure we exceed the token budget
+        # Filler only on important messages to inflate token count;
+        # keep transient messages short so they classify as low-value.
         filler = ' This is extra text to increase token count.' * 5
         messages = [
             Message.user('First important query' + filler),
-            # Short transient messages (low value)
-            Message.user('ok' + filler),
-            Message.assistant('ok' + filler),
-            Message.user('thanks' + filler),
-            Message.assistant('sure' + filler),
+            # Short transient messages (low value) - no filler
+            Message.user('ok'),
+            Message.assistant('ok'),
+            Message.user('thanks'),
+            Message.assistant('sure'),
             # More substantial messages
             Message.user('Show me the schema with CREATE TABLE' + filler),
             Message.assistant(
@@ -166,6 +167,10 @@ def test_drops_low_value(self):
         self.assertIn('First important query', result[0].content)
         # Last 2 preserved
         self.assertIn('Final answer with details', result[-1].content)
+        # Transient messages should be dropped
+        contents = [m.content for m in result]
+        for short_msg in ['ok', 'thanks', 'sure']:
+            self.assertNotIn(short_msg, contents)
 
     def test_tool_pairs(self):
         """Tool call/result pairs should be dropped together."""
diff --git a/web/pgadmin/tools/sqleditor/static/js/components/sections/NLQChatPanel.jsx b/web/pgadmin/tools/sqleditor/static/js/components/sections/NLQChatPanel.jsx
@@ -292,6 +292,7 @@ export function NLQChatPanel() {
   const abortControllerRef = useRef(null);
   const readerRef = useRef(null);
   const stoppedRef = useRef(false);
+  const clearedRef = useRef(false);
   const eventBus = useContext(QueryToolEventsContext);
   const queryToolCtx = useContext(QueryToolContext);
   const editorPrefs = usePreferences().getPreferencesForModule('editor');
@@ -406,9 +407,21 @@ export function NLQChatPanel() {
   };
 
   const handleClearConversation = () => {
+    // Mark as cleared so in-flight stream handlers ignore late events
+    clearedRef.current = true;
+    // Cancel any active stream
+    if (readerRef.current) {
+      readerRef.current.cancel();
+      readerRef.current = null;
+    }
+    if (abortControllerRef.current) {
+      abortControllerRef.current.abort();
+      abortControllerRef.current = null;
+    }
     setMessages([]);
     setConversationId(null);
     setConversationHistory([]);
+    setIsLoading(false);
   };
 
   // Stop the current request
@@ -446,8 +459,9 @@ export function NLQChatPanel() {
   const handleSubmit = async () => {
     if (!inputValue.trim() || isLoading) return;
 
-    // Reset stopped flag
+    // Reset stopped and cleared flags
     stoppedRef.current = false;
+    clearedRef.current = false;
 
     // Fetch latest LLM provider/model info before submitting
     fetchLlmInfo();
@@ -548,8 +562,8 @@ export function NLQChatPanel() {
 
       readerRef.current = null;
 
-      // Check if user manually stopped
-      if (stoppedRef.current) {
+      // Check if user manually stopped (but not cleared)
+      if (stoppedRef.current && !clearedRef.current) {
         setMessages((prev) => [
           ...prev.filter((m) => m.id !== thinkingId),
           {
@@ -562,8 +576,10 @@ export function NLQChatPanel() {
       clearTimeout(timeoutId);
       abortControllerRef.current = null;
       readerRef.current = null;
-      // Show appropriate message based on error type
-      if (error.name === 'AbortError') {
+      // If conversation was cleared, ignore all late errors
+      if (clearedRef.current) {
+        // Do nothing - conversation was wiped
+      } else if (error.name === 'AbortError') {
         // Check if this was a user-initiated stop or a timeout
         if (stoppedRef.current) {
           // User manually stopped
diff --git a/web/pgadmin/tools/sqleditor/tests/test_nlq_chat.py b/web/pgadmin/tools/sqleditor/tests/test_nlq_chat.py
@@ -109,12 +109,14 @@ def runTest(self):
         patches.append(mock_check_trans)
 
         # Mock chat_with_database
+        mock_chat_patcher = None
+        mock_chat_obj = None
         if hasattr(self, 'mock_response'):
-            mock_chat = patch(
+            mock_chat_patcher = patch(
                 'pgadmin.llm.chat.chat_with_database',
                 return_value=(self.mock_response, [])
             )
-            patches.append(mock_chat)
+            patches.append(mock_chat_patcher)
 
         # Mock CSRF protection
         mock_csrf = patch(
@@ -124,8 +126,12 @@ def runTest(self):
         patches.append(mock_csrf)
 
         # Start all patches
+        started_mocks = []
         for p in patches:
-            p.start()
+            m = p.start()
+            started_mocks.append(m)
+            if p is mock_chat_patcher:
+                mock_chat_obj = m
 
         try:
             # Make request
@@ -156,6 +162,19 @@ def runTest(self):
                 self.assertEqual(response.status_code, 200)
                 self.assertIn('text/event-stream', response.content_type)
 
+                # Verify history was passed to chat_with_database
+                if hasattr(self, 'history') and mock_chat_obj:
+                    mock_chat_obj.assert_called_once()
+                    call_kwargs = mock_chat_obj.call_args.kwargs
+                    conv_hist = call_kwargs.get(
+                        'conversation_history', []
+                    )
+                    self.assertTrue(
+                        len(conv_hist) > 0,
+                        'conversation_history should be non-empty '
+                        'when history is provided'
+                    )
+
         finally:
             # Stop all patches
             for p in patches: