ThinkInAIXYZ
diff --git a/docs/specs/tool-output-guardrails/plan.md b/docs/specs/tool-output-guardrails/plan.md
Lines changed: 30 additions & 0 deletions
diff --git a/docs/specs/tool-output-guardrails/spec.md b/docs/specs/tool-output-guardrails/spec.md
Lines changed: 18 additions & 0 deletions
diff --git a/src/main/presenter/deepchatAgentPresenter/dispatch.ts b/src/main/presenter/deepchatAgentPresenter/dispatch.ts
Lines changed: 163 additions & 80 deletions
@@ -0,0 +1,30 @@
+# Tool Output Guardrails Plan
+
+## Summary
+
+- Keep the existing single-tool offload behavior.
+- Add batch fitting for tool results in the new session agent path only.
+- Preserve the largest prefix of tool results that can still fit the next model call.
+- Downgrade overflow tail results to the fixed context-window failure message before continuing.
+- Keep terminal error fallback when even the fully downgraded batch cannot fit.
+
+## Implementation
+
+- Extend `ToolOutputGuard` with a batch fitting helper that:
+  - evaluates the full staged batch against the context budget
+  - downgrades tail items one by one to the fixed failure message
+  - cleans up offload files for downgraded items
+  - returns terminal fallback if the fully downgraded batch still does not fit
+- Refactor `executeTools()` in `deepchatAgentPresenter/dispatch.ts` into two phases:
+  - execute tools and stage candidate outputs plus side effects
+  - fit the staged batch, then commit final tool messages, blocks, hooks, and search persistence once
+- Keep `question` and `permission` pauses on the immediate path; they are not part of staged batch fitting.
+- Keep deferred permission-resume behavior unchanged.
+
+## Test Plan
+
+- Multi-`read` batch: keep prefix, downgrade overflow tail, continue next provider turn.
+- Mixed `exec`/`read`: downgraded offloaded results must delete their `.offload` files.
+- Search resource result in downgraded tail: no search block and no persisted search rows.
+- Fully downgraded batch still too large: return terminal error.
+- Keep the existing regression tests for deferred single-tool resume passing.
@@ -10,23 +10,27 @@
 - Provider 报错会出现在主进程日志, 但 UI 未必能看到错误信息.
 - `directory_tree` 无深度限制, 可能产生巨量输出, 触发 10MB 限制.
 - 工具返回过大时会被直接注入到 LLM 上下文, 容易导致请求失败.
+- 多个 tool call 在单次 loop 内各自不大, 但累计后仍可能挤爆上下文窗口, 尤其是 `read` 一次读取大量文件时.
 
 ## 目标
 
 - 让生成失败时的错误信息可见并可追溯.
 - 给 `directory_tree` 增加深度控制, 最大不超过 3.
 - 对过大的工具输出做 offload, 用小的 stub 替代进入上下文.
+- 当同一轮多个 tool 结果累计超窗时, 保留能放下的前缀结果, 将尾部结果统一降级为固定失败文案并继续后续模型调用.
 
 ## 非目标
 
 - 不改动或替换 `agentPresenter/tool` 下的 `ToolRegistry`/`toolRouter`.
 - 不改变 MCP UI 资源与搜索结果的解析逻辑.
+- 不改 legacy `AgentPresenter` 链路, 本次仅覆盖新 session agent.
 
 ## 用户故事
 
 1. 作为用户, 我希望生成失败时能在 UI 直接看到原始错误文本.
 2. 作为模型, 我希望能指定目录树深度, 避免一次输出过大.
 3. 作为系统, 我希望工具输出过大时自动 offload, 仍可在需要时读取完整内容.
+4. 作为模型, 我希望当同一批 tool 结果累计超窗时, 能明确知道哪些尾部 tool 因上下文不足而失败, 从而调整下一步策略.
 
 ## 验收标准
 
@@ -57,3 +61,17 @@
 - 模型可以通过文件类工具读取上述路径.
 - 文件类读取工具仅放行当前会话 `conversationId` 对应目录.
 - `tool_call_response_raw` 不被改写, 避免影响 MCP UI/搜索结果处理.
+
+### 同轮批量尾部降级
+
+- 仅在新 session agent 链路启用.
+- 同一轮多个已完成 tool call 在准备进入下一次上下文前, 必须作为一个 batch 统一做预算拟合.
+- 如果所有结果都能放下, 保持原样进入上下文.
+- 如果累计超窗, 系统从该 batch 的尾部开始逐个降级为固定失败文案:
+  - `The tool call with ID <id> and name <name> failed because the remaining context window is too small to continue this turn.`
+- 降级的 tool 视为失败:
+  - assistant tool_call block 显示固定失败文案
+  - 不保留 search block / search result 持久化
+  - 不保留成功型 hooks
+- 经过尾部降级后只要 batch 可以放进上下文, 就继续后续模型调用.
+- 如果把该 batch 所有 tool 都降级为固定失败文案后仍无法放进上下文, 保持 terminal error 兜底, 结束该 turn.
@@ -16,11 +16,27 @@ import type {
 } from './types'
 import type { ChatMessage } from '@shared/types/core/chat-message'
 import { nanoid } from 'nanoid'
-import type { ToolOutputGuard } from './toolOutputGuard'
+import type { ToolBatchOutputFitItem, ToolOutputGuard } from './toolOutputGuard'
 import { buildTerminalErrorBlocks } from './messageStore'
 
 type PermissionType = 'read' | 'write' | 'all' | 'command'
 
+type ExtractedSearchPayload = ReturnType<typeof extractSearchPayload> // Mirrors extractSearchPayload's return type; staging code assigns null to it and commit code checks it for truthiness.
+
+type StagedToolResult = { // One tool call's outcome, held until the whole batch has been fitted.
+  toolCallId: string
+  toolName: string
+  toolArgs: string // Forwarded as `params` to the post-tool hooks.
+  responseText: string // Candidate tool-message text captured before batch fitting.
+  isError: boolean
+  offloadPath?: string // Populated only from a prepared result of kind 'ok'; undefined otherwise.
+  searchPayload: ExtractedSearchPayload
+  rtkApplied?: boolean
+  rtkMode?: 'rewrite' | 'direct' | 'bypass'
+  rtkFallbackReason?: string
+  postHookKind: 'success' | 'failure' // Chosen at staging time: 'failure' whenever the staged result is an error.
+}
+
 type PermissionRequestLike = {
   toolName?: string
   serverName?: string
@@ -189,6 +205,90 @@ function updateToolCallBlock(
   }
 }
 
+function persistToolExecutionState(io: IoParams, state: StreamState): void { // Pushes state.blocks to the renderer and the message store, but only when state.dirty is set.
+  if (!state.dirty) { // Nothing has been marked as changed since the last flush.
+    return
+  }
+
+  flushBlocksToRenderer(io, state.blocks)
+  io.messageStore.updateAssistantContent(io.messageId, state.blocks) // Same blocks are written as the assistant message content.
+  state.dirty = false // A repeated call is a no-op until something sets dirty again.
+}
+
+function applyFinalizedToolResults(params: { // Commits a fitted tool batch: tool messages, search blocks/rows, tool_call block updates, and post-tool hooks.
+  stagedResults: StagedToolResult[] // Per-tool outcomes captured during execution, before fitting.
+  fittedResults: ToolBatchOutputFitItem[] // Fitting outcome; read by the same index as stagedResults.
+  conversation: ChatMessage[] // Mutated in place when appendToConversation is true.
+  state: StreamState
+  io: IoParams
+  hooks?: ProcessHooks
+  appendToConversation: boolean // The visible caller passes `fittedResults.kind === 'ok'`.
+}): void {
+  const { stagedResults, fittedResults, conversation, state, io, hooks, appendToConversation } =
+    params
+
+  for (let index = 0; index < stagedResults.length; index += 1) {
+    const stagedResult = stagedResults[index]
+    const fittedResult = fittedResults[index] // Paired by position; assumes the guard returns results in input order — TODO confirm.
+    if (!fittedResult) { // No fitted entry at this index: the tool gets no message, block update, or hook.
+      continue
+    }
+
+    if (appendToConversation) {
+      conversation.push({
+        role: 'tool',
+        tool_call_id: fittedResult.toolCallId,
+        content: fittedResult.contextResponseText // Context text may differ from the responseText used for the block below.
+      })
+    }
+
+    if (!fittedResult.downgraded && stagedResult.searchPayload) { // Search block and persisted rows are kept only for results that were not downgraded.
+      state.blocks.push(stagedResult.searchPayload.block)
+      for (const result of stagedResult.searchPayload.results) {
+        io.messageStore.addSearchResult({
+          sessionId: io.sessionId,
+          messageId: io.messageId,
+          searchId: result.searchId,
+          rank: typeof result.rank === 'number' ? result.rank : null, // Non-numeric ranks are stored as null.
+          result
+        })
+      }
+    }
+
+    updateToolCallBlock(
+      state.blocks,
+      fittedResult.toolCallId,
+      fittedResult.responseText,
+      fittedResult.isError,
+      fittedResult.downgraded
+        ? undefined // Downgraded results pass no RTK metadata.
+        : {
+            rtkApplied: stagedResult.rtkApplied,
+            rtkMode: stagedResult.rtkMode,
+            rtkFallbackReason: stagedResult.rtkFallbackReason
+          }
+    )
+
+    if (fittedResult.isError) { // Failure hook receives the fitted response text as the error.
+      hooks?.onPostToolUseFailure?.({
+        callId: stagedResult.toolCallId,
+        name: stagedResult.toolName,
+        params: stagedResult.toolArgs,
+        error: fittedResult.responseText
+      })
+    } else if (stagedResult.postHookKind === 'success') { // Success hook fires only if the result is not an error after fitting and was staged as success.
+      hooks?.onPostToolUse?.({
+        callId: stagedResult.toolCallId,
+        name: stagedResult.toolName,
+        params: stagedResult.toolArgs,
+        response: fittedResult.responseText
+      })
+    }
+  }
+
+  state.dirty = true // Set unconditionally, even if every entry was skipped above.
+}
+
 function isPermissionType(value: unknown): value is PermissionType {
   return value === 'read' || value === 'write' || value === 'all' || value === 'command'
 }
@@ -450,6 +550,7 @@ export async function executeTools(
 
   let executed = 0
   const pendingInteractions: PendingToolInteraction[] = []
+  const stagedResults: StagedToolResult[] = []
 
   for (const tc of state.completedToolCalls) {
     if (io.abortSignal.aborted) break
@@ -486,8 +587,7 @@ export async function executeTools(
           updateToolCallBlock(state.blocks, tc.id, errorText, true)
           state.dirty = true
           executed += 1
-          flushBlocksToRenderer(io, state.blocks)
-          io.messageStore.updateAssistantContent(io.messageId, state.blocks)
+          persistToolExecutionState(io, state)
           continue
         }
 
@@ -584,100 +684,83 @@ export async function executeTools(
         toolContext.name,
         toolContext.serverName
       )
-      if (searchPayload) {
-        state.blocks.push(searchPayload.block)
-        for (const result of searchPayload.results) {
-          io.messageStore.addSearchResult({
-            sessionId: io.sessionId,
-            messageId: io.messageId,
-            searchId: result.searchId,
-            rank: typeof result.rank === 'number' ? result.rank : null,
-            result
-          })
-        }
-      }
 
       const responseText = toolResponseToText(toolRawData.content)
-      const guardedResult = await toolOutputGuard.guardToolOutput({
+      const preparedResult = await toolOutputGuard.prepareToolOutput({
         sessionId: io.sessionId,
         toolCallId: tc.id,
         toolName: toolContext.name,
-        rawContent: responseText,
-        conversationMessages: conversation,
-        toolDefinitions: tools,
-        contextLength,
-        maxTokens
+        rawContent: responseText
       })
+      const stagedResponseText =
+        preparedResult.kind === 'tool_error' ? preparedResult.message : preparedResult.content
+      const stagedIsError = preparedResult.kind === 'tool_error' || toolRawData.isError === true
 
-      if (guardedResult.kind === 'terminal_error') {
-        updateToolCallBlock(state.blocks, tc.id, guardedResult.message, true)
-        hooks?.onPostToolUseFailure?.({
-          callId: tc.id,
-          name: tc.name,
-          params: tc.arguments,
-          error: guardedResult.message
-        })
-        state.dirty = true
-        executed += 1
-        flushBlocksToRenderer(io, state.blocks)
-        io.messageStore.updateAssistantContent(io.messageId, state.blocks)
-        return {
-          executed,
-          pendingInteractions,
-          terminalError: guardedResult.message
-        }
-      }
-
-      const isToolError = guardedResult.kind === 'tool_error' || toolRawData.isError === true
-      const toolMessageContent =
-        guardedResult.kind === 'tool_error' ? guardedResult.message : guardedResult.content
-      conversation.push({
-        role: 'tool',
-        tool_call_id: tc.id,
-        content: toolMessageContent
-      })
-      updateToolCallBlock(state.blocks, tc.id, toolMessageContent, isToolError, {
+      stagedResults.push({
+        toolCallId: tc.id,
+        toolName: tc.name,
+        toolArgs: tc.arguments,
+        responseText: stagedResponseText,
+        isError: stagedIsError,
+        offloadPath: preparedResult.kind === 'ok' ? preparedResult.offloadPath : undefined,
+        searchPayload,
         rtkApplied: toolRawData.rtkApplied,
         rtkMode: toolRawData.rtkMode,
-        rtkFallbackReason: toolRawData.rtkFallbackReason
+        rtkFallbackReason: toolRawData.rtkFallbackReason,
+        postHookKind: stagedIsError ? 'failure' : 'success'
       })
-      if (isToolError) {
-        hooks?.onPostToolUseFailure?.({
-          callId: tc.id,
-          name: tc.name,
-          params: tc.arguments,
-          error: toolMessageContent
-        })
-      } else {
-        hooks?.onPostToolUse?.({
-          callId: tc.id,
-          name: tc.name,
-          params: tc.arguments,
-          response: toolMessageContent
-        })
-      }
+      executed += 1
     } catch (err) {
       const errorText = err instanceof Error ? err.message : String(err)
-      conversation.push({
-        role: 'tool',
-        tool_call_id: tc.id,
-        content: `Error: ${errorText}`
-      })
-      updateToolCallBlock(state.blocks, tc.id, `Error: ${errorText}`, true)
-      hooks?.onPostToolUseFailure?.({
-        callId: tc.id,
-        name: tc.name,
-        params: tc.arguments,
-        error: `Error: ${errorText}`
+      stagedResults.push({
+        toolCallId: tc.id,
+        toolName: tc.name,
+        toolArgs: tc.arguments,
+        responseText: `Error: ${errorText}`,
+        isError: true,
+        searchPayload: null,
+        postHookKind: 'failure'
       })
+      executed += 1
     }
+  }
+
+  if (stagedResults.length > 0) {
+    const fittedResults = await toolOutputGuard.fitToolBatchOutputs({
+      conversationMessages: conversation,
+      results: stagedResults.map((result) => ({
+        toolCallId: result.toolCallId,
+        toolName: result.toolName,
+        responseText: result.responseText,
+        isError: result.isError,
+        offloadPath: result.offloadPath
+      })),
+      toolDefinitions: tools,
+      contextLength,
+      maxTokens
+    })
 
-    state.dirty = true
-    executed += 1
-    flushBlocksToRenderer(io, state.blocks)
-    io.messageStore.updateAssistantContent(io.messageId, state.blocks)
+    applyFinalizedToolResults({
+      stagedResults,
+      fittedResults: fittedResults.results,
+      conversation,
+      state,
+      io,
+      hooks,
+      appendToConversation: fittedResults.kind === 'ok'
+    })
+    persistToolExecutionState(io, state)
+
+    if (fittedResults.kind === 'terminal_error') {
+      return {
+        executed,
+        pendingInteractions,
+        terminalError: fittedResults.message
+      }
+    }
   }
 
+  persistToolExecutionState(io, state)
   return { executed, pendingInteractions }
 }