diff --git a/src/main/presenter/deepchatAgentPresenter/dispatch.ts b/src/main/presenter/deepchatAgentPresenter/dispatch.ts index 52b1d6a9b..2b5ffe5ba 100644 --- a/src/main/presenter/deepchatAgentPresenter/dispatch.ts +++ b/src/main/presenter/deepchatAgentPresenter/dispatch.ts @@ -9,6 +9,7 @@ import { parseQuestionToolArgs, QUESTION_TOOL_NAME } from '../agentPresenter/too import type { IoParams, PendingToolInteraction, StreamState } from './types' import type { ChatMessage } from '@shared/types/core/chat-message' import { nanoid } from 'nanoid' +import type { ToolOutputGuard } from './toolOutputGuard' type PermissionType = 'read' | 'write' | 'all' | 'command' @@ -376,8 +377,15 @@ export async function executeTools( toolPresenter: IToolPresenter, modelId: string, io: IoParams, - permissionMode: PermissionMode -): Promise<{ executed: number; pendingInteractions: PendingToolInteraction[] }> { + permissionMode: PermissionMode, + toolOutputGuard: ToolOutputGuard, + contextLength: number, + maxTokens: number +): Promise<{ + executed: number + pendingInteractions: PendingToolInteraction[] + terminalError?: string +}> { for (const tc of state.completedToolCalls) { const toolDef = tools.find((t) => t.function.name === tc.name) if (!toolDef) continue @@ -542,12 +550,39 @@ export async function executeTools( } const responseText = toolResponseToText(toolRawData.content) + const guardedResult = await toolOutputGuard.guardToolOutput({ + sessionId: io.sessionId, + toolCallId: tc.id, + toolName: toolContext.name, + rawContent: responseText, + conversationMessages: conversation, + toolDefinitions: tools, + contextLength, + maxTokens + }) + + if (guardedResult.kind === 'terminal_error') { + updateToolCallBlock(state.blocks, tc.id, guardedResult.message, true) + state.dirty = true + executed += 1 + flushBlocksToRenderer(io, state.blocks) + io.messageStore.updateAssistantContent(io.messageId, state.blocks) + return { + executed, + pendingInteractions, + terminalError: 
guardedResult.message + } + } + + const isToolError = guardedResult.kind === 'tool_error' || toolRawData.isError === true + const toolMessageContent = + guardedResult.kind === 'tool_error' ? guardedResult.message : guardedResult.content conversation.push({ role: 'tool', tool_call_id: tc.id, - content: responseText + content: toolMessageContent }) - updateToolCallBlock(state.blocks, tc.id, responseText, false) + updateToolCallBlock(state.blocks, tc.id, toolMessageContent, isToolError) } catch (err) { const errorText = err instanceof Error ? err.message : String(err) conversation.push({ diff --git a/src/main/presenter/deepchatAgentPresenter/index.ts b/src/main/presenter/deepchatAgentPresenter/index.ts index 80aa301a1..487dd9e68 100644 --- a/src/main/presenter/deepchatAgentPresenter/index.ts +++ b/src/main/presenter/deepchatAgentPresenter/index.ts @@ -14,7 +14,12 @@ import type { } from '@shared/types/agent-interface' import type { MCPToolCall, MCPToolResponse } from '@shared/types/core/mcp' import type { ChatMessage } from '@shared/types/core/chat-message' -import type { IConfigPresenter, ILlmProviderPresenter, ModelConfig } from '@shared/presenter' +import type { + IConfigPresenter, + ILlmProviderPresenter, + MCPToolDefinition, + ModelConfig +} from '@shared/presenter' import type { IToolPresenter } from '@shared/types/presenters/tool.presenter' import { nanoid } from 'nanoid' import type { SQLitePresenter } from '../sqlitePresenter' @@ -32,6 +37,7 @@ import { DeepChatMessageStore } from './messageStore' import { processStream } from './process' import { DeepChatSessionStore, type SessionSummaryState } from './sessionStore' import type { PendingToolInteraction, ProcessResult } from './types' +import { ToolOutputGuard } from './toolOutputGuard' import type { ProviderRequestTracePayload } from '../llmProviderPresenter/requestTrace' type PendingInteractionEntry = { @@ -42,8 +48,16 @@ type PendingInteractionEntry = { type DeferredToolExecutionResult = { responseText: 
string isError: boolean + offloadPath?: string requiresPermission?: boolean permissionRequest?: PendingToolInteraction['permission'] + terminalError?: string +} + +type ResumeBudgetToolCall = { + id: string + name: string + offloadPath?: string } type PersistedSessionGenerationRow = { @@ -90,6 +104,7 @@ export class DeepChatAgentPresenter implements IAgentImplementation { private readonly interactionLocks: Set = new Set() private readonly resumingMessages: Set = new Set() private readonly compactionService: CompactionService + private readonly toolOutputGuard: ToolOutputGuard constructor( llmProviderPresenter: ILlmProviderPresenter, @@ -108,6 +123,7 @@ export class DeepChatAgentPresenter implements IAgentImplementation { this.llmProviderPresenter, this.configPresenter ) + this.toolOutputGuard = new ToolOutputGuard() const recovered = this.messageStore.recoverPendingMessages() if (recovered > 0) { @@ -348,6 +364,7 @@ export class DeepChatAgentPresenter implements IAgentImplementation { } let waitingForUserMessage = false + let resumeBudgetToolCall: ResumeBudgetToolCall | null = null const actionBlock = blocks[currentEntry.blockIndex] const toolCall = actionBlock.tool_call if (!toolCall?.id) { @@ -385,12 +402,30 @@ export class DeepChatAgentPresenter implements IAgentImplementation { this.markPermissionResolved(actionBlock, true, permissionType) await this.grantPermissionForPayload(sessionId, permissionPayload, toolCall) const execution = await this.executeDeferredToolCall(sessionId, toolCall) + if (execution.terminalError) { + this.updateToolCallResponse(blocks, toolCall.id, execution.terminalError, true) + this.messageStore.setMessageError(messageId, blocks) + this.emitMessageRefresh(sessionId, messageId) + eventBus.sendToRenderer(STREAM_EVENTS.ERROR, SendTarget.ALL_WINDOWS, { + conversationId: sessionId, + eventId: messageId, + messageId, + error: execution.terminalError + }) + this.setSessionStatus(sessionId, 'error') + return { resumed: false } + } 
this.updateToolCallResponse( blocks, toolCall.id, execution.responseText, execution.isError ) + resumeBudgetToolCall = { + id: toolCall.id, + name: toolCall.name || '', + offloadPath: execution.offloadPath + } if (execution.requiresPermission && execution.permissionRequest) { actionBlock.status = 'pending' @@ -426,8 +461,13 @@ export class DeepChatAgentPresenter implements IAgentImplementation { return { resumed: false, waitingForUserMessage: true } } - await this.resumeAssistantMessage(sessionId, messageId, blocks) - return { resumed: true } + const resumed = await this.resumeAssistantMessage( + sessionId, + messageId, + blocks, + resumeBudgetToolCall + ) + return { resumed } } finally { this.interactionLocks.delete(lockKey) } @@ -698,9 +738,10 @@ export class DeepChatAgentPresenter implements IAgentImplementation { messageId: string messages: ChatMessage[] projectDir: string | null + tools?: MCPToolDefinition[] initialBlocks?: AssistantMessageBlock[] }): Promise { - const { sessionId, messageId, messages, projectDir, initialBlocks } = args + const { sessionId, messageId, messages, projectDir, tools: providedTools, initialBlocks } = args const state = this.runtimeState.get(sessionId) if (!state) { throw new Error(`Session ${sessionId} not found`) @@ -758,18 +799,7 @@ export class DeepChatAgentPresenter implements IAgentImplementation { const temperature = generationSettings.temperature const maxTokens = generationSettings.maxTokens - let tools: import('@shared/presenter').MCPToolDefinition[] = [] - if (this.toolPresenter) { - try { - tools = await this.toolPresenter.getAllToolDefinitions({ - chatMode: 'agent', - conversationId: sessionId, - agentWorkspacePath: projectDir - }) - } catch (error) { - console.error('[DeepChatAgent] failed to fetch tool definitions:', error) - } - } + const tools = providedTools ?? 
(await this.loadToolDefinitionsForSession(sessionId, projectDir)) const abortController = new AbortController() this.abortControllers.set(sessionId, abortController) @@ -786,6 +816,7 @@ export class DeepChatAgentPresenter implements IAgentImplementation { temperature, maxTokens, permissionMode: state.permissionMode, + toolOutputGuard: this.toolOutputGuard, initialBlocks, io: { sessionId, @@ -828,10 +859,11 @@ export class DeepChatAgentPresenter implements IAgentImplementation { private async resumeAssistantMessage( sessionId: string, messageId: string, - initialBlocks: AssistantMessageBlock[] - ): Promise { + initialBlocks: AssistantMessageBlock[], + budgetToolCall?: ResumeBudgetToolCall | null + ): Promise { if (this.resumingMessages.has(messageId)) { - return + return false } this.resumingMessages.add(messageId) @@ -859,7 +891,7 @@ export class DeepChatAgentPresenter implements IAgentImplementation { supportsVision: this.supportsVision(state.providerId, state.modelId) }) const systemPrompt = appendSummarySection(baseSystemPrompt, summaryState.summaryText) - const resumeContext = buildResumeContext( + let resumeContext = buildResumeContext( sessionId, messageId, systemPrompt, @@ -872,15 +904,55 @@ export class DeepChatAgentPresenter implements IAgentImplementation { fallbackProtectedTurnCount: 1 } ) + const projectDir = this.resolveProjectDir(sessionId) + const tools = await this.loadToolDefinitionsForSession(sessionId, projectDir) + + if (budgetToolCall?.id && budgetToolCall.name) { + const resumeBudget = this.fitResumeBudgetForToolCall({ + resumeContext, + toolDefinitions: tools, + contextLength: generationSettings.contextLength, + maxTokens, + toolCallId: budgetToolCall.id, + toolName: budgetToolCall.name + }) + + if (resumeBudget?.kind === 'tool_error') { + await this.toolOutputGuard.cleanupOffloadedOutput(budgetToolCall.offloadPath) + this.updateToolCallResponse(initialBlocks, budgetToolCall.id, resumeBudget.message, true) + 
this.messageStore.updateAssistantContent(messageId, initialBlocks) + this.emitMessageRefresh(sessionId, messageId) + resumeContext = this.toolOutputGuard.replaceToolMessageContent( + resumeContext, + budgetToolCall.id, + resumeBudget.message + ) + } else if (resumeBudget?.kind === 'terminal_error') { + await this.toolOutputGuard.cleanupOffloadedOutput(budgetToolCall.offloadPath) + this.updateToolCallResponse(initialBlocks, budgetToolCall.id, resumeBudget.message, true) + this.messageStore.setMessageError(messageId, initialBlocks) + this.emitMessageRefresh(sessionId, messageId) + eventBus.sendToRenderer(STREAM_EVENTS.ERROR, SendTarget.ALL_WINDOWS, { + conversationId: sessionId, + eventId: messageId, + messageId, + error: resumeBudget.message + }) + this.setSessionStatus(sessionId, 'error') + return false + } + } const result = await this.runStreamForMessage({ sessionId, messageId, messages: resumeContext, - projectDir: this.resolveProjectDir(sessionId), + projectDir, + tools, initialBlocks }) this.applyProcessResultStatus(sessionId, result) + return true } catch (error) { console.error('[DeepChatAgent] resumeAssistantMessage error:', error) this.setSessionStatus(sessionId, 'error') @@ -1793,19 +1865,7 @@ export class DeepChatAgentPresenter implements IAgentImplementation { } const projectDir = this.resolveProjectDir(sessionId) - let toolDefinitions: import('@shared/presenter').MCPToolDefinition[] = [] - try { - toolDefinitions = await this.toolPresenter.getAllToolDefinitions({ - chatMode: 'agent', - conversationId: sessionId, - agentWorkspacePath: projectDir - }) - } catch (error) { - console.error( - '[DeepChatAgent] Failed to load tool definitions for deferred execution:', - error - ) - } + const toolDefinitions = await this.loadToolDefinitionsForSession(sessionId, projectDir) const toolDefinition = toolDefinitions.find((definition) => { if (definition.function.name !== toolName) { @@ -1839,9 +1899,23 @@ export class DeepChatAgentPresenter implements 
IAgentImplementation { permissionRequest: rawData.permissionRequest as PendingToolInteraction['permission'] } } + const responseText = this.toolContentToText(rawData.content) + const prepared = await this.toolOutputGuard.prepareToolOutput({ + sessionId, + toolCallId: toolCall.id || '', + toolName, + rawContent: responseText + }) + if (prepared.kind === 'tool_error') { + return { + responseText: prepared.message, + isError: true + } + } return { - responseText: this.toolContentToText(rawData.content), - isError: Boolean(rawData.isError) + responseText: prepared.content, + isError: Boolean(rawData.isError), + offloadPath: prepared.offloadPath } } catch (error) { const errorText = error instanceof Error ? error.message : String(error) @@ -1852,6 +1926,60 @@ export class DeepChatAgentPresenter implements IAgentImplementation { } } + private async loadToolDefinitionsForSession( + sessionId: string, + projectDir: string | null + ): Promise { + if (!this.toolPresenter) { + return [] + } + + try { + return await this.toolPresenter.getAllToolDefinitions({ + chatMode: 'agent', + conversationId: sessionId, + agentWorkspacePath: projectDir + }) + } catch (error) { + console.error('[DeepChatAgent] failed to fetch tool definitions:', error) + return [] + } + } + + private fitResumeBudgetForToolCall(params: { + resumeContext: ChatMessage[] + toolDefinitions: MCPToolDefinition[] + contextLength: number + maxTokens: number + toolCallId: string + toolName: string + }) { + if ( + this.toolOutputGuard.hasContextBudget({ + conversationMessages: params.resumeContext, + toolDefinitions: params.toolDefinitions, + contextLength: params.contextLength, + maxTokens: params.maxTokens + }) + ) { + return null + } + + return this.toolOutputGuard.fitToolError({ + conversationMessages: params.resumeContext, + toolDefinitions: params.toolDefinitions, + contextLength: params.contextLength, + maxTokens: params.maxTokens, + toolCallId: params.toolCallId, + toolName: params.toolName, + errorMessage: 
this.toolOutputGuard.buildContextOverflowMessage( + params.toolCallId, + params.toolName + ), + mode: 'replace' + }) + } + private toolContentToText(content: MCPToolResponse['content']): string { if (typeof content === 'string') { return content diff --git a/src/main/presenter/deepchatAgentPresenter/process.ts b/src/main/presenter/deepchatAgentPresenter/process.ts index ea7b2d032..5c059029c 100644 --- a/src/main/presenter/deepchatAgentPresenter/process.ts +++ b/src/main/presenter/deepchatAgentPresenter/process.ts @@ -1,4 +1,4 @@ -import type { ProcessParams, ProcessResult } from './types' +import type { ProcessParams, ProcessResult, StreamState } from './types' import { createState } from './types' import { accumulate } from './accumulator' import { startEcho } from './echo' @@ -7,6 +7,37 @@ import { eventBus, SendTarget } from '@/eventbus' import { STREAM_EVENTS } from '@/events' const MAX_TOOL_CALLS = 128 +const UNKNOWN_CONTEXT_LIMIT = Number.MAX_SAFE_INTEGER +const CONTEXT_WINDOW_ERROR_PATTERNS = [ + 'context length', + 'context window', + 'too many tokens', + 'prompt too long', + 'maximum context length', + 'reduce the length' +] + +function isContextWindowErrorMessage(message: string): boolean { + const normalized = message.toLowerCase() + return CONTEXT_WINDOW_ERROR_PATTERNS.some((pattern) => normalized.includes(pattern)) +} + +function getLatestErrorMessage(state: StreamState): string | null { + for (let index = state.blocks.length - 1; index >= 0; index -= 1) { + const block = state.blocks[index] + if (block.type === 'error' && typeof block.content === 'string' && block.content.trim()) { + return block.content + } + } + return null +} + +function stripTrailingErrorBlock(state: StreamState, message: string): void { + const lastBlock = state.blocks[state.blocks.length - 1] + if (lastBlock?.type === 'error' && lastBlock.content === message) { + state.blocks.pop() + } +} /** * Unified stream processor. 
Handles both simple completions and multi-turn @@ -112,11 +143,22 @@ export async function processStream(params: ProcessParams): Promise 0 ? modelConfig.contextLength : UNKNOWN_CONTEXT_LIMIT, + maxTokens ) toolCallCount += executed.executed echo.flush() + if (executed.terminalError) { + finalizeError(state, io, executed.terminalError) + return { + status: 'error' as const, + terminalError: executed.terminalError + } + } + if (executed.pendingInteractions.length > 0) { console.log( `[ProcessStream] paused for user interaction count=${executed.pendingInteractions.length}` @@ -133,6 +175,17 @@ export async function processStream(params: ProcessParams): Promise { + const { sessionId, toolCallId, toolName, rawContent } = params + + if (!this.requiresOffload(toolName) || rawContent.length <= TOOL_OUTPUT_OFFLOAD_THRESHOLD) { + return { + kind: 'ok', + content: rawContent, + offloaded: false, + offloadPath: undefined + } + } + + const filePath = resolveToolOffloadPath(sessionId, toolCallId) + if (!filePath) { + return { + kind: 'tool_error', + message: this.buildOffloadFailureMessage(toolCallId, toolName) + } + } + + try { + await fs.mkdir(path.dirname(filePath), { recursive: true }) + await fs.writeFile(filePath, rawContent, 'utf-8') + } catch (error) { + console.warn('[ToolOutputGuard] Failed to offload tool output:', error) + return { + kind: 'tool_error', + message: this.buildOffloadFailureMessage(toolCallId, toolName) + } + } + + return { + kind: 'ok', + content: this.buildOffloadStub(rawContent, filePath), + offloaded: true, + offloadPath: filePath + } + } + + async guardToolOutput(params: GuardToolOutputParams): Promise { + const prepared = await this.prepareToolOutput(params) + if (prepared.kind === 'tool_error') { + return this.fitToolError({ + ...params, + errorMessage: prepared.message + }) + } + + const nextMessages = this.withToolMessage( + params.conversationMessages, + params.toolCallId, + prepared.content, + 'append' + ) + if ( + this.hasContextBudget({ + 
conversationMessages: nextMessages, + toolDefinitions: params.toolDefinitions, + contextLength: params.contextLength, + maxTokens: params.maxTokens + }) + ) { + return prepared + } + + const overflowResult = this.fitToolError({ + ...params, + errorMessage: this.buildContextOverflowMessage(params.toolCallId, params.toolName) + }) + await this.cleanupOffloadedOutput(prepared.offloadPath) + return overflowResult + } + + hasContextBudget(params: ContextBudgetParams): boolean { + const { conversationMessages, toolDefinitions, contextLength, maxTokens } = params + const toolDefinitionTokens = toolDefinitions.reduce( + (total, tool) => total + approximateTokenSize(JSON.stringify(tool)), + 0 + ) + return ( + estimateMessagesTokens(conversationMessages) + + toolDefinitionTokens + + Math.max(0, Math.floor(maxTokens)) <= + contextLength + ) + } + + fitToolError(params: FitToolErrorParams): ToolOutputGuardResult { + const mode = params.mode ?? 'append' + const errorMessages = this.withToolMessage( + params.conversationMessages, + params.toolCallId, + params.errorMessage, + mode + ) + if ( + this.hasContextBudget({ + conversationMessages: errorMessages, + toolDefinitions: params.toolDefinitions, + contextLength: params.contextLength, + maxTokens: params.maxTokens + }) + ) { + return { + kind: 'tool_error', + message: params.errorMessage + } + } + return { + kind: 'terminal_error', + message: this.buildTerminalErrorMessage(params.toolCallId, params.toolName) + } + } + + replaceToolMessageContent( + conversationMessages: ChatMessage[], + toolCallId: string, + content: string + ): ChatMessage[] { + return this.withToolMessage(conversationMessages, toolCallId, content, 'replace') + } + + async cleanupOffloadedOutput(offloadPath?: string): Promise { + if (!offloadPath) { + return + } + + try { + await fs.rm(offloadPath, { force: true }) + } catch (error) { + console.warn('[ToolOutputGuard] Failed to delete offloaded tool output:', error) + } + } + + 
buildContextOverflowMessage(toolCallId: string, toolName: string): string { + return `The tool call with ID ${toolCallId} and name ${toolName} could not be injected into the conversation because the remaining context window is insufficient. Treat this tool call as failed and continue without its result.` + } + + private requiresOffload(toolName: string): boolean { + return TOOLS_REQUIRING_OFFLOAD.has(toolName) || toolName.startsWith('yo_browser_') + } + + private withToolMessage( + conversationMessages: ChatMessage[], + toolCallId: string, + content: string, + mode: ToolMessageUpdateMode + ): ChatMessage[] { + if (mode === 'replace') { + let replaced = false + const nextMessages = conversationMessages.map((message) => { + if (replaced || message.role !== 'tool' || message.tool_call_id !== toolCallId) { + return message + } + replaced = true + return { + ...message, + content + } + }) + if (replaced) { + return nextMessages + } + return [ + ...nextMessages, + { + role: 'tool', + tool_call_id: toolCallId, + content + } + ] + } + + return [ + ...conversationMessages, + { + role: 'tool', + tool_call_id: toolCallId, + content + } + ] + } + + private buildOffloadStub(rawContent: string, filePath: string): string { + const preview = rawContent.slice(0, TOOL_OUTPUT_PREVIEW_LENGTH) + return [ + '[Tool output offloaded]', + `Total characters: ${rawContent.length}`, + `Offload file: ${path.basename(filePath)}`, + `first ${preview.length} chars:`, + preview + ].join('\n') + } + + private buildOffloadFailureMessage(toolCallId: string, toolName: string): string { + return `The tool call with ID ${toolCallId} and name ${toolName} produced a large result, but offloading that result to disk failed. 
Treat this tool call as failed and continue without its result.` + } + + private buildTerminalErrorMessage(toolCallId: string, toolName: string): string { + return `The tool call with ID ${toolCallId} and name ${toolName} failed because the remaining context window is too small to continue this turn.` + } +} diff --git a/src/main/presenter/deepchatAgentPresenter/types.ts b/src/main/presenter/deepchatAgentPresenter/types.ts index d01930887..b4ca8561c 100644 --- a/src/main/presenter/deepchatAgentPresenter/types.ts +++ b/src/main/presenter/deepchatAgentPresenter/types.ts @@ -9,6 +9,7 @@ import type { ChatMessage } from '@shared/types/core/chat-message' import type { MCPToolDefinition, ModelConfig } from '@shared/presenter' import type { IToolPresenter } from '@shared/types/presenters/tool.presenter' import type { DeepChatMessageStore } from './messageStore' +import type { ToolOutputGuard } from './toolOutputGuard' export interface ToolCallResult { id: string @@ -77,6 +78,7 @@ export interface PendingToolInteraction { export interface ProcessResult { status: 'completed' | 'paused' | 'aborted' | 'error' pendingInteractions?: PendingToolInteraction[] + terminalError?: string } export interface ProcessParams { @@ -97,6 +99,7 @@ export interface ProcessParams { temperature: number maxTokens: number permissionMode: PermissionMode + toolOutputGuard: ToolOutputGuard initialBlocks?: AssistantMessageBlock[] io: IoParams } diff --git a/test/main/presenter/deepchatAgentPresenter/deepchatAgentPresenter.test.ts b/test/main/presenter/deepchatAgentPresenter/deepchatAgentPresenter.test.ts index 330753479..aa654021b 100644 --- a/test/main/presenter/deepchatAgentPresenter/deepchatAgentPresenter.test.ts +++ b/test/main/presenter/deepchatAgentPresenter/deepchatAgentPresenter.test.ts @@ -1,4 +1,8 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest' +import fs from 'fs/promises' +import os from 'os' +import path from 'path' +import { app } from 'electron' import { 
DeepChatAgentPresenter } from '@/presenter/deepchatAgentPresenter/index' vi.mock('nanoid', () => ({ nanoid: vi.fn(() => 'mock-msg-id') })) @@ -195,6 +199,8 @@ describe('DeepChatAgentPresenter', () => { let configPresenter: ReturnType let toolPresenter: ReturnType let agent: DeepChatAgentPresenter + let tempHome: string | null = null + let getPathSpy: ReturnType | null = null beforeEach(() => { vi.clearAllMocks() @@ -213,8 +219,14 @@ describe('DeepChatAgentPresenter', () => { agent = new DeepChatAgentPresenter(llmProvider, configPresenter, sqlitePresenter, toolPresenter) }) - afterEach(() => { + afterEach(async () => { vi.useRealTimers() + getPathSpy?.mockRestore() + getPathSpy = null + if (tempHome) { + await fs.rm(tempHome, { recursive: true, force: true }) + tempHome = null + } }) describe('constructor (crash recovery)', () => { @@ -1562,6 +1574,163 @@ describe('DeepChatAgentPresenter', () => { expect(updatedBlocks[1].extra.needsUserAction).toBe(false) }) + it('offloads deferred tool results before resume', async () => { + tempHome = await fs.mkdtemp(path.join(os.tmpdir(), 'deepchat-deferred-offload-')) + getPathSpy = vi.spyOn(app, 'getPath').mockReturnValue(tempHome) + + await agent.initSession('s1', { providerId: 'openai', modelId: 'gpt-4' }) + makeAssistantRow({ + blocks: [ + { + type: 'tool_call', + status: 'pending', + timestamp: 1, + tool_call: { + id: 'tc1', + name: 'yo_browser_cdp_send', + params: '{"method":"Page.captureScreenshot"}', + response: '' + } + }, + { + type: 'action', + action_type: 'tool_call_permission', + status: 'pending', + timestamp: 2, + content: 'Need permission', + tool_call: { + id: 'tc1', + name: 'yo_browser_cdp_send', + params: '{"method":"Page.captureScreenshot"}' + }, + extra: { + needsUserAction: true, + permissionType: 'write', + permissionRequest: JSON.stringify({ + permissionType: 'write', + description: 'Need permission', + toolName: 'yo_browser_cdp_send', + serverName: 'yo-browser' + }) + } + } + ] + }) + 
toolPresenter.getAllToolDefinitions.mockResolvedValueOnce([ + { + type: 'function', + function: { + name: 'yo_browser_cdp_send', + description: 'CDP send', + parameters: { type: 'object', properties: {} } + }, + server: { name: 'yo-browser', icons: '', description: '' } + } + ]) + toolPresenter.callTool.mockResolvedValueOnce({ + content: JSON.stringify({ data: 'x'.repeat(7000) }), + rawData: { content: JSON.stringify({ data: 'x'.repeat(7000) }), isError: false } + }) + + const result = await agent.respondToolInteraction('s1', 'm1', 'tc1', { + kind: 'permission', + granted: true + }) + + expect(result).toEqual({ resumed: true }) + const updatedBlocks = JSON.parse( + sqlitePresenter.deepchatMessagesTable.updateContent.mock.calls[0][1] + ) + expect(updatedBlocks[0].tool_call.response).toContain('[Tool output offloaded]') + expect(updatedBlocks[0].tool_call.response).toContain('tool_tc1.offload') + expect(updatedBlocks[0].tool_call.response).not.toContain(tempHome!) + expect(updatedBlocks[0].status).toBe('success') + expect(processStream).toHaveBeenCalledTimes(1) + }) + + it('cleans deferred offload files when resume budget downgrades the tool result', async () => { + tempHome = await fs.mkdtemp(path.join(os.tmpdir(), 'deepchat-deferred-cleanup-')) + getPathSpy = vi.spyOn(app, 'getPath').mockReturnValue(tempHome) + + await agent.initSession('s1', { providerId: 'openai', modelId: 'gpt-4' }) + makeAssistantRow({ + blocks: [ + { + type: 'tool_call', + status: 'pending', + timestamp: 1, + tool_call: { + id: 'tc1', + name: 'yo_browser_cdp_send', + params: '{"method":"Page.captureScreenshot"}', + response: '' + } + }, + { + type: 'action', + action_type: 'tool_call_permission', + status: 'pending', + timestamp: 2, + content: 'Need permission', + tool_call: { + id: 'tc1', + name: 'yo_browser_cdp_send', + params: '{"method":"Page.captureScreenshot"}' + }, + extra: { + needsUserAction: true, + permissionType: 'write', + permissionRequest: JSON.stringify({ + permissionType: 
'write', + description: 'Need permission', + toolName: 'yo_browser_cdp_send', + serverName: 'yo-browser' + }) + } + } + ] + }) + toolPresenter.getAllToolDefinitions.mockResolvedValueOnce([ + { + type: 'function', + function: { + name: 'yo_browser_cdp_send', + description: 'CDP send', + parameters: { type: 'object', properties: {} } + }, + server: { name: 'yo-browser', icons: '', description: '' } + } + ]) + toolPresenter.callTool.mockResolvedValueOnce({ + content: JSON.stringify({ data: 'x'.repeat(7000) }), + rawData: { content: JSON.stringify({ data: 'x'.repeat(7000) }), isError: false } + }) + + const hasContextBudgetSpy = vi.spyOn((agent as any).toolOutputGuard, 'hasContextBudget') + hasContextBudgetSpy.mockReturnValueOnce(false).mockReturnValueOnce(true) + + try { + const result = await agent.respondToolInteraction('s1', 'm1', 'tc1', { + kind: 'permission', + granted: true + }) + + expect(result).toEqual({ resumed: true }) + const updateCalls = sqlitePresenter.deepchatMessagesTable.updateContent.mock.calls + const updatedBlocks = JSON.parse(updateCalls[updateCalls.length - 1][1]) + expect(updatedBlocks[0].tool_call.response).toContain( + 'remaining context window is insufficient' + ) + expect(updatedBlocks[0].tool_call.response).not.toContain('[Tool output offloaded]') + await expect( + fs.access(path.join(tempHome, '.deepchat', 'sessions', 's1', 'tool_tc1.offload')) + ).rejects.toThrow() + expect(processStream).toHaveBeenCalledTimes(1) + } finally { + hasContextBudgetSpy.mockRestore() + } + }) + it('handles permission deny and resumes with denial result', async () => { await agent.initSession('s1', { providerId: 'openai', modelId: 'gpt-4' }) makeAssistantRow({ diff --git a/test/main/presenter/deepchatAgentPresenter/dispatch.test.ts b/test/main/presenter/deepchatAgentPresenter/dispatch.test.ts index e37bccba5..33d335d51 100644 --- a/test/main/presenter/deepchatAgentPresenter/dispatch.test.ts +++ b/test/main/presenter/deepchatAgentPresenter/dispatch.test.ts @@ 
-1,8 +1,13 @@ -import { describe, it, expect, vi, beforeEach } from 'vitest' +import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest' +import fs from 'fs/promises' +import os from 'os' +import path from 'path' +import { app } from 'electron' import type { StreamState, IoParams } from '@/presenter/deepchatAgentPresenter/types' import { createState } from '@/presenter/deepchatAgentPresenter/types' import type { MCPToolDefinition } from '@shared/presenter' import type { IToolPresenter } from '@shared/types/presenters/tool.presenter' +import { ToolOutputGuard } from '@/presenter/deepchatAgentPresenter/toolOutputGuard' vi.mock('@/eventbus', () => ({ eventBus: { sendToRenderer: vi.fn() }, @@ -82,6 +87,8 @@ function createMockToolPresenter(responses: Record = {}): IToolP describe('dispatch', () => { let state: StreamState let io: IoParams + let tempHome: string | null = null + let getPathSpy: ReturnType | null = null beforeEach(() => { vi.clearAllMocks() @@ -89,6 +96,15 @@ describe('dispatch', () => { io = createIo() }) + afterEach(async () => { + getPathSpy?.mockRestore() + getPathSpy = null + if (tempHome) { + await fs.rm(tempHome, { recursive: true, force: true }) + tempHome = null + } + }) + describe('executeTools', () => { it('builds assistant message, calls tools, updates blocks', async () => { const tools = [makeTool('get_weather')] @@ -119,7 +135,10 @@ describe('dispatch', () => { toolPresenter, 'gpt-4', io, - 'full_access' + 'full_access', + new ToolOutputGuard(), + 32000, + 1024 ) expect(executed.executed).toBe(1) @@ -157,7 +176,19 @@ describe('dispatch', () => { }) state.completedToolCalls = [{ id: 'tc1', name: 'get_weather', arguments: '{}' }] - await executeTools(state, [], 0, tools, toolPresenter, 'gpt-4', io, 'full_access') + await executeTools( + state, + [], + 0, + tools, + toolPresenter, + 'gpt-4', + io, + 'full_access', + new ToolOutputGuard(), + 32000, + 1024 + ) expect(state.blocks[0].tool_call!.server_name).toBe('test-server') 
expect(state.blocks[0].tool_call!.server_icons).toBe('icon') @@ -192,7 +223,10 @@ describe('dispatch', () => { toolPresenter, 'deepseek-reasoner', io, - 'full_access' + 'full_access', + new ToolOutputGuard(), + 32000, + 1024 ) const assistantMsg = conversation.find((m: any) => m.role === 'assistant') @@ -219,7 +253,19 @@ describe('dispatch', () => { }) state.completedToolCalls = [{ id: 'tc1', name: 'search', arguments: '{}' }] - await executeTools(state, conversation, 0, tools, toolPresenter, 'gpt-4', io, 'full_access') + await executeTools( + state, + conversation, + 0, + tools, + toolPresenter, + 'gpt-4', + io, + 'full_access', + new ToolOutputGuard(), + 32000, + 1024 + ) const assistantMsg = conversation.find((m: any) => m.role === 'assistant') expect(assistantMsg.reasoning_content).toBeUndefined() @@ -242,7 +288,19 @@ describe('dispatch', () => { }) state.completedToolCalls = [{ id: 'tc1', name: 'bad_tool', arguments: '{}' }] - await executeTools(state, conversation, 0, tools, toolPresenter, 'gpt-4', io, 'full_access') + await executeTools( + state, + conversation, + 0, + tools, + toolPresenter, + 'gpt-4', + io, + 'full_access', + new ToolOutputGuard(), + 32000, + 1024 + ) const toolMsg = conversation.find((m: any) => m.role === 'tool') expect(toolMsg.content).toBe('Error: Tool failed') @@ -252,6 +310,50 @@ describe('dispatch', () => { expect(block!.status).toBe('error') }) + it('preserves raw tool error status when guard returns ok', async () => { + const tools = [makeTool('bad_tool')] + const toolPresenter = createMockToolPresenter() + ;(toolPresenter.callTool as ReturnType).mockResolvedValue({ + content: 'Upstream failure', + rawData: { + toolCallId: 'tc1', + content: 'Upstream failure', + isError: true + } + }) + const conversation: any[] = [] + + state.blocks.push({ + type: 'tool_call', + content: '', + status: 'pending', + timestamp: Date.now(), + tool_call: { id: 'tc1', name: 'bad_tool', params: '{}', response: '' } + }) + state.completedToolCalls = [{ 
id: 'tc1', name: 'bad_tool', arguments: '{}' }] + + await executeTools( + state, + conversation, + 0, + tools, + toolPresenter, + 'gpt-4', + io, + 'full_access', + new ToolOutputGuard(), + 32000, + 1024 + ) + + const toolMsg = conversation.find((message: any) => message.role === 'tool') + expect(toolMsg.content).toBe('Upstream failure') + + const block = state.blocks.find((b) => b.type === 'tool_call') + expect(block!.tool_call!.response).toBe('Upstream failure') + expect(block!.status).toBe('error') + }) + it('stops on abort', async () => { const abortController = new AbortController() const abortIo = createIo({ abortSignal: abortController.signal }) @@ -291,7 +393,10 @@ describe('dispatch', () => { toolPresenter, 'gpt-4', abortIo, - 'full_access' + 'full_access', + new ToolOutputGuard(), + 32000, + 1024 ) // Only first tool should have been called @@ -312,7 +417,19 @@ describe('dispatch', () => { }) state.completedToolCalls = [{ id: 'tc1', name: 'tool_a', arguments: '{}' }] - await executeTools(state, [], 0, tools, toolPresenter, 'gpt-4', io, 'full_access') + await executeTools( + state, + [], + 0, + tools, + toolPresenter, + 'gpt-4', + io, + 'full_access', + new ToolOutputGuard(), + 32000, + 1024 + ) expect(eventBus.sendToRenderer).toHaveBeenCalledWith( 'stream:response', @@ -326,6 +443,211 @@ describe('dispatch', () => { ) expect(io.messageStore.updateAssistantContent).toHaveBeenCalled() }) + + it('offloads large yo_browser responses into a stub', async () => { + tempHome = await fs.mkdtemp(path.join(os.tmpdir(), 'deepchat-dispatch-offload-')) + getPathSpy = vi.spyOn(app, 'getPath').mockReturnValue(tempHome) + + const tools = [makeTool('yo_browser_cdp_send')] + const longScreenshot = JSON.stringify({ data: 'x'.repeat(7000) }) + const toolPresenter = createMockToolPresenter({ yo_browser_cdp_send: longScreenshot }) + const conversation: any[] = [] + + state.blocks.push({ + type: 'tool_call', + content: '', + status: 'pending', + timestamp: Date.now(), + 
tool_call: { + id: 'tc1', + name: 'yo_browser_cdp_send', + params: '{"method":"Page.captureScreenshot"}', + response: '' + } + }) + state.completedToolCalls = [ + { + id: 'tc1', + name: 'yo_browser_cdp_send', + arguments: '{"method":"Page.captureScreenshot"}' + } + ] + + const executed = await executeTools( + state, + conversation, + 0, + tools, + toolPresenter, + 'gpt-4', + io, + 'full_access', + new ToolOutputGuard(), + 32000, + 1024 + ) + + expect(executed.terminalError).toBeUndefined() + const toolMessage = conversation.find((message: any) => message.role === 'tool') + expect(toolMessage.content).toContain('[Tool output offloaded]') + expect(toolMessage.content).toContain('tool_tc1.offload') + expect(toolMessage.content).not.toContain(tempHome!) + expect(state.blocks[0].tool_call?.response).toContain('[Tool output offloaded]') + expect(state.blocks[0].status).toBe('success') + }) + + it('turns offload write failures into tool errors instead of falling back to raw content', async () => { + tempHome = await fs.mkdtemp(path.join(os.tmpdir(), 'deepchat-dispatch-offload-fail-')) + getPathSpy = vi.spyOn(app, 'getPath').mockReturnValue(tempHome) + const writeFileSpy = vi.spyOn(fs, 'writeFile').mockRejectedValueOnce(new Error('disk full')) + + const tools = [makeTool('yo_browser_cdp_send')] + const longScreenshot = JSON.stringify({ data: 'x'.repeat(7000) }) + const toolPresenter = createMockToolPresenter({ yo_browser_cdp_send: longScreenshot }) + const conversation: any[] = [] + + state.blocks.push({ + type: 'tool_call', + content: '', + status: 'pending', + timestamp: Date.now(), + tool_call: { + id: 'tc1', + name: 'yo_browser_cdp_send', + params: '{"method":"Page.captureScreenshot"}', + response: '' + } + }) + state.completedToolCalls = [ + { + id: 'tc1', + name: 'yo_browser_cdp_send', + arguments: '{"method":"Page.captureScreenshot"}' + } + ] + + await executeTools( + state, + conversation, + 0, + tools, + toolPresenter, + 'gpt-4', + io, + 'full_access', + new 
ToolOutputGuard(), + 32000, + 1024 + ) + + writeFileSpy.mockRestore() + const toolMessage = conversation.find((message: any) => message.role === 'tool') + expect(toolMessage.content).toContain('offloading that result to disk failed') + expect(toolMessage.content).not.toContain(longScreenshot) + expect(state.blocks[0].status).toBe('error') + }) + + it('marks the tool as error when offload succeeds but context budget cannot fit the stub', async () => { + tempHome = await fs.mkdtemp(path.join(os.tmpdir(), 'deepchat-dispatch-offload-clean-')) + getPathSpy = vi.spyOn(app, 'getPath').mockReturnValue(tempHome) + + const tools = [makeTool('yo_browser_cdp_send')] + const longScreenshot = JSON.stringify({ data: 'x'.repeat(7000) }) + const toolPresenter = createMockToolPresenter({ yo_browser_cdp_send: longScreenshot }) + const conversation: any[] = [] + + state.blocks.push({ + type: 'tool_call', + content: '', + status: 'pending', + timestamp: Date.now(), + tool_call: { + id: 'tc1', + name: 'yo_browser_cdp_send', + params: '{"method":"Page.captureScreenshot"}', + response: '' + } + }) + state.completedToolCalls = [ + { + id: 'tc1', + name: 'yo_browser_cdp_send', + arguments: '{"method":"Page.captureScreenshot"}' + } + ] + + await executeTools( + state, + conversation, + 0, + tools, + toolPresenter, + 'gpt-4', + io, + 'full_access', + new ToolOutputGuard(), + 200, + 32 + ) + + const toolMessage = conversation.find((message: any) => message.role === 'tool') + expect(toolMessage.content).toContain('remaining context window is insufficient') + expect(state.blocks[0].status).toBe('error') + await expect( + fs.access(path.join(tempHome, '.deepchat', 'sessions', 's1', 'tool_tc1.offload')) + ).rejects.toThrow() + }) + + it('returns terminalError when even the minimal tool failure stub cannot fit', async () => { + tempHome = await fs.mkdtemp(path.join(os.tmpdir(), 'deepchat-dispatch-terminal-clean-')) + getPathSpy = vi.spyOn(app, 'getPath').mockReturnValue(tempHome) + + const tools = 
[makeTool('yo_browser_cdp_send')] + const longScreenshot = JSON.stringify({ data: 'x'.repeat(7000) }) + const toolPresenter = createMockToolPresenter({ yo_browser_cdp_send: longScreenshot }) + const conversation: any[] = [] + + state.blocks.push({ + type: 'tool_call', + content: '', + status: 'pending', + timestamp: Date.now(), + tool_call: { + id: 'tc1', + name: 'yo_browser_cdp_send', + params: '{"method":"Page.captureScreenshot"}', + response: '' + } + }) + state.completedToolCalls = [ + { + id: 'tc1', + name: 'yo_browser_cdp_send', + arguments: '{"method":"Page.captureScreenshot"}' + } + ] + + const executed = await executeTools( + state, + conversation, + 0, + tools, + toolPresenter, + 'gpt-4', + io, + 'full_access', + new ToolOutputGuard(), + 1, + 1 + ) + + expect(executed.terminalError).toContain('remaining context window is too small') + expect(conversation.find((message: any) => message.role === 'tool')).toBeUndefined() + expect(state.blocks[0].status).toBe('error') + await expect( + fs.access(path.join(tempHome, '.deepchat', 'sessions', 's1', 'tool_tc1.offload')) + ).rejects.toThrow() + }) }) describe('finalize', () => { diff --git a/test/main/presenter/deepchatAgentPresenter/process.test.ts b/test/main/presenter/deepchatAgentPresenter/process.test.ts index 426114f19..3949472c7 100644 --- a/test/main/presenter/deepchatAgentPresenter/process.test.ts +++ b/test/main/presenter/deepchatAgentPresenter/process.test.ts @@ -1,8 +1,13 @@ import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest' +import fs from 'fs/promises' +import os from 'os' +import path from 'path' +import { app } from 'electron' import type { LLMCoreStreamEvent } from '@shared/types/core/llm-events' import type { MCPToolDefinition } from '@shared/presenter' import type { IToolPresenter } from '@shared/types/presenters/tool.presenter' import type { ProcessParams } from '@/presenter/deepchatAgentPresenter/types' +import { ToolOutputGuard } from 
'@/presenter/deepchatAgentPresenter/toolOutputGuard' vi.mock('@/eventbus', () => ({ eventBus: { sendToRenderer: vi.fn() }, @@ -75,6 +80,8 @@ function makeStreamEvents(...events: LLMCoreStreamEvent[]): LLMCoreStreamEvent[] describe('processStream', () => { let messageStore: ReturnType + let tempHome: string | null = null + let getPathSpy: ReturnType<typeof vi.spyOn> | null = null beforeEach(() => { vi.useFakeTimers() @@ -84,6 +91,13 @@ describe('processStream', () => { afterEach(() => { vi.useRealTimers() + getPathSpy?.mockRestore() + getPathSpy = null + if (tempHome) { + return fs.rm(tempHome, { recursive: true, force: true }).then(() => { + tempHome = null + }) + } }) function createParams(overrides: Partial<ProcessParams> = {}): ProcessParams { @@ -108,6 +122,7 @@ describe('processStream', () => { temperature: 0.7, maxTokens: 4096, permissionMode: 'full_access', + toolOutputGuard: new ToolOutputGuard(), io: { sessionId: 's1', messageId: 'm1', @@ -190,6 +205,53 @@ describe('processStream', () => { expect(toolResultMsg.content).toBe('Sunny, 72F') }) + it('offloads large tool results before the next provider call', async () => { + tempHome = await fs.mkdtemp(path.join(os.tmpdir(), 'deepchat-process-offload-')) + getPathSpy = vi.spyOn(app, 'getPath').mockReturnValue(tempHome) + + let callCount = 0 + const longScreenshot = JSON.stringify({ data: 'x'.repeat(7000) }) + const coreStream = vi.fn(function () { + callCount++ + if (callCount === 1) { + return (async function* () { + yield { + type: 'tool_call_start', + tool_call_id: 'tc1', + tool_call_name: 'yo_browser_cdp_send' + } as LLMCoreStreamEvent + yield { + type: 'tool_call_end', + tool_call_id: 'tc1', + tool_call_arguments_complete: '{"method":"Page.captureScreenshot"}' + } as LLMCoreStreamEvent + yield { type: 'stop', stop_reason: 'tool_use' } as LLMCoreStreamEvent + })() + } + return (async function* () { + yield { type: 'text', content: 'Done' } as LLMCoreStreamEvent + yield { type: 'stop', stop_reason: 'complete' } as LLMCoreStreamEvent + 
})() + }) as unknown as ProcessParams['coreStream'] + + const toolPresenter = createMockToolPresenter({ yo_browser_cdp_send: longScreenshot }) + const params = createParams({ + coreStream, + toolPresenter, + tools: [makeTool('yo_browser_cdp_send')] + }) + + const promise = processStream(params) + await vi.runAllTimersAsync() + await promise + + const secondCallMessages = (coreStream as ReturnType<typeof vi.fn>).mock.calls[1][0] + const toolResultMsg = secondCallMessages.find((m: any) => m.role === 'tool') + expect(toolResultMsg.content).toContain('[Tool output offloaded]') + expect(toolResultMsg.content).toContain('tool_tc1.offload') + expect(toolResultMsg.content).not.toContain(tempHome!) + }) + it('multiple tool calls in one turn', async () => { let callCount = 0 const toolPresenter = createMockToolPresenter({ @@ -447,6 +509,61 @@ describe('processStream', () => { expect(messageStore.finalizeAssistantMessage).toHaveBeenCalled() }) + it('context window error event is finalized as an error', async () => { + const coreStream = vi.fn(function* () { + yield { + type: 'error', + error_message: 'maximum context length exceeded' + } as LLMCoreStreamEvent + }) as unknown as ProcessParams['coreStream'] + + const params = createParams({ coreStream }) + + const promise = processStream(params) + await vi.runAllTimersAsync() + await promise + + expect(messageStore.setMessageError).toHaveBeenCalled() + expect(messageStore.finalizeAssistantMessage).not.toHaveBeenCalled() + }) + + it('terminal tool output failure stops before the next provider call', async () => { + const coreStream = vi.fn(function () { + return (async function* () { + yield { + type: 'tool_call_start', + tool_call_id: 'tc1', + tool_call_name: 'yo_browser_cdp_send' + } as LLMCoreStreamEvent + yield { + type: 'tool_call_end', + tool_call_id: 'tc1', + tool_call_arguments_complete: '{"method":"Page.captureScreenshot"}' + } as LLMCoreStreamEvent + yield { type: 'stop', stop_reason: 'tool_use' } as LLMCoreStreamEvent + })() + }) 
as unknown as ProcessParams['coreStream'] + + const longScreenshot = JSON.stringify({ data: 'x'.repeat(7000) }) + const toolPresenter = createMockToolPresenter({ yo_browser_cdp_send: longScreenshot }) + const params = createParams({ + coreStream, + toolPresenter, + tools: [makeTool('yo_browser_cdp_send')], + modelConfig: { contextLength: 1 } as any, + maxTokens: 1 + }) + + const promise = processStream(params) + await vi.runAllTimersAsync() + const result = await promise + + expect(result.status).toBe('error') + expect(result.terminalError).toContain('remaining context window is too small') + expect(coreStream).toHaveBeenCalledTimes(1) + expect(messageStore.setMessageError).toHaveBeenCalled() + }) + it('stream exception → catch finalizeError', async () => { const coreStream = vi.fn(function () { return (async function* () {