diff --git a/src/agent/__tests__/agent-observation.test.ts b/src/agent/__tests__/agent-observation.test.ts new file mode 100644 index 0000000..52f471f --- /dev/null +++ b/src/agent/__tests__/agent-observation.test.ts @@ -0,0 +1,149 @@ +import { describe, expect, test } from "bun:test"; +import z from "zod"; + +import type { AssistantMessage, ModelProvider, ModelProviderInvokeParams } from "@/foundation"; +import { defineTool } from "@/foundation"; +import { Model } from "@/foundation/models"; + +import { Agent } from "../agent"; + +class RecordingProvider implements ModelProvider { + calls: ModelProviderInvokeParams[] = []; + private readonly responses: AssistantMessage[]; + + constructor(responses: AssistantMessage[]) { + this.responses = responses; + } + + async invoke(): Promise { + throw new Error("invoke not implemented in test provider"); + } + + async *stream(params: ModelProviderInvokeParams): AsyncGenerator { + this.calls.push(params); + const next = this.responses[this.calls.length - 1]; + if (!next) { + throw new Error(`Unexpected model stream call #${this.calls.length}`); + } + yield next; + } +} + +function getSystemPromptText(call: ModelProviderInvokeParams) { + const first = call.messages[0]; + if (!first || first.role !== "system") { + throw new Error("Expected first message to be a system prompt"); + } + + const textBlock = first.content[0]; + if (!textBlock || textBlock.type !== "text") { + throw new Error("Expected first system content block to be text"); + } + + return textBlock.text; +} + +describe("Agent tool observation injection", () => { + test("injects tool observation immediately after the first failure and upgrades it after repeated failure", async () => { + const provider = new RecordingProvider([ + { + role: "assistant", + content: [ + { + type: "tool_use", + id: "toolu_1", + name: "grep_search", + input: { pattern: "foo" }, + }, + ], + }, + { + role: "assistant", + content: [ + { + type: "tool_use", + id: "toolu_2", + name: "grep_search", + input: { pattern: "foo" }, + }, + ], + }, + { + role: "assistant", + content: [{ type: "text", text: "done" }], + }, + ]); + + const model = new Model("test-model", provider); + const grepTool = defineTool({ + name: "grep_search", + description: "test grep", + parameters: z.object({ pattern: z.string() }), + invoke: async () => ({ + ok: false as const, + summary: "Failed to run rg", + error: "Failed to run rg", + code: "RG_NOT_FOUND", + }), + }); + + const agent = new Agent({ + model, + prompt: "You are a coding agent.", + messages: [], + tools: [grepTool], + maxSteps: 5, + }); + + for await (const _ of agent.stream({ role: "user", content: [{ type: "text", text: "find foo" }] })) { + void _; + } + + expect(provider.calls).toHaveLength(3); + + const firstPromptText = getSystemPromptText(provider.calls[0]!); + const secondPromptText = getSystemPromptText(provider.calls[1]!); + const thirdPromptText = getSystemPromptText(provider.calls[2]!); + + expect(firstPromptText).toBe("You are a coding agent."); + + expect(secondPromptText).toContain("You are a coding agent."); + expect(secondPromptText).toContain(""); + expect(secondPromptText).toContain("tool=grep_search"); + expect(secondPromptText).toContain("repeated_failures=0"); + expect(secondPromptText).not.toContain("repeated_failure=true"); + expect(secondPromptText).toContain("avoid_immediate_retry_tools=grep_search"); + + expect(thirdPromptText).toContain("You are a coding agent."); + expect(thirdPromptText).toContain(""); + expect(thirdPromptText).toContain("tool=grep_search"); + expect(thirdPromptText).toContain("repeated_failures=1"); + expect(thirdPromptText).toContain("repeated_failure=true"); + expect(thirdPromptText).toContain("avoid_immediate_retry_tools=grep_search"); + }); + + test("does not inject tool observation before any tool failure occurs", async () => { + const provider = new RecordingProvider([ + { + role: "assistant", + content: [{ type: "text", text: "done" }], + }, + ]); + + const model = new Model("test-model", provider); + const agent = new Agent({ + model, + prompt: "You are a coding agent.", + messages: [], + tools: [], + maxSteps: 2, + }); + + for await (const _ of agent.stream({ role: "user", content: [{ type: "text", text: "hello" }] })) { + void _; + } + + expect(provider.calls).toHaveLength(1); + expect(getSystemPromptText(provider.calls[0]!)).toBe("You are a coding agent."); + }); +}); diff --git a/src/agent/__tests__/tool-compaction.test.ts b/src/agent/__tests__/tool-compaction.test.ts new file mode 100644 index 0000000..b697cee --- /dev/null +++ b/src/agent/__tests__/tool-compaction.test.ts @@ -0,0 +1,98 @@ +import { describe, expect, test } from "bun:test"; + +import { compactToolResultData, compactTranscriptPayload } from "../tool-compaction"; +import { getToolResultPolicy } from "../tool-result-policy"; + +describe("compactToolResultData", () => { + test("truncates large strings in success data", () => { + const result = compactToolResultData({ + toolName: "apply_patch", + normalized: { + ok: true, + summary: "Applied patch", + data: { patch: "x".repeat(1200) }, + raw: null, + }, + policy: getToolResultPolicy("apply_patch"), + }); + + expect(result).toEqual({ + ok: true, + summary: "Applied patch", + data: { + patch: expect.stringContaining("[truncated"), + }, + }); + }); + + test("samples long arrays", () => { + const result = compactToolResultData({ + toolName: "list_files", + normalized: { + ok: true, + summary: "Listed files", + data: { entries: Array.from({ length: 25 }, (_, i) => `file-${i}`) }, + raw: null, + }, + policy: getToolResultPolicy("list_files"), + }); + + expect(result).toEqual({ + ok: true, + summary: "Listed files", + data: { + entries: { + items: Array.from({ length: 10 }, (_, i) => `file-${i}`), + truncated: true, + originalLength: 25, + }, + }, + }); + }); +}); + +describe("compactTranscriptPayload", () => { + test("drops data for summary-first tools", () => { + const result = compactTranscriptPayload({ + toolName: "grep_search", + normalized: { + ok: true, + summary: "Found 42 matches", + data: { matches: Array.from({ length: 15 }, (_, i) => `match-${i}`) }, + raw: null, + }, + policy: getToolResultPolicy("grep_search"), + }); + + expect(result).toEqual({ + ok: true, + summary: "Found 42 matches", + }); + }); + + test("preserves compacted details for errors", () => { + const result = compactTranscriptPayload({ + toolName: "grep_search", + normalized: { + ok: false, + summary: "grep failed", + error: "x".repeat(1200), + code: "GREP_FAILED", + details: { stderr: "y".repeat(1200) }, + errorKind: "execution_failed", + raw: null, + }, + policy: getToolResultPolicy("grep_search"), + }); + + expect(result).toEqual({ + ok: false, + summary: "grep failed", + error: expect.stringContaining("[truncated"), + code: "GREP_FAILED", + details: { + stderr: expect.stringContaining("[truncated"), + }, + }); + }); +}); diff --git a/src/agent/__tests__/tool-observation.test.ts b/src/agent/__tests__/tool-observation.test.ts new file mode 100644 index 0000000..ebafe29 --- /dev/null +++ b/src/agent/__tests__/tool-observation.test.ts @@ -0,0 +1,113 @@ +import { describe, expect, test } from "bun:test"; + +import { buildRecentToolObservation } from "../tool-observation"; +import type { ToolTraceRecord, ToolTraceState } from "../tool-trace"; + +function makeState(records: ToolTraceRecord[]): ToolTraceState { + return { + recent: records, + repeatedFailureCount: records.filter((r) => r.repeatedFailure).length, + }; +} + +describe("buildRecentToolObservation", () => { + test("returns null when there are no failures", () => { + const state = makeState([ + { + step: 1, + toolName: "list_files", + toolUseId: "t1", + inputSignature: "{}", + ok: true, + summary: "ok", + repeatedFailure: false, + }, + ]); + + const observation = buildRecentToolObservation({ + state, + getRecoveryHint: () => null, + }); + + expect(observation).toBeNull(); + }); + + test("includes a summary line and avoid_immediate_retry_tools for repeated failures", () => { + const failure: ToolTraceRecord = { + step: 2, + toolName: "grep_search", + toolUseId: "t2", + inputSignature: "{\"pattern\":\"foo\"}", + ok: false, + summary: "rg not found", + code: "RG_NOT_FOUND", + errorKind: "environment_missing", + repeatedFailure: true, + }; + + const state = makeState([failure]); + const observation = buildRecentToolObservation({ + state, + getRecoveryHint: () => ({ + message: "Missing env", + shouldSuppressImmediateRetry: true, + retryable: false, + }), + }); + + expect(observation).toContain(""); + expect(observation).toContain("summary: recent_failures=1"); + expect(observation).toContain("repeated_failures=1"); + expect(observation).toContain("avoid_immediate_retry_tools=grep_search"); + expect(observation).toContain("repeated_failure=true"); + expect(observation).toContain("avoid_immediate_retry=true"); + expect(observation).toContain("code=RG_NOT_FOUND"); + expect(observation).toContain("kind=environment_missing"); + }); + + test("dedupes identical failures and prioritizes repeated failures over newer non-repeated ones", () => { + const a1: ToolTraceRecord = { + step: 1, + toolName: "apply_patch", + toolUseId: "t1", + inputSignature: "{\"patch\":\"...\"}", + ok: false, + summary: "Patch failed", + code: "PATCH_APPLY_FAILED", + errorKind: "execution_failed", + repeatedFailure: false, + }; + + const a2: ToolTraceRecord = { + ...a1, + step: 2, + toolUseId: "t2", + repeatedFailure: true, + }; + + const newerNonRepeated: ToolTraceRecord = { + step: 3, + toolName: "glob_search", + toolUseId: "t3", + inputSignature: "{\"pattern\":\"bar\"}", + ok: false, + summary: "glob failed", + code: "FILE_NOT_FOUND", + errorKind: "not_found", + repeatedFailure: false, + }; + + const state = makeState([a1, a2, newerNonRepeated]); + const observation = buildRecentToolObservation({ + state, + getRecoveryHint: () => null, + maxFailures: 3, + }); + + expect(observation).toContain("summary: recent_failures=3"); + // Dedup should keep one formatted line for these identical failures. + expect(observation?.match(/tool=apply_patch/g)?.length).toBe(1); + expect(observation).toContain("repeated_failure=true"); + }); +}); + diff --git a/src/agent/__tests__/tool-recovery-policy.test.ts b/src/agent/__tests__/tool-recovery-policy.test.ts new file mode 100644 index 0000000..cda69d5 --- /dev/null +++ b/src/agent/__tests__/tool-recovery-policy.test.ts @@ -0,0 +1,39 @@ +import { describe, expect, test } from "bun:test"; + +import { getToolRecoveryHint } from "../tool-recovery-policy"; + +describe("getToolRecoveryHint", () => { + test("maps invalid path on file tools to discovery guidance", () => { + expect(getToolRecoveryHint({ toolName: "read_file", code: "INVALID_PATH", errorKind: "invalid_input" })).toEqual({ + message: "The target path could not be used. Discover the correct path before retrying.", + suggestedTools: ["list_files", "glob_search"], + shouldSuppressImmediateRetry: true, + retryable: false, + }); + }); + + test("maps patch apply failure to reread guidance", () => { + expect(getToolRecoveryHint({ toolName: "apply_patch", code: "PATCH_APPLY_FAILED", errorKind: "execution_failed" })).toEqual({ + message: "Patch application failed. Re-read the file and verify the target lines before trying another edit.", + suggestedTools: ["read_file"], + shouldSuppressImmediateRetry: true, + retryable: false, + }); + }); + + test("treats missing environment tools as non-retryable", () => { + expect(getToolRecoveryHint({ toolName: "grep_search", code: "RG_NOT_FOUND", errorKind: "environment_missing" })).toEqual({ + message: "Required environment dependency is missing. Do not blindly retry the same tool call.", + shouldSuppressImmediateRetry: true, + retryable: false, + }); + }); + + test("provides generic guidance for unknown failures", () => { + expect(getToolRecoveryHint({ toolName: "write_file", errorKind: "unknown" })).toEqual({ + message: "Tool call failed. Inspect the previous result before retrying the same action.", + shouldSuppressImmediateRetry: false, + retryable: false, + }); + }); +}); diff --git a/src/agent/__tests__/tool-result-policy.test.ts b/src/agent/__tests__/tool-result-policy.test.ts index cf5b3e4..8670eb2 100644 --- a/src/agent/__tests__/tool-result-policy.test.ts +++ b/src/agent/__tests__/tool-result-policy.test.ts @@ -9,6 +9,8 @@ describe("getToolResultPolicy", () => { includeData: false, maxStringLength: 1000, uiSummaryOnly: true, + maxDataItems: 10, + maxDataTextLength: 400, }); }); @@ -17,6 +19,8 @@ describe("getToolResultPolicy", () => { preferSummaryOnly: false, includeData: true, maxStringLength: 12000, + maxDataItems: 50, + maxDataTextLength: 12000, }); }); @@ -25,6 +29,8 @@ describe("getToolResultPolicy", () => { preferSummaryOnly: false, includeData: true, maxStringLength: 4000, + maxDataItems: 20, + maxDataTextLength: 1000, }); }); }); diff --git a/src/agent/__tests__/tool-result-runtime.test.ts b/src/agent/__tests__/tool-result-runtime.test.ts index 1f3ed3e..0a10ddf 100644 --- a/src/agent/__tests__/tool-result-runtime.test.ts +++ b/src/agent/__tests__/tool-result-runtime.test.ts @@ -1,6 +1,6 @@ import { describe, expect, test } from "bun:test"; -import { formatToolResultForMessage, inferToolErrorKind, normalizeToolResult } from "../tool-result-runtime"; +import { buildToolResultEnvelope, formatToolResultForMessage, inferToolErrorKind, normalizeToolResult } from "../tool-result-runtime"; describe("inferToolErrorKind", () => { test("maps common tool error code families", () => { @@ -63,6 +63,33 @@ describe("normalizeToolResult", () => { }); }); + +describe("buildToolResultEnvelope", () => { + test("returns normalized result alongside transcript text", () => { + const envelope = buildToolResultEnvelope({ + toolName: "grep_search", + result: { + ok: false, + summary: "Failed to run rg", + error: "Failed to run rg", + code: "RG_NOT_FOUND", + }, + }); + + expect(envelope.normalized).toMatchObject({ + ok: false, + code: "RG_NOT_FOUND", + errorKind: "environment_missing", + }); + expect(JSON.parse(envelope.transcript)).toEqual({ + ok: false, + summary: "Failed to run rg", + error: "Failed to run rg", + code: "RG_NOT_FOUND", + }); + }); +}); + describe("formatToolResultForMessage", () => { test("omits data for summary-first tools", () => { const formatted = formatToolResultForMessage({ @@ -136,6 +163,25 @@ describe("formatToolResultForMessage", () => { }); }); + test("compacts oversized structured data before transcript serialization", () => { + const formatted = formatToolResultForMessage({ + toolName: "grep_search", + result: { + ok: true, + summary: "Found 40 matches for foo", + data: { + matches: Array.from({ length: 20 }, (_, i) => `match-${i}`), + content: "x".repeat(5000), + }, + }, + }); + + expect(JSON.parse(formatted)).toEqual({ + ok: true, + summary: "Found 40 matches for foo", + }); + }); + test("always returns valid json when payload exceeds limits", () => { const formatted = formatToolResultForMessage({ toolName: "apply_patch", diff --git a/src/agent/__tests__/tool-trace.test.ts b/src/agent/__tests__/tool-trace.test.ts new file mode 100644 index 0000000..0e9e7a4 --- /dev/null +++ b/src/agent/__tests__/tool-trace.test.ts @@ -0,0 +1,94 @@ +import { describe, expect, test } from "bun:test"; + +import { appendToolTrace, createToolInputSignature, createToolTraceState, summarizeRecentToolTrace } from "../tool-trace"; + +describe("createToolInputSignature", () => { + test("serializes json-safe input", () => { + expect(createToolInputSignature({ path: "/tmp/demo.ts", count: 1 })).toBe('{"path":"/tmp/demo.ts","count":1}'); + }); +}); + +describe("appendToolTrace", () => { + test("detects repeated identical failures", () => { + const state = createToolTraceState(); + appendToolTrace(state, { + step: 1, + toolName: "read_file", + toolUseId: "toolu_1", + inputSignature: '{"path":"/tmp/missing.ts"}', + ok: false, + summary: "File not found", + code: "FILE_NOT_FOUND", + errorKind: "not_found", + }); + + const second = appendToolTrace(state, { + step: 2, + toolName: "read_file", + toolUseId: "toolu_2", + inputSignature: '{"path":"/tmp/missing.ts"}', + ok: false, + summary: "File not found", + code: "FILE_NOT_FOUND", + errorKind: "not_found", + }); + + expect(second.repeatedFailure).toBe(true); + expect(state.repeatedFailureCount).toBe(1); + }); + + test("evicts old entries beyond the recent window", () => { + const state = createToolTraceState(); + for (let i = 0; i < 4; i++) { + appendToolTrace(state, { + step: i + 1, + toolName: "list_files", + toolUseId: `toolu_${i}`, + inputSignature: String(i), + ok: true, + summary: `ok-${i}`, + }, 2); + } + + expect(state.recent).toHaveLength(2); + expect(state.recent.map((entry) => entry.summary)).toEqual(["ok-2", "ok-3"]); + }); +}); + +describe("summarizeRecentToolTrace", () => { + test("returns recent failure summary", () => { + const state = createToolTraceState(); + appendToolTrace(state, { + step: 1, + toolName: "read_file", + toolUseId: "toolu_1", + inputSignature: '{"path":"/tmp/missing.ts"}', + ok: false, + summary: "File not found", + code: "FILE_NOT_FOUND", + errorKind: "not_found", + }); + appendToolTrace(state, { + step: 2, + toolName: "list_files", + toolUseId: "toolu_2", + inputSignature: '{"path":"/tmp"}', + ok: true, + summary: "Listed files", + }); + + expect(summarizeRecentToolTrace(state)).toEqual({ + totalRecent: 2, + repeatedFailureCount: 0, + latestFailures: [ + { + toolName: "read_file", + summary: "File not found", + code: "FILE_NOT_FOUND", + errorKind: "not_found", + repeatedFailure: false, + }, + ], + }); + }); +}); diff --git a/src/agent/agent.ts b/src/agent/agent.ts index 4535d82..e3a4fbf 100644 --- a/src/agent/agent.ts +++ b/src/agent/agent.ts @@ -12,7 +12,10 @@ import type { import type { AgentEvent } from "./agent-event"; import type { AgentMiddleware } from "./agent-middleware"; import type { SkillFrontmatter } from "./skills/types"; -import { formatToolResultForMessage } from "./tool-result-runtime"; +import { buildRecentToolObservation } from "./tool-observation"; +import { getToolRecoveryHint } from "./tool-recovery-policy"; +import { buildToolResultEnvelope } from "./tool-result-runtime"; +import { appendToolTrace, createToolInputSignature, createToolTraceState } from "./tool-trace"; /** * A context that is used to invoke a React agent. @@ -49,6 +52,7 @@ export class Agent { private readonly _context: AgentContext; private _streaming = false; private _abortController: AbortController | null = null; + private _toolTrace = createToolTraceState(); readonly name?: string; readonly model: Model; @@ -143,6 +147,7 @@ export class Agent { } this._abortController = new AbortController(); + this._toolTrace = createToolTraceState(); this._appendMessage(message); await this._beforeAgentRun(); this._streaming = true; @@ -160,7 +165,7 @@ export class Agent { return; } - yield* this._act(toolUses); + yield* this._act(step, toolUses); await this._afterAgentStep(step); } throw new Error("Maximum number of steps reached"); @@ -179,7 +184,7 @@ export class Agent { private async *_think(): AsyncGenerator { const modelContext: ModelContext = { - prompt: this.prompt, + prompt: this._buildPromptWithToolObservation(), messages: this.messages, tools: this.tools, signal: this._abortController?.signal, @@ -204,6 +209,26 @@ export class Agent { return latest; } + + private _buildPromptWithToolObservation() { + const observation = buildRecentToolObservation({ + state: this._toolTrace, + getRecoveryHint: (record) => getToolRecoveryHint({ + toolName: record.toolName, + errorKind: record.errorKind, + code: record.code, + }), + }); + + if (!observation) { + return this.prompt; + } + + return `${this.prompt} + +${observation}`; + } + private _deriveProgress(snapshot: AssistantMessage): AgentEvent { const toolUses = snapshot.content.filter( (c): c is ToolUseContent => c.type === "tool_use", @@ -219,7 +244,7 @@ export class Agent { return message.content.filter((content): content is ToolUseContent => content.type === "tool_use"); } - private async *_act(toolUses: ToolUseContent[]): AsyncGenerator { + private async *_act(step: number, toolUses: ToolUseContent[]): AsyncGenerator { const signal = this._abortController?.signal; const pending = toolUses.map(async (toolUse, index) => { try { @@ -227,14 +252,14 @@ export class Agent { if (!tool) throw new Error(`Tool ${toolUse.name} not found`); const beforeResult = await this._beforeToolUse(toolUse); if (beforeResult.skip) { - return { index, toolUseId: toolUse.id, toolName: toolUse.name, result: beforeResult.result }; + return { index, toolUseId: toolUse.id, toolName: toolUse.name, toolInput: toolUse.input, result: beforeResult.result }; } const result = await tool.invoke(toolUse.input, signal); await this._afterToolUse(toolUse, result); - return { index, toolUseId: toolUse.id, toolName: toolUse.name, result }; + return { index, toolUseId: toolUse.id, toolName: toolUse.name, toolInput: toolUse.input, result }; } catch (error) { const message = error instanceof Error ? error.message : String(error); - return { index, toolUseId: toolUse.id, toolName: toolUse.name, result: `Error: ${message}` }; + return { index, toolUseId: toolUse.id, toolName: toolUse.name, toolInput: toolUse.input, result: `Error: ${message}` }; } }); @@ -256,13 +281,39 @@ export class Agent { : Promise.race(candidates)))!; remaining.delete(resolved.index); + const envelope = buildToolResultEnvelope({ toolName: resolved.toolName, result: resolved.result }); + const traceEntry = appendToolTrace(this._toolTrace, { + step, + toolName: resolved.toolName, + toolUseId: resolved.toolUseId, + inputSignature: createToolInputSignature(resolved.toolInput), + ok: envelope.normalized.ok, + summary: envelope.normalized.summary, + ...(envelope.normalized.ok ? {} : { + ...(envelope.normalized.code ? { code: envelope.normalized.code } : {}), + errorKind: envelope.normalized.errorKind, + }), + }); + + const recoveryHint = envelope.normalized.ok + ? null + : getToolRecoveryHint({ + toolName: resolved.toolName, + errorKind: traceEntry.errorKind, + code: traceEntry.code, + }); + const toolMessage: ToolMessage = { role: "tool", content: [ { type: "tool_result", tool_use_id: resolved.toolUseId, - content: formatToolResultForMessage({ toolName: resolved.toolName, result: resolved.result }), + content: recoveryHint && traceEntry.repeatedFailure && recoveryHint.shouldSuppressImmediateRetry + ? `${envelope.transcript} + +Recovery hint: ${recoveryHint.message}` + : envelope.transcript, }, ], }; diff --git a/src/agent/tool-compaction.ts b/src/agent/tool-compaction.ts new file mode 100644 index 0000000..1cea107 --- /dev/null +++ b/src/agent/tool-compaction.ts @@ -0,0 +1,95 @@ +import type { StructuredToolResult } from "@/foundation"; + +import type { ToolResultPolicy } from "./tool-result-policy"; +import type { NormalizedToolError, NormalizedToolResult, NormalizedToolSuccess } from "./tool-result-runtime"; + +function truncateString(value: string, maxLength: number) { + if (value.length <= maxLength) { + return { value, truncated: false }; + } + return { + value: `${value.slice(0, maxLength)}... [truncated ${value.length - maxLength} chars]`, + truncated: true, + }; +} + +function compactValue(value: unknown, maxItems: number, maxTextLength: number): unknown { + if (typeof value === "string") { + return truncateString(value, maxTextLength).value; + } + + if (Array.isArray(value)) { + const items = value.slice(0, maxItems).map((item) => compactValue(item, maxItems, maxTextLength)); + return value.length > maxItems + ? { + items, + truncated: true, + originalLength: value.length, + } + : items; + } + + if (value && typeof value === "object") { + const entries = Object.entries(value as Record); + return Object.fromEntries(entries.map(([key, entryValue]) => [key, compactValue(entryValue, maxItems, maxTextLength)])); + } + + return value; +} + +export function compactToolResultData(params: { + toolName: string; + normalized: NormalizedToolResult; + policy: ToolResultPolicy; +}): StructuredToolResult { + const { normalized, policy } = params; + const maxItems = policy.maxDataItems ?? 20; + const maxTextLength = policy.maxDataTextLength ?? 1000; + + if (!normalized.ok) { + const errorResult: StructuredToolResult = { + ok: false, + summary: truncateString(normalized.summary, maxTextLength).value, + error: truncateString(normalized.error, maxTextLength).value, + ...(normalized.code ? { code: normalized.code } : {}), + ...(normalized.details ? { details: compactValue(normalized.details, maxItems, maxTextLength) as Record } : {}), + }; + return errorResult; + } + + const success = normalized as NormalizedToolSuccess; + return { + ok: true, + summary: truncateString(success.summary, maxTextLength).value, + ...(success.data !== undefined ? { data: compactValue(success.data, maxItems, maxTextLength) } : {}), + }; +} + +export function compactTranscriptPayload(params: { + toolName: string; + normalized: NormalizedToolResult; + policy: ToolResultPolicy; +}): StructuredToolResult { + const compacted = compactToolResultData(params); + + if (!params.normalized.ok) { + return compacted; + } + + if (params.policy.preferSummaryOnly || !params.policy.includeData) { + return { + ok: true, + summary: compacted.summary, + }; + } + + return compacted; +} + +export function compactErrorForHint(normalized: NormalizedToolError, maxTextLength = 240) { + return { + summary: truncateString(normalized.summary, maxTextLength).value, + error: truncateString(normalized.error, maxTextLength).value, + ...(normalized.code ? { code: normalized.code } : {}), + }; +} diff --git a/src/agent/tool-observation.ts b/src/agent/tool-observation.ts new file mode 100644 index 0000000..76cad87 --- /dev/null +++ b/src/agent/tool-observation.ts @@ -0,0 +1,102 @@ +import type { ToolRecoveryHint } from "./tool-recovery-policy"; +import type { ToolTraceRecord, ToolTraceState } from "./tool-trace"; +import { summarizeRecentToolTrace } from "./tool-trace"; + +function formatFailureLine(record: ToolTraceRecord, recoveryHint?: ToolRecoveryHint | null) { + const parts = [ + `- step ${record.step}`, + `tool=${record.toolName}`, + `summary=${JSON.stringify(record.summary)}`, + ]; + + if (record.code) { + parts.push(`code=${record.code}`); + } + if (record.errorKind) { + parts.push(`kind=${record.errorKind}`); + } + if (record.repeatedFailure) { + parts.push("repeated_failure=true"); + } + if (recoveryHint?.shouldSuppressImmediateRetry) { + parts.push("avoid_immediate_retry=true"); + } + if (recoveryHint?.suggestedTools?.length) { + parts.push(`suggested_tools=${recoveryHint.suggestedTools.join(",")}`); + } + + return parts.join(" "); +} + +function dedupeFailureRecords(records: ToolTraceRecord[]) { + const seen = new Set(); + const result: ToolTraceRecord[] = []; + + for (const record of records) { + const key = [record.toolName, record.inputSignature, record.code ?? "", record.errorKind ?? "", record.summary].join("|"); + if (seen.has(key)) { + continue; + } + seen.add(key); + result.push(record); + } + + return result; +} + +export function buildRecentToolObservation(params: { + state: ToolTraceState; + getRecoveryHint: (_record: ToolTraceRecord) => ToolRecoveryHint | null; + maxFailures?: number; +}) { + const { state, getRecoveryHint, maxFailures = 3 } = params; + + const recentFailures = state.recent.filter((record) => !record.ok); + if (recentFailures.length === 0) { + return null; + } + + const prioritized = dedupeFailureRecords( + [...recentFailures].sort((a, b) => { + const repeatedDiff = Number(b.repeatedFailure) - Number(a.repeatedFailure); + if (repeatedDiff !== 0) { + return repeatedDiff; + } + + const suppressRetryDiff = Number(Boolean(getRecoveryHint(b)?.shouldSuppressImmediateRetry)) + - Number(Boolean(getRecoveryHint(a)?.shouldSuppressImmediateRetry)); + if (suppressRetryDiff !== 0) { + return suppressRetryDiff; + } + + return b.step - a.step; + }), + ).slice(0, maxFailures); + + // Build a compact header with aggregated signals to nudge planning: + // - total recent failures + // - repeated failure count (same tool/input/code/kind) + // - tools to avoid immediate retry (from hints or repeated failure) + const summary = summarizeRecentToolTrace(state, maxFailures); + const avoidImmediateRetryTools = Array.from(new Set( + prioritized + .filter((r) => r.repeatedFailure || getRecoveryHint(r)?.shouldSuppressImmediateRetry) + .map((r) => r.toolName), + )); + + const header = [ + `summary: recent_failures=${recentFailures.length}`, + `repeated_failures=${summary.repeatedFailureCount}`, + `avoid_immediate_retry_tools=${avoidImmediateRetryTools.length ? avoidImmediateRetryTools.join(",") : "-"}`, + ].join(" "); + + const lines = prioritized.map((record) => formatFailureLine(record, getRecoveryHint(record))); + + return [ + "", + "Recent tool failures were observed. Use this to plan the next action, and avoid blindly repeating the same failed call.", + header, + ...lines, + "", + ].join("\n"); +} diff --git a/src/agent/tool-recovery-policy.ts b/src/agent/tool-recovery-policy.ts new file mode 100644 index 0000000..1a8553b --- /dev/null +++ b/src/agent/tool-recovery-policy.ts @@ -0,0 +1,108 @@ +import type { ToolErrorKind } from "./tool-result-runtime"; + +export type ToolRecoveryHint = { + message: string; + suggestedTools?: string[]; + shouldSuppressImmediateRetry?: boolean; + retryable: boolean; +}; + +const DEFAULT_ERROR_HINT: ToolRecoveryHint = { + message: "Tool call failed. Inspect the previous result before retrying the same action.", + shouldSuppressImmediateRetry: false, + retryable: false, +}; + +const DEFAULT_ENVIRONMENT_HINT: ToolRecoveryHint = { + message: "Required environment dependency is missing. Do not blindly retry the same tool call.", + shouldSuppressImmediateRetry: true, + retryable: false, +}; + +function isFileOrSearchTool(toolName: string) { + return [ + "read_file", + "write_file", + "str_replace", + "apply_patch", + "list_files", + "glob_search", + "grep_search", + "file_info", + "move_path", + "mkdir", + ].includes(toolName); +} + +export function getToolRecoveryHint(params: { + toolName: string; + errorKind?: ToolErrorKind; + code?: string; +}): ToolRecoveryHint | null { + const { toolName, errorKind = "unknown", code } = params; + + if (code === "RG_NOT_FOUND") { + return DEFAULT_ENVIRONMENT_HINT; + } + + if (code === "PATCH_APPLY_FAILED") { + return { + message: "Patch application failed. Re-read the file and verify the target lines before trying another edit.", + suggestedTools: ["read_file"], + shouldSuppressImmediateRetry: true, + retryable: false, + }; + } + + if ((code === "INVALID_PATH" || code === "FILE_NOT_FOUND") && isFileOrSearchTool(toolName)) { + return { + message: "The target path could not be used. Discover the correct path before retrying.", + suggestedTools: ["list_files", "glob_search"], + shouldSuppressImmediateRetry: true, + retryable: false, + }; + } + + if (errorKind === "environment_missing") { + return DEFAULT_ENVIRONMENT_HINT; + } + + if (errorKind === "invalid_input") { + return { + message: "The tool input is invalid. Adjust the arguments before retrying.", + shouldSuppressImmediateRetry: true, + retryable: false, + }; + } + + if (errorKind === "not_found") { + return { + message: "The requested target was not found. Use a discovery tool to inspect the workspace before retrying.", + suggestedTools: ["list_files", "glob_search"], + shouldSuppressImmediateRetry: true, + retryable: false, + }; + } + + if (errorKind === "execution_failed") { + return { + message: "The tool execution failed. Inspect the previous result and only retry if you have new information.", + shouldSuppressImmediateRetry: false, + retryable: true, + }; + } + + if (errorKind === "unsupported") { + return { + message: "The requested operation is not supported by this tool. Choose a different tool or strategy.", + shouldSuppressImmediateRetry: true, + retryable: false, + }; + } + + if (errorKind === "unknown") { + return DEFAULT_ERROR_HINT; + } + + return null; +} diff --git a/src/agent/tool-result-policy.ts b/src/agent/tool-result-policy.ts index e582722..8e51ec8 100644 --- a/src/agent/tool-result-policy.ts +++ b/src/agent/tool-result-policy.ts @@ -3,12 +3,16 @@ export type ToolResultPolicy = { includeData: boolean; maxStringLength?: number; uiSummaryOnly?: boolean; + maxDataItems?: number; + maxDataTextLength?: number; }; const DEFAULT_POLICY: ToolResultPolicy = { preferSummaryOnly: false, includeData: true, maxStringLength: 4000, + maxDataItems: 20, + maxDataTextLength: 1000, }; export function getToolResultPolicy(toolName: string): ToolResultPolicy { @@ -16,6 +20,14 @@ export function getToolResultPolicy(toolName: string): ToolResultPolicy { case "list_files": case "glob_search": case "grep_search": + return { + preferSummaryOnly: true, + includeData: false, + maxStringLength: 1000, + uiSummaryOnly: true, + maxDataItems: 10, + maxDataTextLength: 400, + }; case "file_info": case "mkdir": case "move_path": @@ -24,20 +36,33 @@ export function getToolResultPolicy(toolName: string): ToolResultPolicy { includeData: false, maxStringLength: 1000, uiSummaryOnly: true, + maxDataItems: 10, + maxDataTextLength: 400, }; case "read_file": return { preferSummaryOnly: false, includeData: true, maxStringLength: 12000, + maxDataItems: 50, + maxDataTextLength: 12000, }; case "apply_patch": + return { + preferSummaryOnly: false, + includeData: true, + maxStringLength: 1000, + maxDataItems: 20, + maxDataTextLength: 1000, + }; case "write_file": case "str_replace": return { preferSummaryOnly: false, includeData: true, maxStringLength: 4000, + maxDataItems: 20, + maxDataTextLength: 800, }; default: return DEFAULT_POLICY; diff --git a/src/agent/tool-result-runtime.ts b/src/agent/tool-result-runtime.ts index b87194a..12b8a68 100644 --- a/src/agent/tool-result-runtime.ts +++ b/src/agent/tool-result-runtime.ts @@ -1,5 +1,6 @@ import type { StructuredToolError, StructuredToolResult, StructuredToolSuccess } from "@/foundation"; +import { compactTranscriptPayload } from "./tool-compaction"; import { getToolResultPolicy } from "./tool-result-policy"; export type ToolErrorKind = @@ -21,6 +22,11 @@ export type NormalizedToolError = StructuredToolError & { export type NormalizedToolResult = NormalizedToolSuccess | NormalizedToolError; +export type ToolResultEnvelope = { + normalized: NormalizedToolResult; + transcript: string; +}; + export function inferToolErrorKind(code?: string): ToolErrorKind { if (!code) return "unknown"; if (code.startsWith("INVALID_")) return "invalid_input"; @@ -97,49 +103,46 @@ export function normalizeToolResult(result: unknown): NormalizedToolResult { }; } -export function formatToolResultForMessage({ toolName, result }: { toolName: string; result: unknown }): string { +export function buildToolResultEnvelope({ toolName, result }: { toolName: string; result: unknown }): ToolResultEnvelope { if (toolName === "read_file" && typeof result === "string") { - return result; + return { + normalized: { + ok: true, + summary: truncateSummary(result), + data: result, + raw: result, + }, + transcript: result, + }; } const normalized = normalizeToolResult(result); const policy = getToolResultPolicy(toolName); - if (!normalized.ok) { - return stringifyWithinLimit( - { - ok: false, - summary: normalized.summary, - error: normalized.error, - ...(normalized.code ? { code: normalized.code } : {}), - ...(normalized.details ? { details: normalized.details } : {}), - }, - policy.maxStringLength, - { - ok: false, - summary: truncateSummary(normalized.summary), - error: truncateSummary(normalized.error), - ...(normalized.code ? { code: normalized.code } : {}), - }, - ); - } + const compacted = compactTranscriptPayload({ toolName, normalized, policy }); - if (policy.preferSummaryOnly || !policy.includeData) { - return JSON.stringify({ ok: true, summary: truncateSummary(normalized.summary) } satisfies StructuredToolResult); - } + return { + normalized, + transcript: stringifyWithinLimit( + compacted, + policy.maxStringLength, + compacted.ok + ? { + ok: true, + summary: truncateSummary(compacted.summary), + } + : { + ok: false, + summary: truncateSummary(compacted.summary), + error: truncateSummary(compacted.error), + ...(compacted.code ? { code: compacted.code } : {}), + }, + ), + }; +} - return stringifyWithinLimit( - { - ok: true, - summary: normalized.summary, - ...(normalized.data !== undefined ? { data: normalized.data } : {}), - }, - policy.maxStringLength, - { - ok: true, - summary: truncateSummary(normalized.summary), - }, - ); +export function formatToolResultForMessage({ toolName, result }: { toolName: string; result: unknown }): string { + return buildToolResultEnvelope({ toolName, result }).transcript; } function stringifyValue(value: unknown) { diff --git a/src/agent/tool-trace.ts b/src/agent/tool-trace.ts new file mode 100644 index 0000000..240f0c8 --- /dev/null +++ b/src/agent/tool-trace.ts @@ -0,0 +1,77 @@ +import type { ToolErrorKind } from "./tool-result-runtime"; + +export type ToolTraceRecord = { + step: number; + toolName: string; + toolUseId: string; + inputSignature: string; + ok: boolean; + summary: string; + code?: string; + errorKind?: ToolErrorKind; + repeatedFailure: boolean; +}; + +export type ToolTraceState = { + recent: ToolTraceRecord[]; + repeatedFailureCount: number; +}; + +export function createToolTraceState(): ToolTraceState { + return { + recent: [], + repeatedFailureCount: 0, + }; +} + +export function createToolInputSignature(input: unknown): string { + if (input === undefined) return "undefined"; + try { + return JSON.stringify(input); + } catch { + return "[unserializable input]"; + } +} + +export function appendToolTrace( + state: ToolTraceState, + record: Omit, + windowSize = 12, +): ToolTraceRecord { + const repeatedFailure = !record.ok && state.recent.some((entry) => ( + !entry.ok + && entry.toolName === record.toolName + && entry.inputSignature === record.inputSignature + && entry.code === record.code + && entry.errorKind === record.errorKind + )); + + const next: ToolTraceRecord = { + ...record, + repeatedFailure, + }; + + state.recent.push(next); + if (state.recent.length > windowSize) { + state.recent.splice(0, state.recent.length - windowSize); + } + if (repeatedFailure) { + state.repeatedFailureCount += 1; + } + return next; +} + +export function summarizeRecentToolTrace(state: ToolTraceState, maxEntries = 3) { + const entries = state.recent.slice(-maxEntries); + return { + totalRecent: state.recent.length, + repeatedFailureCount: state.repeatedFailureCount, + latestFailures: entries.filter((entry) => !entry.ok).map((entry) => ({ + toolName: entry.toolName, + summary: entry.summary, + ...(entry.code ? { code: entry.code } : {}), + ...(entry.errorKind ? { errorKind: entry.errorKind } : {}), + repeatedFailure: entry.repeatedFailure, + })), + }; +} diff --git a/src/cli/tui/message-text.ts b/src/cli/tui/message-text.ts index 3a89644..134c0af 100644 --- a/src/cli/tui/message-text.ts +++ b/src/cli/tui/message-text.ts @@ -1,5 +1,4 @@ -import { summarizeToolResultText } from "@/agent/tool-result-summary"; -import type { AssistantMessage, NonSystemMessage, ToolMessage, ToolUseContent, UserMessage } from "@/foundation"; +import type { AssistantMessage, NonSystemMessage, ToolUseContent, UserMessage } from "@/foundation"; const ESC = "\x1b["; const RESET = `${ESC}0m`; @@ -75,14 +74,3 @@ function toolUseText(content: ToolUseContent): string { return `${dim("⏺")} Tool call\n ${dim(`└─ ${content.name}`)}`; } } - -function toolMessageText(message: ToolMessage): string | null { - const parts: string[] = []; - for (const content of message.content) { - const summary = summarizeToolResultText(content.content); - if (summary) { - parts.push(`${dim("✓")} ${dim(summary)}`); - } - } - return parts.length > 0 ? parts.join("\n") : null; -} diff --git a/src/coding/tools/read-file.ts b/src/coding/tools/read-file.ts index 4319ec4..7523fb2 100644 --- a/src/coding/tools/read-file.ts +++ b/src/coding/tools/read-file.ts @@ -2,7 +2,7 @@ import z from "zod"; import { defineTool } from "@/foundation"; -import { errorToolResult, okToolResult } from "./tool-result"; +import { errorToolResult } from "./tool-result"; import { ensureAbsolutePath, truncateText } from "./tool-utils"; const DEFAULT_MAX_CHARS = 12000;