fix(agent): rebuild resume transcripts from ACP top-level tool fields (#3084)

tatoalo · web-flow · commit 84bfe6825845 · 2026-07-02T16:59:02.000+02:00
diff --git a/packages/agent/src/adapters/claude/session/jsonl-hydration.test.ts b/packages/agent/src/adapters/claude/session/jsonl-hydration.test.ts
@@ -4,6 +4,7 @@ import {
   conversationTurnsToJsonlEntries,
   getSessionJsonlPath,
   rebuildConversation,
+  selectRecentTurns,
 } from "./jsonl-hydration";
 
 function entry(
@@ -285,6 +286,118 @@ describe("rebuildConversation", () => {
     expect(turns[1].toolCalls).toHaveLength(1);
     expect(turns[1].toolCalls?.[0].result).toBeUndefined();
   });
+
+  it("tracks tool calls from the ACP shape: top-level toolCallId/rawInput/rawOutput, toolName in _meta", () => {
+    // Mirrors the exact update sequence agent-server persists to S3. Before
+    // the top-level fields were read, every tool call was dropped and a
+    // 30-minute run resumed as a 4-line transcript.
+    const turns = rebuildConversation([
+      entry("user_message", { content: { type: "text", text: "fix it" } }),
+      // Opening update: the tool name lives in _meta and rawInput is still {}.
+      entry("tool_call", {
+        toolCallId: "toolu_01",
+        _meta: { claudeCode: { toolName: "Bash" } },
+        rawInput: {},
+        status: "pending",
+        title: "Execute command",
+        kind: "execute",
+        content: [],
+      }),
+      // Bare streaming update: carries the real input but no _meta/toolName.
+      entry("tool_call_update", {
+        toolCallId: "toolu_01",
+        rawInput: { command: "gh pr view 123" },
+      }),
+      // Completion update: the output arrives top-level as rawOutput.
+      entry("tool_call_update", {
+        toolCallId: "toolu_01",
+        _meta: { claudeCode: { toolName: "Bash" } },
+        status: "completed",
+        rawOutput: { stdout: "PR title" },
+      }),
+    ]);
+
+    // The three updates must collapse into a single tool call on the second
+    // turn: name from _meta, input from the streamed update, result from the
+    // completion update.
+    expect(turns).toHaveLength(2);
+    expect(turns[1].toolCalls).toEqual([
+      {
+        toolCallId: "toolu_01",
+        toolName: "Bash",
+        input: { command: "gh pr view 123" },
+        result: { stdout: "PR title" },
+      },
+    ]);
+  });
+
+  it("truncates oversized tool payloads, keeping object inputs as objects", () => {
+    // Both payloads sit well above the 10_000-char per-payload cap.
+    const payloadChars = 50_000;
+    const hugeOutput = "x".repeat(payloadChars);
+    const hugeWriteInput = {
+      file_path: "/tmp/big.ts",
+      content: "y".repeat(payloadChars),
+    };
+
+    const rebuilt = rebuildConversation([
+      entry("user_message", { content: { type: "text", text: "go" } }),
+      entry("tool_call", {
+        toolCallId: "toolu_01",
+        _meta: { claudeCode: { toolName: "Write" } },
+        rawInput: hugeWriteInput,
+      }),
+      entry("tool_call_update", {
+        toolCallId: "toolu_01",
+        rawOutput: hugeOutput,
+      }),
+    ]);
+    const recorded = rebuilt[1].toolCalls?.[0];
+
+    // A string output is allowed to come back as a shortened string...
+    const cappedResult = recorded?.result as string;
+    expect(cappedResult.length).toBeLessThan(11_000);
+    expect(cappedResult).toContain("[truncated");
+
+    // ...but tool_use.input must stay an object per the Claude API schema,
+    // so an oversized input is swapped for a wrapper object instead.
+    const cappedInput = recorded?.input as {
+      _truncated: boolean;
+      preview: string;
+      originalSize: number;
+    };
+    expect(cappedInput._truncated).toBe(true);
+    expect(cappedInput.preview.length).toBeLessThan(11_000);
+    expect(cappedInput.originalSize).toBeGreaterThan(50_000);
+  });
+});
+
+describe("selectRecentTurns", () => {
+  it("keeps the user turn and sheds oldest tool calls when the final turn alone exceeds the budget", () => {
+    // One prompt followed only by tool calls rebuilds into
+    // [user, one giant assistant turn]. Without the fallback, that shape
+    // selected zero turns and hydration wrote an empty transcript.
+    const heavyInput = { data: "y".repeat(8_000) };
+    const conversation = rebuildConversation([
+      entry("user_message", { content: { type: "text", text: "the task" } }),
+      ...["toolu_01", "toolu_02", "toolu_03"].map((toolCallId) =>
+        entry("tool_call", {
+          toolCallId,
+          _meta: { claudeCode: { toolName: "Bash" } },
+          rawInput: heavyInput,
+        }),
+      ),
+    ]);
+
+    // Budget fits the user turn plus roughly one big tool call.
+    const kept = selectRecentTurns(conversation, 3_000);
+
+    expect(kept).toHaveLength(2);
+    const [promptTurn, assistantTurn] = kept;
+    expect(promptTurn.role).toBe("user");
+    expect(assistantTurn.role).toBe("assistant");
+    // Only the newest tool call survives the shedding.
+    const survivingIds = assistantTurn.toolCalls?.map(
+      (call) => call.toolCallId,
+    );
+    expect(survivingIds).toEqual(["toolu_03"]);
+  });
+
+  it("returns recent turns that fit the budget unchanged", () => {
+    // Two tiny turns sit far below the budget, so nothing is dropped.
+    const userTurn = {
+      role: "user" as const,
+      content: [{ type: "text" as const, text: "a" }],
+    };
+    const assistantTurn = {
+      role: "assistant" as const,
+      content: [{ type: "text" as const, text: "b" }],
+    };
+    const turns = [userTurn, assistantTurn];
+    expect(selectRecentTurns(turns, 1_000)).toEqual(turns);
+  });
 });
 
 describe("conversationTurnsToJsonlEntries", () => {
diff --git a/packages/agent/src/adapters/claude/session/jsonl-hydration.ts b/packages/agent/src/adapters/claude/session/jsonl-hydration.ts
@@ -42,6 +42,37 @@ interface SessionUpdate {
   sessionUpdate: string;
   content?: ContentBlock | ContentBlock[];
   _meta?: { claudeCode?: ClaudeCodeMeta };
+  // ACP puts these on the update itself; _meta.claudeCode only reliably
+  // carries toolName (and sometimes toolResponse).
+  toolCallId?: string;
+  rawInput?: unknown;
+  rawOutput?: unknown;
+}
+
+// Individual tool payloads can be huge (whole-file Write inputs, full test
+// output). Cap each one so a single call can't dominate the resume budget.
+const MAX_TOOL_PAYLOAD_CHARS = 10_000;
+
+/**
+ * Caps a tool input/output payload at MAX_TOOL_PAYLOAD_CHARS UTF-16 code
+ * units.
+ *
+ * Strings are measured directly; any other value is measured by its
+ * JSON.stringify text. A payload within the cap, or one with no JSON text at
+ * all (JSON.stringify returns undefined for e.g. `undefined`), is returned
+ * untouched.
+ *
+ * @param value - Raw tool input or output taken from a session update.
+ * @returns `value` itself when it is small enough; otherwise a truncated
+ *   string (for string payloads) or a `{ _truncated, preview, originalSize }`
+ *   wrapper object (for everything else).
+ */
+function capToolPayload(value: unknown): unknown {
+  const text = typeof value === "string" ? value : JSON.stringify(value);
+  if (typeof text !== "string" || text.length <= MAX_TOOL_PAYLOAD_CHARS) {
+    return value;
+  }
+  // Never cut between the two halves of a surrogate pair: a preview ending
+  // in a lone high surrogate is ill-formed UTF-16 and would be carried into
+  // the rebuilt transcript. Back off one code unit instead.
+  let keep = MAX_TOOL_PAYLOAD_CHARS;
+  const lastKept = text.charCodeAt(keep - 1);
+  if (lastKept >= 0xd800 && lastKept <= 0xdbff) {
+    keep -= 1;
+  }
+  const preview = `${text.slice(0, keep)}… [truncated ${text.length - keep} chars]`;
+  // tool_use.input must stay an object per the Claude API schema — wrap
+  // instead of replacing with a bare string.
+  return typeof value === "string"
+    ? preview
+    : { _truncated: true, preview, originalSize: text.length };
+}
+
+/**
+ * Reports whether `value` is a non-null, non-array object with no own
+ * enumerable string keys (for example the `{}` placeholder input).
+ *
+ * @param value - Any value; non-objects always yield false.
+ * @returns true only for key-less, non-array objects.
+ */
+function isEmptyRecord(value: unknown): boolean {
+  // Primitives, functions and null can never be records.
+  if (typeof value !== "object" || value === null) return false;
+  // Arrays are objects too, but an empty array is not an empty record.
+  if (Array.isArray(value)) return false;
+  return Object.keys(value).length === 0;
 }
 
 const MAX_PROJECT_KEY_LENGTH = 200;
@@ -148,36 +179,47 @@ export function rebuildConversation(
         case "tool_call":
         case "tool_call_update": {
           const meta = update._meta?.claudeCode;
-          if (meta) {
-            const { toolCallId, toolName, toolInput, toolResponse } = meta;
-
-            if (toolCallId && toolName) {
-              let toolCall = currentToolCalls.find(
-                (tc) => tc.toolCallId === toolCallId,
-              );
-              if (!toolCall) {
-                toolCall = { toolCallId, toolName, input: toolInput };
-                currentToolCalls.push(toolCall);
-              }
-              if (toolResponse !== undefined) {
-                toolCall.result = toolResponse;
-              }
-            }
+          const toolCallId = update.toolCallId ?? meta?.toolCallId;
+          if (!toolCallId) break;
+
+          let toolCall = currentToolCalls.find(
+            (tc) => tc.toolCallId === toolCallId,
+          );
+          if (!toolCall) {
+            const toolName = meta?.toolName;
+            // Bare streaming updates carry no name; the opening tool_call
+            // always does, so the call exists by the time they arrive.
+            if (!toolName) break;
+            toolCall = { toolCallId, toolName, input: undefined };
+            currentToolCalls.push(toolCall);
+          }
+
+          const input = update.rawInput ?? meta?.toolInput;
+          // The opening tool_call ships rawInput: {} — don't clobber an
+          // already-streamed input with it.
+          if (
+            input !== undefined &&
+            !(isEmptyRecord(input) && toolCall.input !== undefined)
+          ) {
+            toolCall.input = capToolPayload(input);
+          }
+          const result = update.rawOutput ?? meta?.toolResponse;
+          if (result !== undefined) {
+            toolCall.result = capToolPayload(result);
           }
           break;
         }
 
         case "tool_result": {
           const meta = update._meta?.claudeCode;
-          if (meta) {
-            const { toolCallId, toolResponse } = meta;
-            if (toolCallId) {
-              const toolCall = currentToolCalls.find(
-                (tc) => tc.toolCallId === toolCallId,
-              );
-              if (toolCall && toolResponse !== undefined) {
-                toolCall.result = toolResponse;
-              }
+          const toolCallId = update.toolCallId ?? meta?.toolCallId;
+          if (toolCallId) {
+            const toolCall = currentToolCalls.find(
+              (tc) => tc.toolCallId === toolCallId,
+            );
+            const result = update.rawOutput ?? meta?.toolResponse;
+            if (toolCall && result !== undefined) {
+              toolCall.result = capToolPayload(result);
             }
           }
           break;
@@ -236,6 +278,15 @@ export function selectRecentTurns(
     startIndex = i;
   }
 
+  if (startIndex === turns.length && turns.length > 0) {
+    // Even the most recent turn alone exceeds the budget — typical for a
+    // single-prompt run, where everything after the prompt is one giant
+    // assistant turn. Resuming with nothing loses all context, so keep the
+    // nearest user turn (the task intent) and shed the assistant turn's
+    // oldest tool calls until it fits.
+    return selectOversizedTailFallback(turns, maxTokens);
+  }
+
   // Ensure we start on a user turn so the conversation is well-formed
   while (startIndex < turns.length && turns[startIndex].role !== "user") {
     startIndex++;
@@ -244,6 +295,45 @@ export function selectRecentTurns(
   return turns.slice(startIndex);
 }
 
+/**
+ * Fallback selection for when even the newest turn alone is over budget.
+ *
+ * Keeps the nearest user turn (scanning backwards from the end) and, when
+ * that user turn is not itself the final turn, appends the final turn with
+ * its oldest tool calls shed until it fits what is left of the budget.
+ * Turns between the kept user turn and the final turn are not included.
+ *
+ * NOTE(review): when no user turn exists the result starts with the trimmed
+ * final turn, i.e. not on a user turn — confirm downstream tolerates that.
+ *
+ * @param turns - Full rebuilt conversation, oldest first.
+ * @param maxTokens - Total token budget for the selection.
+ * @returns Zero, one or two turns: the user turn, then the trimmed final turn.
+ */
+function selectOversizedTailFallback(
+  turns: ConversationTurn[],
+  maxTokens: number,
+): ConversationTurn[] {
+  const lastIndex = turns.length - 1;
+
+  // Walk backwards to the most recent user turn; ends at -1 if there is none.
+  let userIndex = lastIndex;
+  for (; userIndex >= 0; userIndex--) {
+    if (turns[userIndex].role === "user") break;
+  }
+
+  const kept: ConversationTurn[] = [];
+  let remaining = maxTokens;
+  if (userIndex >= 0) {
+    const userTurn = turns[userIndex];
+    kept.push(userTurn);
+    remaining -= estimateTurnTokens(userTurn);
+  }
+
+  // The user turn is the final turn (or the list is empty): nothing to trim.
+  if (userIndex === lastIndex) {
+    return kept;
+  }
+
+  // Clamp at zero: the user turn alone may already have used the budget.
+  kept.push(dropOldestToolCalls(turns[lastIndex], Math.max(remaining, 0)));
+  return kept;
+}
+
+/**
+ * Returns a version of `turn` whose tool calls have been shed from the front
+ * (oldest first) until the turn's estimated tokens no longer exceed `budget`.
+ *
+ * A turn without tool calls is returned as-is (same reference). Otherwise a
+ * shallow copy is returned and the input turn is left unmodified. If every
+ * tool call has to go, `toolCalls` becomes undefined — the remaining turn may
+ * still be over budget.
+ *
+ * @param turn - Turn to trim.
+ * @param budget - Token budget the trimmed turn should fit.
+ * @returns The original turn or a trimmed shallow copy.
+ */
+function dropOldestToolCalls(
+  turn: ConversationTurn,
+  budget: number,
+): ConversationTurn {
+  const calls = turn.toolCalls;
+  if (!calls || calls.length === 0) return turn;
+
+  // Copy both the turn and its array so shifting never touches the input.
+  const remaining = calls.slice();
+  const candidate: ConversationTurn = { ...turn, toolCalls: remaining };
+  for (; remaining.length > 0; remaining.shift()) {
+    // Re-estimate after each drop; stop as soon as the turn fits.
+    if (!(estimateTurnTokens(candidate) > budget)) break;
+  }
+
+  if (remaining.length === 0) {
+    candidate.toolCalls = undefined;
+  }
+  return candidate;
+}
+
 const BASE62 = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
 
 function generateMessageId(): string {