diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index f5e5ab3f..3a94418d 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -9,7 +9,7 @@ "source": { "source": "npm", "package": "@copilotkit/aimock", - "version": "^1.13.0" + "version": "^1.16.0" }, "description": "Fixture authoring skill for @copilotkit/aimock — LLM, multimedia (image/TTS/transcription/video), MCP, A2A, AG-UI, vector, embeddings, structured output, sequential responses, streaming physics, record/replay, agent loop patterns, and debugging" } diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index 2bea69d8..98da1b7f 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,6 +1,6 @@ { "name": "aimock", - "version": "1.16.0", + "version": "1.16.1", "description": "Fixture authoring guidance for @copilotkit/aimock — LLM, multimedia, MCP, A2A, AG-UI, vector, and service mocking", "author": { "name": "CopilotKit" diff --git a/CHANGELOG.md b/CHANGELOG.md index c5acbabf..949f9ba1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,20 @@ # @copilotkit/aimock +## [1.16.1] - 2026-04-28 + +### Fixed + +- **Responses API: item_reference dropped** — `responsesInputToMessages()` now synthesizes an assistant message with a matching `function_call` when a `function_call_output` has no prior matching call, preventing item_reference loss +- **Responses API: annotations missing** — Added `annotations: []` to all four `output_text` content items (streaming `.added`, `.done`, prefix, and non-streaming) for schema conformance +- **Responses API: item_id missing on reasoning events** — Added `item_id` to `reasoning_summary_part.added`, `reasoning_summary_part.done`, and `reasoning_summary_text.done` events +- **Responses API: web_search_call action missing type** — Changed `action: { query }` to `action: { type: "search", query }` in both streaming events and output prefix +- **Responses API: item_reference for text messages** — Extended item_reference handling to cover assistant text messages, not just function_call_output compensation +- **Responses API: multi-fco assistantCount inflation** — Fixed backward scan in `responsesInputToMessages()` to find and append to existing assistant messages with tool_calls instead of creating duplicates + +### Added + +- **Debug logging across all LLM handlers** — Added `logger.debug("Fixture matched: ...")` on match and `logger.debug("No fixture matched...")` on no-match to: server.ts, responses.ts, messages.ts, gemini.ts, bedrock.ts, bedrock-converse.ts, cohere.ts, ollama.ts, embeddings.ts, images.ts, speech.ts, transcription.ts, video.ts + ## 1.16.0 ### Added diff --git a/charts/aimock/Chart.yaml b/charts/aimock/Chart.yaml index faf508ae..04c53354 100644 --- a/charts/aimock/Chart.yaml +++ b/charts/aimock/Chart.yaml @@ -3,4 +3,4 @@ name: aimock description: Mock infrastructure for AI application testing (OpenAI, Anthropic, Gemini, MCP, A2A, vector) type: application version: 0.1.0 -appVersion: "1.13.0" +appVersion: "1.16.1" diff --git a/package.json b/package.json index a5389ccc..71472af1 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@copilotkit/aimock", - "version": "1.16.0", + "version": "1.16.1", "description": "Mock infrastructure for AI application testing — LLM APIs, image generation, text-to-speech, transcription, video generation, MCP tools, A2A agents, AG-UI event streams, vector databases, search, rerank, and moderation. One package, one port, zero dependencies.", "license": "MIT", "keywords": [ diff --git a/src/__tests__/responses.test.ts b/src/__tests__/responses.test.ts index 5f668800..20e2a515 100644 --- a/src/__tests__/responses.test.ts +++ b/src/__tests__/responses.test.ts @@ -7,6 +7,8 @@ import { responsesInputToMessages, responsesToCompletionRequest, handleResponses, + buildTextStreamEvents, + buildContentWithToolCallsStreamEvents, } from "../responses.js"; import { Journal } from "../journal.js"; import { Logger } from "../logger.js"; @@ -248,7 +250,7 @@ describe("responsesInputToMessages", () => { expect(messages[0].tool_calls![0].function.arguments).toBe('{"city":"NYC"}'); }); - it("converts function_call_output to tool message", () => { + it("converts function_call_output to tool message (with synthesized assistant)", () => { const messages = responsesInputToMessages({ model: "gpt-4", input: [ @@ -259,10 +261,19 @@ describe("responsesInputToMessages", () => { }, ], }); - expect(messages).toEqual([{ role: "tool", content: '{"temp":72}', tool_call_id: "call_123" }]); + // Bug 1 fix: a synthetic assistant message is now prepended when no + // matching function_call precedes the function_call_output + expect(messages).toHaveLength(2); + expect(messages[0].role).toBe("assistant"); + expect(messages[0].tool_calls![0].id).toBe("call_123"); + expect(messages[1]).toEqual({ + role: "tool", + content: '{"temp":72}', + tool_call_id: "call_123", + }); }); - it("skips unknown item types", () => { + it("converts item_reference to assistant placeholder", () => { const messages = responsesInputToMessages({ model: "gpt-4", input: [ @@ -270,6 +281,19 @@ describe("responsesInputToMessages", () => { { role: "user", content: "hi" }, ], }); + expect(messages).toHaveLength(2); + expect(messages[0]).toEqual({ role: "assistant", content: "" }); + expect(messages[1]).toEqual({ role: "user", content: "hi" }); + }); + + it("skips truly unknown item types (local_shell_call, mcp_list_tools, etc.)", () => { + const messages = responsesInputToMessages({ + model: "gpt-4", + input: [ + { type: "local_shell_call" } as { type: string; role?: string }, + { role: "user", content: "hi" }, + ], + }); expect(messages).toEqual([{ role: "user", content: "hi" }]); }); @@ -796,7 +820,10 @@ describe("responsesInputToMessages (fallback branches)", () => { }, ], }); - expect(messages[0].content).toBe(""); + // messages[0] is the synthesized assistant (Bug 1 fix), messages[1] is the tool + expect(messages).toHaveLength(2); + expect(messages[1].role).toBe("tool"); + expect(messages[1].content).toBe(""); }); it("handles content parts with missing text (text ?? '')", () => { @@ -1355,3 +1382,536 @@ describe("handleResponses (direct call — ?? fallback branches)", () => { expect(entry!.response.status).toBe(503); }); }); + +// ─── Bug 1: item_reference dropped → turnIndex stuck at 0 ────────────────── + +describe("Bug 1: item_reference + function_call_output synthesizes assistant", () => { + it("[user, item_reference, function_call_output] produces user, assistant(synthetic), tool", () => { + const messages = responsesInputToMessages({ + model: "gpt-4", + input: [ + { role: "user", content: "hello" }, + { type: "item_reference", id: "ref_fc_123" }, + { type: "function_call_output", call_id: "call_abc", output: '{"result":42}' }, + ], + }); + + expect(messages).toHaveLength(3); + expect(messages[0].role).toBe("user"); + expect(messages[1].role).toBe("assistant"); + expect(messages[1].content).toBeNull(); + expect(messages[1].tool_calls).toHaveLength(1); + expect(messages[1].tool_calls![0].id).toBe("call_abc"); + expect(messages[2].role).toBe("tool"); + expect(messages[2].content).toBe('{"result":42}'); + expect(messages[2].tool_call_id).toBe("call_abc"); + }); + + it("[user, function_call, function_call_output] produces NO duplicate assistant", () => { + const messages = responsesInputToMessages({ + model: "gpt-4", + input: [ + { role: "user", content: "hello" }, + { + type: "function_call", + call_id: "call_real", + name: "get_weather", + arguments: '{"city":"NYC"}', + }, + { type: "function_call_output", call_id: "call_real", output: '{"temp":72}' }, + ], + }); + + expect(messages).toHaveLength(3); + expect(messages[0].role).toBe("user"); + expect(messages[1].role).toBe("assistant"); + expect(messages[1].tool_calls![0].id).toBe("call_real"); + expect(messages[1].tool_calls![0].function.name).toBe("get_weather"); + expect(messages[2].role).toBe("tool"); + }); + + it("assistantCount equals 1 after [user, item_reference, function_call_output]", () => { + const messages = responsesInputToMessages({ + model: "gpt-4", + input: [ + { role: "user", content: "hello" }, + { type: "item_reference", id: "ref_123" }, + { type: "function_call_output", call_id: "call_xyz", output: "{}" }, + ], + }); + + const assistantCount = messages.filter((m) => m.role === "assistant").length; + expect(assistantCount).toBe(1); + }); + + it("function_call_output without preceding item_reference or function_call still synthesizes", () => { + const messages = responsesInputToMessages({ + model: "gpt-4", + input: [ + { role: "user", content: "hello" }, + { type: "function_call_output", call_id: "call_orphan", output: '{"x":1}' }, + ], + }); + + expect(messages).toHaveLength(3); + expect(messages[1].role).toBe("assistant"); + expect(messages[1].tool_calls![0].id).toBe("call_orphan"); + expect(messages[2].role).toBe("tool"); + }); +}); + +// ─── Bug 2: annotations missing on output_text content items ──────────────── + +describe("Bug 2: output_text includes annotations: []", () => { + it("streaming content_part.added has annotations: []", () => { + const events = buildTextStreamEvents("Hello", "gpt-4", 100); + const partAdded = events.find((e) => e.type === "response.content_part.added"); + expect(partAdded).toBeDefined(); + const part = (partAdded as { part: { type: string; annotations: unknown[] } }).part; + expect(part.type).toBe("output_text"); + expect(part.annotations).toEqual([]); + }); + + it("streaming content_part.done has annotations: []", () => { + const events = buildTextStreamEvents("Hello", "gpt-4", 100); + const partDone = events.find((e) => e.type === "response.content_part.done"); + expect(partDone).toBeDefined(); + const part = (partDone as { part: { type: string; text: string; annotations: unknown[] } }) + .part; + expect(part.type).toBe("output_text"); + expect(part.text).toBe("Hello"); + expect(part.annotations).toEqual([]); + }); + + it("streaming output_item.done message content has annotations: []", () => { + const events = buildTextStreamEvents("Hello", "gpt-4", 100); + const itemDone = events.find( + (e) => + e.type === "response.output_item.done" && (e.item as { type: string })?.type === "message", + ); + expect(itemDone).toBeDefined(); + const item = itemDone!.item as { content: { type: string; annotations: unknown[] }[] }; + expect(item.content[0].annotations).toEqual([]); + }); + + it("streaming response.completed output message content has annotations: []", () => { + const events = buildTextStreamEvents("Hello", "gpt-4", 100); + const completed = events.find((e) => e.type === "response.completed"); + expect(completed).toBeDefined(); + const response = completed!.response as { output: { content: { annotations: unknown[] }[] }[] }; + const msgOutput = response.output.find( + (o: { type?: string }) => (o as { type: string }).type === "message", + ) as { content: { annotations: unknown[] }[] }; + expect(msgOutput).toBeDefined(); + expect(msgOutput.content[0].annotations).toEqual([]); + }); + + it("non-streaming response output_text has annotations: []", async () => { + const textFix: Fixture = { + match: { userMessage: "annotations-check" }, + response: { content: "annotated" }, + }; + instance = await createServer([textFix]); + const res = await post(`${instance.url}/v1/responses`, { + model: "gpt-4", + input: [{ role: "user", content: "annotations-check" }], + stream: false, + }); + + const body = JSON.parse(res.body); + expect(body.output[0].content[0].annotations).toEqual([]); + }); + + it("content+toolCalls streaming has annotations: [] on output_text", () => { + const events = buildContentWithToolCallsStreamEvents( + "some text", + [{ name: "fn", arguments: "{}" }], + "gpt-4", + 100, + ); + const partAdded = events.find((e) => e.type === "response.content_part.added"); + expect(partAdded).toBeDefined(); + const part = (partAdded as { part: { annotations: unknown[] } }).part; + expect(part.annotations).toEqual([]); + }); +}); + +// ─── Bug 3 & 4: item_id missing on reasoning_summary_part events ──────────── + +describe("Bug 3 & 4: reasoning_summary_part events include item_id", () => { + it("reasoning_summary_part.added includes item_id", () => { + const events = buildTextStreamEvents("result", "gpt-4", 100, "thinking..."); + const partAdded = events.find((e) => e.type === "response.reasoning_summary_part.added"); + expect(partAdded).toBeDefined(); + expect(partAdded!.item_id).toBeDefined(); + expect(typeof partAdded!.item_id).toBe("string"); + }); + + it("reasoning_summary_part.done includes item_id", () => { + const events = buildTextStreamEvents("result", "gpt-4", 100, "thinking..."); + const partDone = events.find((e) => e.type === "response.reasoning_summary_part.done"); + expect(partDone).toBeDefined(); + expect(partDone!.item_id).toBeDefined(); + expect(typeof partDone!.item_id).toBe("string"); + }); + + it("reasoning_summary_part.added and .done share the same item_id as the reasoning item", () => { + const events = buildTextStreamEvents("result", "gpt-4", 100, "thinking..."); + const reasoningAdded = events.find( + (e) => + e.type === "response.output_item.added" && + (e.item as { type: string })?.type === "reasoning", + ); + const partAdded = events.find((e) => e.type === "response.reasoning_summary_part.added"); + const partDone = events.find((e) => e.type === "response.reasoning_summary_part.done"); + + const reasoningId = (reasoningAdded!.item as { id: string }).id; + expect(partAdded!.item_id).toBe(reasoningId); + expect(partDone!.item_id).toBe(reasoningId); + }); +}); + +// ─── Bug 5: web_search_call action missing type:"search" ──────────────────── + +describe("Bug 5: web_search_call action includes type:search", () => { + it("streaming web_search_call output_item.added has action.type=search", () => { + const events = buildTextStreamEvents("result", "gpt-4", 100, undefined, ["test query"]); + const searchAdded = events.find( + (e) => + e.type === "response.output_item.added" && + (e.item as { type: string })?.type === "web_search_call", + ); + expect(searchAdded).toBeDefined(); + const action = (searchAdded!.item as { action: { type: string; query: string } }).action; + expect(action.type).toBe("search"); + expect(action.query).toBe("test query"); + }); + + it("streaming web_search_call output_item.done has action.type=search", () => { + const events = buildTextStreamEvents("result", "gpt-4", 100, undefined, ["test query"]); + const searchDone = events.find( + (e) => + e.type === "response.output_item.done" && + (e.item as { type: string })?.type === "web_search_call", + ); + expect(searchDone).toBeDefined(); + const action = (searchDone!.item as { action: { type: string; query: string } }).action; + expect(action.type).toBe("search"); + expect(action.query).toBe("test query"); + }); + + it("non-streaming web_search_call has action.type=search", async () => { + const webSearchFixture: Fixture = { + match: { userMessage: "search-action-type" }, + response: { content: "found it", webSearches: ["copilotkit docs"] }, + }; + instance = await createServer([webSearchFixture]); + const res = await post(`${instance.url}/v1/responses`, { + model: "gpt-4", + input: [{ role: "user", content: "search-action-type" }], + stream: false, + }); + + const body = JSON.parse(res.body); + const searchItem = body.output.find((o: { type: string }) => o.type === "web_search_call"); + expect(searchItem).toBeDefined(); + expect(searchItem.action.type).toBe("search"); + expect(searchItem.action.query).toBe("copilotkit docs"); + }); + + it("multiple web searches all have action.type=search", () => { + const events = buildTextStreamEvents("result", "gpt-4", 100, undefined, ["query1", "query2"]); + const searchItems = events.filter( + (e) => + e.type === "response.output_item.done" && + (e.item as { type: string })?.type === "web_search_call", + ); + expect(searchItems).toHaveLength(2); + for (const item of searchItems) { + const action = (item.item as { action: { type: string } }).action; + expect(action.type).toBe("search"); + } + }); +}); + +// ─── Bug 6: item_reference for assistant text messages ────────────────────── + +describe("Bug 6: item_reference for assistant text turns counted in assistantCount", () => { + it("[user, item_reference(text), user_2] → assistantCount = 1", () => { + const messages = responsesInputToMessages({ + model: "gpt-4", + input: [ + { role: "user", content: "hello" }, + { type: "item_reference", id: "ref_text_msg" }, + { role: "user", content: "follow up" }, + ], + }); + + expect(messages).toHaveLength(3); + expect(messages[0].role).toBe("user"); + expect(messages[1].role).toBe("assistant"); + expect(messages[1].content).toBe(""); + expect(messages[2].role).toBe("user"); + + const assistantCount = messages.filter((m) => m.role === "assistant").length; + expect(assistantCount).toBe(1); + }); + + it("[user, item_reference(fc), function_call_output, user_2] → assistantCount = 1 (not 2)", () => { + const messages = responsesInputToMessages({ + model: "gpt-4", + input: [ + { role: "user", content: "hello" }, + { type: "item_reference", id: "ref_fc" }, + { type: "function_call_output", call_id: "call_fc", output: '{"ok":true}' }, + { role: "user", content: "next" }, + ], + }); + + const assistantCount = messages.filter((m) => m.role === "assistant").length; + expect(assistantCount).toBe(1); + // The item_reference placeholder was upgraded to carry tool_calls + const assistantMsg = messages.find((m) => m.role === "assistant")!; + expect(assistantMsg.tool_calls).toHaveLength(1); + expect(assistantMsg.tool_calls![0].id).toBe("call_fc"); + }); + + it("[user, item_ref(text), user_2, item_ref(fc), function_call_output] → assistantCount = 2", () => { + const messages = responsesInputToMessages({ + model: "gpt-4", + input: [ + { role: "user", content: "hello" }, + { type: "item_reference", id: "ref_text" }, + { role: "user", content: "follow up" }, + { type: "item_reference", id: "ref_fc" }, + { type: "function_call_output", call_id: "call_fc2", output: '{"done":true}' }, + ], + }); + + const assistantMsgs = messages.filter((m) => m.role === "assistant"); + expect(assistantMsgs).toHaveLength(2); + + // First assistant is a text placeholder (no tool_calls) + expect(assistantMsgs[0].content).toBe(""); + expect(assistantMsgs[0].tool_calls).toBeUndefined(); + + // Second assistant was upgraded from item_reference to carry tool_calls + expect(assistantMsgs[1].content).toBeNull(); + expect(assistantMsgs[1].tool_calls).toHaveLength(1); + expect(assistantMsgs[1].tool_calls![0].id).toBe("call_fc2"); + }); + + it("multiple item_references without function_call_output all count", () => { + const messages = responsesInputToMessages({ + model: "gpt-4", + input: [ + { role: "user", content: "q1" }, + { type: "item_reference", id: "ref_1" }, + { role: "user", content: "q2" }, + { type: "item_reference", id: "ref_2" }, + { role: "user", content: "q3" }, + ], + }); + + const assistantCount = messages.filter((m) => m.role === "assistant").length; + expect(assistantCount).toBe(2); + expect(messages).toHaveLength(5); + }); +}); + +// ─── Bug fix: reasoning_summary_text.done must include item_id ────────────── + +describe("reasoning_summary_text.done includes item_id", () => { + it("reasoning_summary_text.done has item_id matching the reasoning item", () => { + const events = buildTextStreamEvents("result", "gpt-4", 100, "thinking hard"); + const textDone = events.find((e) => e.type === "response.reasoning_summary_text.done"); + expect(textDone).toBeDefined(); + expect(textDone!.item_id).toBeDefined(); + expect(typeof textDone!.item_id).toBe("string"); + + // Verify it matches the reasoning item id + const reasoningAdded = events.find( + (e) => + e.type === "response.output_item.added" && + (e.item as { type: string })?.type === "reasoning", + ); + const reasoningId = (reasoningAdded!.item as { id: string }).id; + expect(textDone!.item_id).toBe(reasoningId); + }); +}); + +// ─── Bug fix: multi-fco after single item_reference ───────────────────────── + +describe("multi-fco after single item_reference", () => { + it("[user, item_reference, fco_A, fco_B] produces assistantCount=1 with 2 tool_calls", () => { + const messages = responsesInputToMessages({ + model: "gpt-4", + input: [ + { role: "user", content: "hello" }, + { type: "item_reference", id: "ref_multi_fc" }, + { type: "function_call_output", call_id: "call_A", output: '{"a":1}' }, + { type: "function_call_output", call_id: "call_B", output: '{"b":2}' }, + ], + }); + + const assistantMsgs = messages.filter((m) => m.role === "assistant"); + expect(assistantMsgs).toHaveLength(1); + expect(assistantMsgs[0].tool_calls).toHaveLength(2); + expect(assistantMsgs[0].tool_calls![0].id).toBe("call_A"); + expect(assistantMsgs[0].tool_calls![1].id).toBe("call_B"); + + const toolMsgs = messages.filter((m) => m.role === "tool"); + expect(toolMsgs).toHaveLength(2); + }); + + it("[user, item_reference, fco_A, fco_B, user] produces assistantCount=1", () => { + const messages = responsesInputToMessages({ + model: "gpt-4", + input: [ + { role: "user", content: "hello" }, + { type: "item_reference", id: "ref_multi_fc" }, + { type: "function_call_output", call_id: "call_A", output: '{"a":1}' }, + { type: "function_call_output", call_id: "call_B", output: '{"b":2}' }, + { role: "user", content: "next question" }, + ], + }); + + const assistantCount = messages.filter((m) => m.role === "assistant").length; + expect(assistantCount).toBe(1); + }); +}); + +// ─── e2e: turnIndex + item_reference via Responses API ────────────────────── + +describe("turnIndex + item_reference via Responses API (e2e)", () => { + it("selects turnIndex:1 fixture when input has item_reference + fco (assistantCount=1)", async () => { + const turn0Fixture: Fixture = { + match: { userMessage: "turn-index-test", turnIndex: 0 }, + response: { content: "turn zero response" }, + }; + const turn1Fixture: Fixture = { + match: { userMessage: "turn-index-test", turnIndex: 1 }, + response: { content: "turn one response" }, + }; + instance = await createServer([turn0Fixture, turn1Fixture]); + + // Input: [user, item_reference, function_call_output, user] + // This should produce assistantCount=1 → turnIndex 1 match + const res = await post(`${instance.url}/v1/responses`, { + model: "gpt-4", + input: [ + { role: "user", content: "first question" }, + { type: "item_reference", id: "ref_prev_assistant" }, + { type: "function_call_output", call_id: "call_prev", output: '{"done":true}' }, + { role: "user", content: "turn-index-test" }, + ], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.output[0].content[0].text).toBe("turn one response"); + }); + + it("multi-fco [user, item_reference, fco_A, fco_B, user] produces assistantCount=1", async () => { + const turn0Fixture: Fixture = { + match: { userMessage: "multi-fco-turn-test", turnIndex: 0 }, + response: { content: "should not match" }, + }; + const turn1Fixture: Fixture = { + match: { userMessage: "multi-fco-turn-test", turnIndex: 1 }, + response: { content: "correct turn one" }, + }; + instance = await createServer([turn0Fixture, turn1Fixture]); + + const res = await post(`${instance.url}/v1/responses`, { + model: "gpt-4", + input: [ + { role: "user", content: "initial" }, + { type: "item_reference", id: "ref_2tool_assistant" }, + { type: "function_call_output", call_id: "call_X", output: '{"x":1}' }, + { type: "function_call_output", call_id: "call_Y", output: '{"y":2}' }, + { role: "user", content: "multi-fco-turn-test" }, + ], + stream: false, + }); + + expect(res.status).toBe(200); + const body = JSON.parse(res.body); + expect(body.output[0].content[0].text).toBe("correct turn one"); + }); +}); + +// ─── Debug logging in handleResponses ─────────────────────────────────────── + +describe("handleResponses debug logging", () => { + it("logs debug on fixture match", async () => { + const journal = new Journal(); + const debugMessages: string[] = []; + const logger = new Logger("debug"); + const origDebug = logger.debug.bind(logger); + logger.debug = (...args: unknown[]) => { + debugMessages.push(String(args[0])); + origDebug(...args); + }; + const defaults = { latency: 0, chunkSize: 10, logger }; + + const mockReq = { + method: "POST", + url: "/v1/responses", + headers: {}, + } as unknown as http.IncomingMessage; + + const mockRes = createMockRes(); + + await handleResponses( + mockReq, + mockRes, + JSON.stringify({ + model: "gpt-4", + input: [{ role: "user", content: "hello" }], + }), + [textFixture], + journal, + defaults, + () => {}, + ); + + expect(debugMessages.some((m) => m.includes("Responses fixture matched"))).toBe(true); + }); + + it("logs debug on no fixture match", async () => { + const journal = new Journal(); + const debugMessages: string[] = []; + const logger = new Logger("debug"); + const origDebug = logger.debug.bind(logger); + logger.debug = (...args: unknown[]) => { + debugMessages.push(String(args[0])); + origDebug(...args); + }; + const defaults = { latency: 0, chunkSize: 10, logger }; + + const mockReq = { + method: "POST", + url: "/v1/responses", + headers: {}, + } as unknown as http.IncomingMessage; + + const mockRes = createMockRes(); + + await handleResponses( + mockReq, + mockRes, + JSON.stringify({ + model: "gpt-4", + input: [{ role: "user", content: "no-match-here" }], + }), + [], + journal, + defaults, + () => {}, + ); + + expect(debugMessages.some((m) => m.includes("No responses fixture matched"))).toBe(true); + }); +}); diff --git a/src/bedrock-converse.ts b/src/bedrock-converse.ts index 605126d4..f842ebf9 100644 --- a/src/bedrock-converse.ts +++ b/src/bedrock-converse.ts @@ -365,6 +365,12 @@ export async function handleConverse( defaults.requestTransform, ); + if (fixture) { + logger.debug(`Fixture matched: ${JSON.stringify(fixture.match).slice(0, 120)}`); + } else { + logger.debug(`No fixture matched for request`); + } + if (fixture) { journal.incrementFixtureMatchCount(fixture, fixtures, testId); } @@ -616,6 +622,12 @@ export async function handleConverseStream( defaults.requestTransform, ); + if (fixture) { + logger.debug(`Fixture matched: ${JSON.stringify(fixture.match).slice(0, 120)}`); + } else { + logger.debug(`No fixture matched for request`); + } + if (fixture) { journal.incrementFixtureMatchCount(fixture, fixtures, testId); } diff --git a/src/bedrock.ts b/src/bedrock.ts index 02a3dc1f..337ab426 100644 --- a/src/bedrock.ts +++ b/src/bedrock.ts @@ -358,6 +358,12 @@ export async function handleBedrock( defaults.requestTransform, ); + if (fixture) { + logger.debug(`Fixture matched: ${JSON.stringify(fixture.match).slice(0, 120)}`); + } else { + logger.debug(`No fixture matched for request`); + } + if (fixture) { journal.incrementFixtureMatchCount(fixture, fixtures, testId); } @@ -926,6 +932,12 @@ export async function handleBedrockStream( defaults.requestTransform, ); + if (fixture) { + logger.debug(`Fixture matched: ${JSON.stringify(fixture.match).slice(0, 120)}`); + } else { + logger.debug(`No fixture matched for request`); + } + if (fixture) { journal.incrementFixtureMatchCount(fixture, fixtures, testId); } diff --git a/src/cohere.ts b/src/cohere.ts index afc99010..608037cf 100644 --- a/src/cohere.ts +++ b/src/cohere.ts @@ -786,6 +786,9 @@ export async function handleCohere( if (fixture) { journal.incrementFixtureMatchCount(fixture, fixtures, testId); + logger.debug(`Fixture matched: ${JSON.stringify(fixture.match).slice(0, 120)}`); + } else { + logger.debug(`No fixture matched for request`); } if ( diff --git a/src/embeddings.ts b/src/embeddings.ts index 1291322b..15e663f7 100644 --- a/src/embeddings.ts +++ b/src/embeddings.ts @@ -126,6 +126,9 @@ export async function handleEmbeddings( if (fixture) { journal.incrementFixtureMatchCount(fixture, fixtures, testId); + logger.debug(`Fixture matched: ${JSON.stringify(fixture.match).slice(0, 120)}`); + } else { + logger.debug(`No fixture matched for request`); } if ( diff --git a/src/gemini.ts b/src/gemini.ts index 232ab49a..94e6d5ba 100644 --- a/src/gemini.ts +++ b/src/gemini.ts @@ -549,6 +549,12 @@ export async function handleGemini( ); const path = req.url ?? `/v1beta/models/${model}:generateContent`; + if (fixture) { + logger.debug(`Fixture matched: ${JSON.stringify(fixture.match).slice(0, 120)}`); + } else { + logger.debug(`No fixture matched for request`); + } + if (fixture) { journal.incrementFixtureMatchCount(fixture, fixtures, testId); } diff --git a/src/images.ts b/src/images.ts index fe20f8b3..b3e528f1 100644 --- a/src/images.ts +++ b/src/images.ts @@ -106,6 +106,9 @@ export async function handleImages( if (fixture) { journal.incrementFixtureMatchCount(fixture, fixtures, testId); + defaults.logger.debug(`Fixture matched: ${JSON.stringify(fixture.match).slice(0, 120)}`); + } else { + defaults.logger.debug(`No fixture matched for request`); } if ( diff --git a/src/messages.ts b/src/messages.ts index 9b61264e..93a153dc 100644 --- a/src/messages.ts +++ b/src/messages.ts @@ -737,6 +737,14 @@ export async function handleMessages( if (fixture) { journal.incrementFixtureMatchCount(fixture, fixtures, testId); + logger.debug(`Fixture matched: ${JSON.stringify(fixture.match).slice(0, 120)}`); + } else { + const lastUserMsg = completionReq.messages.filter((m) => m.role === "user").pop(); + const snippet = + typeof lastUserMsg?.content === "string" ? lastUserMsg.content.slice(0, 80) : ""; + logger.debug( + `No fixture matched for request (model=${completionReq.model ?? "?"}, msg="${snippet}")`, + ); } if ( diff --git a/src/ollama.ts b/src/ollama.ts index 698567fe..eb21aca8 100644 --- a/src/ollama.ts +++ b/src/ollama.ts @@ -498,6 +498,9 @@ export async function handleOllama( if (fixture) { journal.incrementFixtureMatchCount(fixture, fixtures, testId); + logger.debug(`Fixture matched: ${JSON.stringify(fixture.match).slice(0, 120)}`); + } else { + logger.debug(`No fixture matched for request`); } if ( @@ -800,6 +803,9 @@ export async function handleOllamaGenerate( if (fixture) { journal.incrementFixtureMatchCount(fixture, fixtures, testId); + defaults.logger.debug(`Fixture matched: ${JSON.stringify(fixture.match).slice(0, 120)}`); + } else { + defaults.logger.debug(`No fixture matched for request`); } if ( diff --git a/src/responses.ts b/src/responses.ts index f7c91a60..37132e37 100644 --- a/src/responses.ts +++ b/src/responses.ts @@ -86,6 +86,8 @@ function extractTextContent(content: string | ResponsesContentPart[] | undefined export function responsesInputToMessages(req: ResponsesRequest): ChatMessage[] { const messages: ChatMessage[] = []; + // Track item_reference placeholders so we can upgrade or clean them up + const itemReferencePlaceholders = new WeakSet(); // instructions field → system message if (req.instructions) { @@ -120,15 +122,85 @@ export function responsesInputToMessages(req: ResponsesRequest): ChatMessage[] { ], }); } else if (item.type === "function_call_output") { + // Bug 1 fix: If there's no preceding assistant message with a matching + // tool_call for this call_id, synthesize one. This happens when the AI SDK + // sends [user, item_reference, function_call_output] — the item_reference + // placeholder (see below) has no tool_calls, so we need a real assistant + // message with the tool_call for turnIndex counting. + const hasMatchingToolCall = messages.some( + (m) => m.role === "assistant" && m.tool_calls?.some((tc) => tc.id === item.call_id), + ); + if (!hasMatchingToolCall) { + // Check if the last message is an item_reference placeholder — if so, + // upgrade it to carry the tool_call instead of synthesizing a duplicate. + const lastMsg = messages[messages.length - 1]; + if ( + lastMsg && + lastMsg.role === "assistant" && + itemReferencePlaceholders.has(lastMsg) && + !lastMsg.tool_calls + ) { + lastMsg.content = null; + lastMsg.tool_calls = [ + { + id: item.call_id ?? generateToolCallId(), + type: "function", + function: { name: "", arguments: "" }, + }, + ]; + itemReferencePlaceholders.delete(lastMsg); + } else { + // Multi-fco case: look for a recent assistant with tool_calls that + // belongs to the same turn. After the first fco upgrades a placeholder, + // subsequent fco's see [assistant(call_A), tool(call_A)] — the last + // assistant with tool_calls (right before the trailing tool messages) + // is the correct target. + let appended = false; + for (let k = messages.length - 1; k >= 0; k--) { + const m = messages[k]; + if (m.role === "assistant" && m.tool_calls) { + m.tool_calls.push({ + id: item.call_id ?? generateToolCallId(), + type: "function", + function: { name: "", arguments: "" }, + }); + appended = true; + break; + } + // Stop scanning if we hit a user message — different turn + if (m.role === "user") break; + } + if (!appended) { + messages.push({ + role: "assistant", + content: null, + tool_calls: [ + { + id: item.call_id ?? generateToolCallId(), + type: "function", + function: { name: "", arguments: "" }, + }, + ], + }); + } + } + } messages.push({ role: "tool", content: item.output ?? "", tool_call_id: item.call_id, }); + } else if (item.type === "item_reference") { + // Bug 6 fix: item_reference items represent prior assistant turns (text + // or function_call). Push a placeholder so they count in assistantCount. + // If a subsequent function_call_output arrives, the handler above will + // upgrade this placeholder to carry tool_calls (avoiding double-count). + const placeholder: ChatMessage = { role: "assistant", content: "" }; + itemReferencePlaceholders.add(placeholder); + messages.push(placeholder); } else { - // Skip item_reference, local_shell_call, mcp_list_tools, etc. — not needed - // for fixture matching. Logging is not threaded into this pure conversion - // function; callers can inspect the returned messages if needed. + // Skip local_shell_call, mcp_list_tools, etc. — not needed for fixture + // matching. } } @@ -370,6 +442,7 @@ function buildReasoningStreamEvents( events.push({ type: "response.reasoning_summary_part.added", + item_id: reasoningId, output_index: 0, summary_index: 0, part: { type: "summary_text", text: "" }, @@ -388,6 +461,7 @@ function buildReasoningStreamEvents( events.push({ type: "response.reasoning_summary_text.done", + item_id: reasoningId, output_index: 0, summary_index: 0, text: reasoning, @@ -395,6 +469,7 @@ function buildReasoningStreamEvents( events.push({ type: "response.reasoning_summary_part.done", + item_id: reasoningId, output_index: 0, summary_index: 0, part: { type: "summary_text", text: reasoning }, @@ -430,7 +505,7 @@ function buildWebSearchStreamEvents( type: "web_search_call", id: searchId, status: "in_progress", - action: { query: queries[i] }, + action: { type: "search", query: queries[i] }, }, }); @@ -441,7 +516,7 @@ function buildWebSearchStreamEvents( type: "web_search_call", id: searchId, status: "completed", - action: { query: queries[i] }, + action: { type: "search", query: queries[i] }, }, }); } @@ -545,7 +620,7 @@ function buildMessageOutputEvents( type: "response.content_part.added", output_index: outputIndex, content_index: 0, - part: { type: "output_text", text: "" }, + part: { type: "output_text", text: "", annotations: [] }, }); for (let i = 0; i < content.length; i += chunkSize) { @@ -568,7 +643,7 @@ function buildMessageOutputEvents( type: "response.content_part.done", output_index: outputIndex, content_index: 0, - part: { type: "output_text", text: content }, + part: { type: "output_text", text: content, annotations: [] }, }); const msgItem = { @@ -576,7 +651,7 @@ function buildMessageOutputEvents( id: msgId, status: "completed", role: "assistant", - content: [{ type: "output_text", text: content }], + content: [{ type: "output_text", text: content, annotations: [] }], }; events.push({ type: "response.output_item.done", output_index: outputIndex, item: msgItem }); @@ -603,7 +678,7 @@ function buildOutputPrefix(content: string, reasoning?: string, webSearches?: st type: "web_search_call", id: generateId("ws"), status: "completed", - action: { query }, + action: { type: "search", query }, }); } } @@ -613,7 +688,7 @@ function buildOutputPrefix(content: string, reasoning?: string, webSearches?: st id: itemId(), status: "completed", role: "assistant", - content: [{ type: "output_text", text: content }], + content: [{ type: "output_text", text: content, annotations: [] }], }); return output; @@ -869,7 +944,14 @@ export async function handleResponses( ); if (fixture) { + defaults.logger.debug( + `Responses fixture matched for ${req.method ?? "POST"} ${req.url ?? "/v1/responses"}`, + ); journal.incrementFixtureMatchCount(fixture, fixtures, testId); + } else { + defaults.logger.debug( + `No responses fixture matched for ${req.method ?? "POST"} ${req.url ?? "/v1/responses"}`, + ); } if ( diff --git a/src/server.ts b/src/server.ts index 08144b5b..5118c548 100644 --- a/src/server.ts +++ b/src/server.ts @@ -462,6 +462,14 @@ async function handleCompletions( if (fixture) { journal.incrementFixtureMatchCount(fixture, fixtures, testId); + defaults.logger.debug(`Fixture matched: ${JSON.stringify(fixture.match).slice(0, 120)}`); + } else { + const lastUserMsg = body.messages.filter((m) => m.role === "user").pop(); + const snippet = + typeof lastUserMsg?.content === "string" ? lastUserMsg.content.slice(0, 80) : ""; + defaults.logger.debug( + `No fixture matched for request (model=${body.model ?? "?"}, msg="${snippet}")`, + ); } const method = req.method ?? "POST"; diff --git a/src/speech.ts b/src/speech.ts index dddc9027..980d1799 100644 --- a/src/speech.ts +++ b/src/speech.ts @@ -93,6 +93,9 @@ export async function handleSpeech( if (fixture) { journal.incrementFixtureMatchCount(fixture, fixtures, testId); + defaults.logger.debug(`Fixture matched: ${JSON.stringify(fixture.match).slice(0, 120)}`); + } else { + defaults.logger.debug(`No fixture matched for request`); } if ( diff --git a/src/transcription.ts b/src/transcription.ts index fa67d06c..1265cc79 100644 --- a/src/transcription.ts +++ b/src/transcription.ts @@ -56,6 +56,9 @@ export async function handleTranscription( if (fixture) { journal.incrementFixtureMatchCount(fixture, fixtures, testId); + defaults.logger.debug(`Fixture matched: ${JSON.stringify(fixture.match).slice(0, 120)}`); + } else { + defaults.logger.debug(`No fixture matched for request`); } if ( diff --git a/src/video.ts b/src/video.ts index 6ee9c1bb..66182989 100644 --- a/src/video.ts +++ b/src/video.ts @@ -137,6 +137,9 @@ export async function handleVideoCreate( if (fixture) { journal.incrementFixtureMatchCount(fixture, fixtures, testId); + defaults.logger.debug(`Fixture matched: ${JSON.stringify(fixture.match).slice(0, 120)}`); + } else { + defaults.logger.debug(`No fixture matched for request`); } if (