Skip to content

Commit 700d012

Browse files
authored
fix(llm): emit structured input_image content for tool-result media in OpenAI Responses (anomalyco#28754)
1 parent 59e486a commit 700d012

5 files changed

Lines changed: 224 additions & 2 deletions

File tree

packages/llm/src/protocols/openai-responses.ts

Lines changed: 42 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,8 @@ import {
1414
type TextPart,
1515
type ToolCallPart,
1616
type ToolDefinition,
17+
type ToolResultContentPart,
18+
type ToolResultPart,
1719
} from "../schema"
1820
import { JsonObject, optionalArray, optionalNull, ProviderShared } from "./shared"
1921
import { OpenAIOptions } from "./utils/openai-options"
@@ -55,6 +57,19 @@ const OpenAIResponsesReasoningItem = Schema.Struct({
5557
encrypted_content: optionalNull(Schema.String),
5658
})
5759

60+
// `function_call_output.output` accepts either a plain string or an ordered
61+
// array of content items so tools can return images in addition to text.
62+
// https://platform.openai.com/docs/api-reference/responses/object
//
// Schema for a single element of the array form of `output`: a union of the
// input-text and input-image content schemas (both are declared elsewhere in
// this module; their exact fields are not restated here).
63+
const OpenAIResponsesFunctionCallOutputContent = Schema.Union([
64+
OpenAIResponsesInputText,
65+
OpenAIResponsesInputImage,
66+
])
// Schema for the whole `output` field: either a plain string or an array of
// the content items defined by `OpenAIResponsesFunctionCallOutputContent`.
67+
68+
const OpenAIResponsesFunctionCallOutput = Schema.Union([
69+
Schema.String,
70+
Schema.Array(OpenAIResponsesFunctionCallOutputContent),
71+
])
72+
5873
const OpenAIResponsesInputItem = Schema.Union([
5974
Schema.Struct({ role: Schema.tag("system"), content: Schema.String }),
6075
Schema.Struct({ role: Schema.tag("user"), content: Schema.Array(OpenAIResponsesInputContent) }),
@@ -69,7 +84,7 @@ const OpenAIResponsesInputItem = Schema.Union([
6984
Schema.Struct({
7085
type: Schema.tag("function_call_output"),
7186
call_id: Schema.String,
72-
output: Schema.String,
87+
output: OpenAIResponsesFunctionCallOutput,
7388
}),
7489
])
7590
// Static TypeScript type derived from the `OpenAIResponsesInputItem` schema
// value of the same name, so the type and the schema cannot drift apart.
type OpenAIResponsesInputItem = Schema.Schema.Type<typeof OpenAIResponsesInputItem>
@@ -250,6 +265,27 @@ const lowerUserContent = Effect.fn("OpenAIResponses.lowerUserContent")(function*
250265
return yield* ProviderShared.unsupportedContent("OpenAI Responses", "user", ["text", "media"])
251266
})
252267

268+
// Tool results may carry structured text/images. Keep media as provider-native
269+
// content instead of JSON-stringifying base64 into a prompt string.
//
// Lowers one tool-result content part into an OpenAI Responses content item:
//   - a part with `type === "text"` becomes `{ type: "input_text", text }`;
//   - any other part whose `mediaType` starts with "image/" becomes
//     `{ type: "input_image", image_url }`, where `image_url` is whatever
//     `ProviderShared.mediaDataUrl(item)` returns (presumably a `data:` URL;
//     confirm against ./shared);
//   - every remaining media type is rejected via `invalid(...)` with a message
//     that names the offending `mediaType`. NOTE(review): `invalid` is defined
//     outside this view; it appears to produce a failing effect, verify.
270+
const lowerToolResultContentItem = Effect.fn("OpenAIResponses.lowerToolResultContentItem")(function* (
271+
item: ToolResultContentPart,
272+
) {
273+
if (item.type === "text") return { type: "input_text" as const, text: item.text }
274+
if (item.mediaType.startsWith("image/"))
275+
return {
276+
type: "input_image" as const,
277+
image_url: ProviderShared.mediaDataUrl(item),
278+
}
279+
return yield* invalid(`OpenAI Responses tool-result media content only supports images, got ${item.mediaType}`)
280+
})
281+
282+
// Builds the `output` value of a `function_call_output` item from a
// tool-result part.
//   - When `part.result.type` is anything other than "content", the result is
//     the string returned by `ProviderShared.toolResultText(part)`.
//   - When it is "content", each entry of `part.result.value` is lowered with
//     `lowerToolResultContentItem` through `Effect.forEach`, yielding an array
//     of content items. NOTE(review): this relies on `Effect.forEach`
//     preserving input order and propagating the first item failure; confirm
//     against the Effect documentation.
const lowerToolResultOutput = Effect.fn("OpenAIResponses.lowerToolResultOutput")(function* (part: ToolResultPart) {
283+
// Text/json/error results are encoded as a plain string for backward
284+
// compatibility with existing cassettes and provider expectations.
285+
if (part.result.type !== "content") return ProviderShared.toolResultText(part)
286+
return yield* Effect.forEach(part.result.value, lowerToolResultContentItem)
287+
})
288+
253289
const lowerMessages = Effect.fn("OpenAIResponses.lowerMessages")(function* (request: LLMRequest) {
254290
const system: OpenAIResponsesInputItem[] =
255291
request.system.length === 0 ? [] : [{ role: "system", content: ProviderShared.joinText(request.system) }]
@@ -298,7 +334,11 @@ const lowerMessages = Effect.fn("OpenAIResponses.lowerMessages")(function* (requ
298334
for (const part of message.content) {
299335
if (!ProviderShared.supportsContent(part, ["tool-result"]))
300336
return yield* ProviderShared.unsupportedContent("OpenAI Responses", "tool", ["tool-result"])
301-
input.push({ type: "function_call_output", call_id: part.id, output: ProviderShared.toolResultText(part) })
337+
input.push({
338+
type: "function_call_output",
339+
call_id: part.id,
340+
output: yield* lowerToolResultOutput(part),
341+
})
302342
}
303343
}
304344

0 commit comments

Comments
 (0)