Skip to content

Commit 9db90a0

Browse files
authored
fix(llm): emit structured image blocks for tool-result media in Anthropic Messages (anomalyco#28755)
1 parent 700d012 commit 9db90a0

4 files changed

Lines changed: 177 additions & 3 deletions

File tree

packages/llm/src/protocols/anthropic-messages.ts

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import {
1414
type ProviderMetadata,
1515
type ToolCallPart,
1616
type ToolDefinition,
17+
type ToolResultContentPart,
1718
type ToolResultPart,
1819
} from "../schema"
1920
import { JsonObject, optionalArray, optionalNull, ProviderShared } from "./shared"
@@ -96,10 +97,18 @@ const AnthropicServerToolResultBlock = Schema.Struct({
9697
})
9798
type AnthropicServerToolResultBlock = Schema.Schema.Type<typeof AnthropicServerToolResultBlock>
9899

// One element of the array form of `tool_result.content`: either a text block
// or an image block. Tools that return image bytes (screenshots, image search,
// ...) need the array form so the image travels as a real image input rather
// than as base64 JSON-stringified into the prompt, which bloats the context
// and can push the conversation past the model's token limit.
const AnthropicToolResultContent = Schema.Union([AnthropicTextBlock, AnthropicImageBlock])

// A `tool_result` block. `content` is either a plain string or an ordered
// array of text/image blocks (see AnthropicToolResultContent).
const AnthropicToolResultBlock = Schema.Struct({
  type: Schema.tag("tool_result"),
  tool_use_id: Schema.String,
  content: Schema.Union([
    // Plain-string form.
    Schema.String,
    // Structured form: ordered text/image blocks.
    Schema.Array(AnthropicToolResultContent),
  ]),
  is_error: Schema.optional(Schema.Boolean),
  cache_control: Schema.optional(AnthropicCacheControl),
})
@@ -298,6 +307,31 @@ const lowerImage = Effect.fn("AnthropicMessages.lowerImage")(function* (part: Me
298307
} satisfies AnthropicImageBlock
299308
})
300309

// Lowers one structured tool-result content part into an Anthropic content
// block so media stays provider-native instead of being JSON-stringified.
//
// - text parts become `text` blocks;
// - media parts whose `mediaType` starts with "image/" become base64 `image`
//   blocks (payload comes from `ProviderShared.mediaBase64`);
// - any other media type fails through `invalid(...)`.
const lowerToolResultContentItem = Effect.fn("AnthropicMessages.lowerToolResultContentItem")(function* (
  item: ToolResultContentPart,
) {
  if (item.type === "text") {
    const textBlock = { type: "text" as const, text: item.text } satisfies AnthropicTextBlock
    return textBlock
  }

  // Guard: only image media can be expressed inside a tool result here.
  const isImage = item.mediaType.startsWith("image/")
  if (!isImage) {
    return yield* invalid(`Anthropic Messages tool-result media content only supports images, got ${item.mediaType}`)
  }

  const imageBlock = {
    type: "image" as const,
    source: {
      type: "base64" as const,
      media_type: item.mediaType,
      data: ProviderShared.mediaBase64(item),
    },
  } satisfies AnthropicImageBlock
  return imageBlock
})
327+
// Produces the `content` value for a `tool_result` block.
//
// Results of type "content" are lowered item by item into an array of
// provider-native blocks. Every other result type keeps the string form from
// `ProviderShared.toolResultText`, for backward compatibility with existing
// cassettes and provider expectations.
const lowerToolResultContent = Effect.fn("AnthropicMessages.lowerToolResultContent")(function* (part: ToolResultPart) {
  if (part.result.type === "content") {
    const blocks = yield* Effect.forEach(part.result.value, lowerToolResultContentItem)
    return blocks
  }
  return ProviderShared.toolResultText(part)
})
334+
301335
const lowerMessages = Effect.fn("AnthropicMessages.lowerMessages")(function* (
302336
request: LLMRequest,
303337
breakpoints: Cache.Breakpoints,
@@ -360,7 +394,7 @@ const lowerMessages = Effect.fn("AnthropicMessages.lowerMessages")(function* (
360394
content.push({
361395
type: "tool_result",
362396
tool_use_id: part.id,
363-
content: ProviderShared.toolResultText(part),
397+
content: yield* lowerToolResultContent(part),
364398
is_error: part.result.type === "error" ? true : undefined,
365399
cache_control: cacheControl(breakpoints, part.cache),
366400
})

0 commit comments

Comments
 (0)