Skip to content

Commit 7fbe51a

Browse files
committed
Merge branch 'image-output'
Add OpenAI image_generation built-in tool and inline image context. - Wire OpenAI's image_generation built-in tool through the Responses API path so it works for openai, github-copilot (responses-api models), litellm, and custom providers (skipped on Codex OAuth). - New `image-generation?` model capability for gpt-4.1 family, gpt-4o-mini, gpt-5 family, and o3. - Stream generated images to clients as a new `ChatImageContent` variant (mediaType + base64) and persist them to chat history under a dedicated `image_generation_call` role so subsequent turns can iterate. - Replay generated images as a user-role `input_image` data URL across providers; Anthropic surfaces them the same way for cross-provider history compatibility. - Add an `ImageContext` request type so clients without filesystem access can attach base64 images inline. 🤖 Generated with [eca](https://eca.dev) Co-Authored-By: eca-agent <git@eca.dev> # Conflicts: # CHANGELOG.md
2 parents dd2a37e + 13e1f3e commit 7fbe51a

14 files changed

Lines changed: 537 additions & 34 deletions

File tree

CHANGELOG.md

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,11 @@
33
## Unreleased
44

55
- Add `${plugin:root}` dynamic interpolation for plugin-provided config, hooks, commands, and rules.
6-
- Bugfix: avoid `Divide by zero` crash in chat auto-compact when models.dev reports `0` for a model's context/output limits (e.g. `openai/chatgpt-image-latest`); such limits are now normalized to `nil` and `auto-compact?` skips models without a known positive context window.
6+
- Support OpenAI's built-in `image_generation` tool via the Responses API for capable models (`openai/gpt-5.x`, `openai/gpt-4.1`). Generated images are streamed back as a new `image` chat content type carrying `mediaType` and base64 data. Available on every provider whose API is `openai-responses` (`openai`, `github-copilot` responses-api models, `litellm`, custom providers).
7+
- Support image edits via the same `image_generation` tool: assistant-generated images now persist to chat history so subsequent turns can iterate ("now make it blue, smaller, with a red border"), resumed chats replay previously generated images, and clients can attach source images either by file path (existing `FileContext`) or via a new inline base64 `ImageContext` request type for clients without filesystem access.
78
- Fix inline completion crash when renewing auth tokens before completion requests. #437
9+
- Bugfix: avoid `Divide by zero` crash in chat auto-compact when models.dev reports `0` for a model's context/output limits (e.g. `openai/chatgpt-image-latest`); such limits are now normalized to `nil` and `auto-compact?` skips models without a known positive context window.
10+
- Bugfix: image edit follow-up turns no longer fail on the OpenAI Responses API when prior generations are replayed; generated images are now persisted under a dedicated `image_generation_call` history role and replayed as a user-role `input_image` data URL across providers.
811

912
## 0.130.1
1013

docs/protocol.md

Lines changed: 46 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -413,7 +413,7 @@ interface ChatPromptParams {
413413
*/
414414
type Model = string;
415415

416-
type ChatContext = FileContext | DirectoryContext | WebContext | RepoMapContext | CursorContext |McpResourceContext;
416+
type ChatContext = FileContext | DirectoryContext | WebContext | RepoMapContext | CursorContext | McpResourceContext | ImageContext;
417417

418418
/**
419419
* Context related to a file in the workspace
@@ -458,6 +458,28 @@ interface WebContext {
458458
url: string;
459459
}
460460

461+
/**
462+
* Inline image context supplied by the client.
463+
*
464+
* Use this when the client cannot supply a filesystem path the server can
465+
* read (e.g. web ECA). For filesystem-capable clients (vscode, intellij,
466+
* emacs, …) a `FileContext` pointing at a `.png/.jpg/.jpeg/.gif/.webp`
467+
* file is equivalent and can keep being used.
468+
*/
469+
interface ImageContext {
470+
type: 'image';
471+
472+
/**
473+
* MIME type of the image bytes (e.g. 'image/png', 'image/jpeg').
474+
*/
475+
mediaType: string;
476+
477+
/**
478+
* Raw base64-encoded image bytes (no `data:` URL prefix).
479+
*/
480+
base64: string;
481+
}
482+
461483
/**
462484
* Context about the workspace's repo-map, automatically calculated by server.
463485
* Clients should include this in chat by default, but users may want to exclude
@@ -589,6 +611,7 @@ interface ChatContentReceivedParams {
589611
type ChatContent =
590612
ChatTextContent
591613
| ChatURLContent
614+
| ChatImageContent
592615
| ChatProgressContent
593616
| ChatUsageContent
594617
| ChatReasonStartedContent
@@ -769,6 +792,28 @@ interface ChatURLContent {
769792
url: string;
770793
}
771794

795+
/**
796+
* Image content from the assistant, produced by a server-side image
797+
* generation tool (e.g. OpenAI's `image_generation` Responses-API tool).
798+
*
799+
* The image bytes are delivered inline as base64 so that web/remote ECA
800+
* clients (e.g. web.eca.dev) can render without filesystem access.
801+
*/
802+
interface ChatImageContent {
803+
type: 'image';
804+
805+
/**
806+
* The MIME type of the image bytes (e.g. 'image/png').
807+
*/
808+
mediaType: string;
809+
810+
/**
811+
* Raw base64-encoded image bytes (no `data:` URL prefix).
812+
* Clients should decode and render or wrap in a data URL as needed.
813+
*/
814+
base64: string;
815+
}
816+
772817
/**
773818
* Details about the chat's usage, like used tokens and costs.
774819
*/

src/eca/features/chat.clj

Lines changed: 88 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -119,16 +119,29 @@
119119
(case role
120120
("user"
121121
"system"
122-
"assistant") [{:role role
123-
:content (reduce
124-
(fn [m content]
125-
(case (:type content)
126-
:text (assoc m
127-
:type :text
128-
:text (str (:text m) "\n" (:text content)))
129-
m))
130-
(assoc-some {} :content-id content-id)
131-
message-content)}]
122+
"assistant") (let [text-content (reduce
123+
(fn [m content]
124+
(case (:type content)
125+
:text (assoc m
126+
:type :text
127+
:text (str (:text m) "\n" (:text content)))
128+
m))
129+
(assoc-some {} :content-id content-id)
130+
message-content)
131+
image-entries (keep
132+
(fn [content]
133+
(when (= :image (:type content))
134+
{:role role
135+
:content {:type :image
136+
:media-type (:media-type content)
137+
:base64 (:base64 content)}}))
138+
message-content)
139+
;; Drop the text entry when there's no actual text; otherwise image-only content
140+
;; would have produced an empty `{}` content map.
141+
text-entries (if (:type text-content)
142+
[{:role role :content text-content}]
143+
[])]
144+
(vec (concat text-entries image-entries)))
132145
"tool_call" [{:role :assistant
133146
:content {:type :toolCallPrepare
134147
:origin (:origin message-content)
@@ -170,6 +183,10 @@
170183
:error (:error message-content)
171184
:id (:id message-content)
172185
:outputs (:contents (:output message-content))}}]
186+
"image_generation_call" [{:role :assistant
187+
:content {:type :image
188+
:media-type (:media-type message-content)
189+
:base64 (:base64 message-content)}}]
173190
"server_tool_use" [{:role :assistant
174191
:content {:type :toolCallPrepare
175192
:origin :server
@@ -734,6 +751,17 @@
734751
:text (do (swap! received-msgs* str (:text msg))
735752
(lifecycle/send-content! chat-ctx :assistant {:type :text :text (:text msg)}))
736753
:url (lifecycle/send-content! chat-ctx :assistant {:type :url :title (:title msg) :url (:url msg)})
754+
:image (let [client-content {:type :image
755+
:media-type (:media-type msg)
756+
:base64 (:base64 msg)}
757+
history-content (assoc-some
758+
{:media-type (:media-type msg)
759+
:base64 (:base64 msg)}
760+
:id (:id msg))]
761+
;; Provider normalize-messages converts this role back to a user-role image for replay.
762+
(add-to-history! {:role "image_generation_call"
763+
:content history-content})
764+
(lifecycle/send-content! chat-ctx :assistant client-content))
737765
:limit-reached (do (lifecycle/send-content!
738766
chat-ctx
739767
:system
@@ -897,6 +925,56 @@
897925
(tc/transition-tool-call! db* chat-ctx id :cleanup-finished
898926
{:name (get-in (tc/get-tool-call-state @db* chat-id id) [:name] "web_search")}))
899927
nil)))
928+
:on-server-image-generation (fn [{:keys [status id name]}]
929+
(lifecycle/assert-chat-not-stopped! chat-ctx)
930+
(let [summary "Generating image"]
931+
(case status
932+
:started (do
933+
(swap! server-tool-times* assoc id (System/currentTimeMillis))
934+
(tc/transition-tool-call! db* chat-ctx id :tool-prepare
935+
{:name name
936+
:server :llm
937+
:origin :server
938+
:arguments-text ""
939+
:summary summary})
940+
(tc/transition-tool-call! db* chat-ctx id :tool-run
941+
{:approved?* (promise)
942+
:future-cleanup-complete?* (promise)
943+
:name name
944+
:server :llm
945+
:origin :server
946+
:arguments {}
947+
:manual-approval false
948+
:summary summary})
949+
(tc/transition-tool-call! db* chat-ctx id :approval-allow
950+
{:reason :server-tool})
951+
(tc/transition-tool-call! db* chat-ctx id :execution-start
952+
{:delayed-future (delay nil)
953+
:origin :server
954+
:name name
955+
:server :llm
956+
:arguments {}
957+
:start-time (System/currentTimeMillis)
958+
:summary summary
959+
:progress-text "Generating image"}))
960+
:finished (let [start-time (get @server-tool-times* id)
961+
total-time-ms (if start-time
962+
(- (System/currentTimeMillis) start-time)
963+
0)
964+
resolved-name (get-in (tc/get-tool-call-state @db* chat-id id) [:name] "image_generation")]
965+
(tc/transition-tool-call! db* chat-ctx id :execution-end
966+
{:origin :server
967+
:name resolved-name
968+
:server :llm
969+
:arguments {}
970+
:error false
971+
:outputs [{:type :text :text "Generated image (png)"}]
972+
:total-time-ms total-time-ms
973+
:progress-text "Generating"
974+
:summary summary})
975+
(tc/transition-tool-call! db* chat-ctx id :cleanup-finished
976+
{:name resolved-name}))
977+
nil)))
900978
:on-error (fn [{:keys [message exception] :as error-data}]
901979
(let [{error-type :error/type} (llm-providers.errors/classify-error error-data)
902980
db @db*

src/eca/features/context.clj

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@
9494
(logger/warn logger-tag "File not found or unreadable at" path)))
9595

9696
(defn raw-contexts->refined [contexts db]
97-
(mapcat (fn [{:keys [type path lines-range position uri]}]
97+
(mapcat (fn [{:keys [type path lines-range position uri media-type mediaType base64]}]
9898
(case (name type)
9999
"file" (if-let [ctx (file->refined-context path lines-range)]
100100
[ctx]
@@ -104,6 +104,13 @@
104104
(keep (fn [path]
105105
(let [filename (str (fs/canonicalize path))]
106106
(file->refined-context filename nil)))))
107+
"image" (let [mt (or media-type mediaType)]
108+
(if (and mt base64)
109+
[{:type :image
110+
:media-type mt
111+
:base64 base64}]
112+
(do (logger/warn logger-tag "Image context missing mediaType or base64; ignoring")
113+
[])))
107114
"repoMap" [{:type :repoMap}]
108115
"cursor" [{:type :cursor
109116
:path path

src/eca/features/providers.clj

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -142,7 +142,8 @@
142142
:capabilities {:reason (:reason? caps false)
143143
:vision (:image-input? caps false)
144144
:tools (:tools caps false)
145-
:web-search (:web-search caps false)}}
145+
:web-search (:web-search caps false)
146+
:image-generation (:image-generation? caps false)}}
146147
:cost (when (or (:input-token-cost caps) (:output-token-cost caps))
147148
{:input (:input-token-cost caps)
148149
:output (:output-token-cost caps)})

0 commit comments

Comments
 (0)