Skip to content

Commit 02cc782

Browse files
committed
Fix missing token usage in UI for Google/Gemini #414
Send `stream_options.include_usage` on streaming chat completion requests. The OpenAI streaming spec only emits a final `usage` chunk when the client opts in; Gemini's OpenAI-compat endpoint follows that strictly, so without the flag the usage callback never fired and the UI never received a `{:type :usage}` message. Also harden the openai-chat mock to only emit synthetic usage when the request carries the flag, so the integration tests can actually catch this. Closes #414
1 parent 41ab27e commit 02cc782

3 files changed

Lines changed: 53 additions & 35 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
## Unreleased
44

5+
- Fix token usage not being reported in the UI for Google/Gemini (and other strict OpenAI-compat providers) by opting into `stream_options.include_usage` on streaming chat completion requests. #414
6+
57
## 0.129.0
68

79
- Restore the model used at chat creation when resuming a chat: `chat/open` and the `/resume` slash command now emit `config/updated` to realign the client's selected model to the persisted chat's `:model`, and the next `chat/prompt` prefers that stored model over the agent/global default (stale models still fall through gracefully). #417

integration-test/llm_mock/openai_chat.clj

Lines changed: 46 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88

99
(def ^:dynamic *thinking-tag* "think")
1010

11+
;; Matches the real OpenAI streaming contract: a `usage` chunk is only emitted
12+
;; when the request opts in via `stream_options.include_usage = true`.
13+
(def ^:dynamic *include-usage?* false)
14+
1115
(defn set-thinking-tag! [tag]
1216
(alter-var-root #'*thinking-tag* (constantly tag)))
1317

@@ -16,6 +20,12 @@
1620
[ch m]
1721
(hk/send! ch (str "data: " (json/generate-string m) "\n\n") false))
1822

23+
(defn ^:private send-usage!
24+
"Send a usage SSE chunk only when the client requested it."
25+
[ch payload]
26+
(when *include-usage?*
27+
(send-sse! ch {:usage payload})))
28+
1929
(defn ^:private messages->normalized-input
2030
"Transforms OpenAI Chat messages into the canonical ECA :input + :instructions format
2131
used by tests for assertions. We extract the first system message as :instructions
@@ -49,13 +59,13 @@
4959
;; Stream two content chunks, then a usage chunk, then a finish chunk
5060
(send-sse! ch {:choices [{:delta {:content "Knock"}}]})
5161
(send-sse! ch {:choices [{:delta {:content " knock!"}}]})
52-
(send-sse! ch {:usage {:prompt_tokens 10 :completion_tokens 20}})
62+
(send-usage! ch {:prompt_tokens 10 :completion_tokens 20})
5363
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
5464
(hk/close ch))
5565

5666
(defn ^:private simple-text-1 [ch]
5767
(send-sse! ch {:choices [{:delta {:content "Foo"}}]})
58-
(send-sse! ch {:usage {:prompt_tokens 10 :completion_tokens 5}})
68+
(send-usage! ch {:prompt_tokens 10 :completion_tokens 5})
5969
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
6070
(hk/close ch))
6171

@@ -64,7 +74,7 @@
6474
(send-sse! ch {:choices [{:delta {:content " bar!"}}]})
6575
(send-sse! ch {:choices [{:delta {:content "\n\n"}}]})
6676
(send-sse! ch {:choices [{:delta {:content "Ha!"}}]})
67-
(send-sse! ch {:usage {:prompt_tokens 5 :completion_tokens 15}})
77+
(send-usage! ch {:prompt_tokens 5 :completion_tokens 15})
6878
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
6979
(hk/close ch))
7080

@@ -75,7 +85,7 @@
7585
(send-sse! ch {:choices [{:delta {:content (str "</" *thinking-tag* ">")}}]})
7686
(send-sse! ch {:choices [{:delta {:content "hello"}}]})
7787
(send-sse! ch {:choices [{:delta {:content " there!"}}]})
78-
(send-sse! ch {:usage {:prompt_tokens 10 :completion_tokens 20}})
88+
(send-usage! ch {:prompt_tokens 10 :completion_tokens 20})
7989
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
8090
(hk/close ch))
8191

@@ -86,7 +96,7 @@
8696
(send-sse! ch {:choices [{:delta {:content (str "</" *thinking-tag* ">")}}]})
8797
(send-sse! ch {:choices [{:delta {:content "I'm "}}]})
8898
(send-sse! ch {:choices [{:delta {:content " fine"}}]})
89-
(send-sse! ch {:usage {:prompt_tokens 10 :completion_tokens 20}})
99+
(send-usage! ch {:prompt_tokens 10 :completion_tokens 20})
90100
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
91101
(hk/close ch))
92102

@@ -115,15 +125,15 @@
115125
:function {:arguments "{\"pat"}}]}}]})
116126
(send-sse! ch {:choices [{:delta {:tool_calls [{:index 0
117127
:function {:arguments (str "h\":\"" (h/json-escape-path path) "\"}")}}]}}]})
118-
(send-sse! ch {:usage {:prompt_tokens 5 :completion_tokens 30}})
128+
(send-usage! ch {:prompt_tokens 5 :completion_tokens 30})
119129
(send-sse! ch {:choices [{:delta {} :finish_reason "tool_calls"}]})
120130
(hk/close ch))
121131

122132
(defn ^:private tool-calling-with-thought-signature-1 [ch]
123133
;; Second stage response after tool output
124134
(send-sse! ch {:choices [{:delta {:content "The files I see:\n"}}]})
125135
(send-sse! ch {:choices [{:delta {:content "file1\nfile2\n"}}]})
126-
(send-sse! ch {:usage {:prompt_tokens 5 :completion_tokens 30}})
136+
(send-usage! ch {:prompt_tokens 5 :completion_tokens 30})
127137
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
128138
(hk/close ch))
129139

@@ -132,34 +142,36 @@
132142
(let [body (some-> (slurp (:body req)) (json/parse-string true))
133143
messages (:messages body)
134144
normalized (messages->normalized-input messages)
135-
normalized-body (merge normalized (select-keys body [:tools]))]
145+
normalized-body (merge normalized (select-keys body [:tools]))
146+
include-usage? (boolean (get-in body [:stream_options :include_usage]))]
136147
(hk/as-channel
137148
req
138149
{:on-open (fn [ch]
150+
(binding [*include-usage?* include-usage?]
139151
;; Send initial response headers for SSE
140-
(hk/send! ch {:status 200
141-
:headers {"Content-Type" "text/event-stream; charset=utf-8"
142-
"Cache-Control" "no-cache"
143-
"Connection" "keep-alive"}}
144-
false)
145-
(if (string/includes? (:content (first (:messages body))) llm.mocks/chat-title-generator-str)
146-
(chat-title-text-0 ch)
147-
(do
148-
(llm.mocks/set-req-body! llm.mocks/*case* normalized-body)
149-
(llm.mocks/set-raw-messages! llm.mocks/*case* messages)
150-
(let [has-tool-message? (some #(= "tool" (:role %)) messages)]
151-
(case llm.mocks/*case*
152-
:simple-text-0 (simple-text-0 ch)
153-
:simple-text-1 (simple-text-1 ch)
154-
:simple-text-2 (simple-text-2 ch)
155-
:reasoning-0 (reasoning-text-0 ch)
156-
:reasoning-1 (reasoning-text-1 ch)
157-
:tool-calling-with-thought-signature-0
158-
(if has-tool-message?
159-
(tool-calling-with-thought-signature-1 ch)
160-
(tool-calling-with-thought-signature-0 ch (h/project-path->canon-path "resources")))
161-
;; default fallback
162-
(do
163-
(send-sse! ch {:choices [{:delta {:content "hello"}}]})
164-
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
165-
(hk/close ch)))))))})))
152+
(hk/send! ch {:status 200
153+
:headers {"Content-Type" "text/event-stream; charset=utf-8"
154+
"Cache-Control" "no-cache"
155+
"Connection" "keep-alive"}}
156+
false)
157+
(if (string/includes? (:content (first (:messages body))) llm.mocks/chat-title-generator-str)
158+
(chat-title-text-0 ch)
159+
(do
160+
(llm.mocks/set-req-body! llm.mocks/*case* normalized-body)
161+
(llm.mocks/set-raw-messages! llm.mocks/*case* messages)
162+
(let [has-tool-message? (some #(= "tool" (:role %)) messages)]
163+
(case llm.mocks/*case*
164+
:simple-text-0 (simple-text-0 ch)
165+
:simple-text-1 (simple-text-1 ch)
166+
:simple-text-2 (simple-text-2 ch)
167+
:reasoning-0 (reasoning-text-0 ch)
168+
:reasoning-1 (reasoning-text-1 ch)
169+
:tool-calling-with-thought-signature-0
170+
(if has-tool-message?
171+
(tool-calling-with-thought-signature-1 ch)
172+
(tool-calling-with-thought-signature-0 ch (h/project-path->canon-path "resources")))
173+
;; default fallback
174+
(do
175+
(send-sse! ch {:choices [{:delta {:content "hello"}}]})
176+
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
177+
(hk/close ch))))))))})))

src/eca/llm_providers/openai_chat.clj

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,11 @@
453453
:stream stream?
454454
:max_completion_tokens 32000}
455455
:temperature temperature
456-
:tools (when (seq tools) (->tools tools)))
456+
:tools (when (seq tools) (->tools tools))
457+
;; Required by the OpenAI streaming spec to receive a final
458+
;; chunk with `usage`. Strict OpenAI-compat servers (e.g. Gemini
459+
;; via `/v1beta/openai`) omit usage without this flag.
460+
:stream_options (when stream? {:include_usage true}))
457461
extra-payload)
458462

459463
;; Atom to accumulate tool call data from streaming chunks.

0 commit comments

Comments (0)