Skip to content

Commit 02cc782

Browse files
committed
Fix missing token usage in UI for Google/Gemini #414
Send `stream_options.include_usage` on streaming chat completion requests. The OpenAI streaming spec only emits a final `usage` chunk when the client opts in; Gemini's OpenAI-compat endpoint follows that strictly, so without the flag the usage callback never fired and the UI never received a `{:type :usage}` message. Also harden the openai-chat mock to only emit synthetic usage when the request carries the flag, so the integration tests can actually catch this. Closes #414
1 parent 41ab27e commit 02cc782

3 files changed

Lines changed: 53 additions & 35 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22

33
## Unreleased
44

5+
- Fix token usage not being reported in the UI for Google/Gemini (and other strict OpenAI-compat providers) by opting into `stream_options.include_usage` on streaming chat completion requests. #414
6+
57
## 0.129.0
68

79
- Restore the model used at chat creation when resuming a chat: `chat/open` and the `/resume` slash command now emit `config/updated` to realign the client's selected model to the persisted chat's `:model`, and the next `chat/prompt` prefers that stored model over the agent/global default (stale models still fall through gracefully). #417

integration-test/llm_mock/openai_chat.clj

Lines changed: 46 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@
88

99
(def ^:dynamic *thinking-tag* "think")
1010

11+
;; Matches the real OpenAI streaming contract: a `usage` chunk is only emitted
12+
;; when the request opts in via `stream_options.include_usage = true`.
13+
(def ^:dynamic *include-usage?* false)
14+
1115
(defn set-thinking-tag! [tag]
1216
(alter-var-root #'*thinking-tag* (constantly tag)))
1317

@@ -16,6 +20,12 @@
1620
[ch m]
1721
(hk/send! ch (str "data: " (json/generate-string m) "\n\n") false))
1822

23+
(defn ^:private send-usage!
24+
"Send a usage SSE chunk only when the client requested it."
25+
[ch payload]
26+
(when *include-usage?*
27+
(send-sse! ch {:usage payload})))
28+
1929
(defn ^:private messages->normalized-input
2030
"Transforms OpenAI Chat messages into the canonical ECA :input + :instructions format
2131
used by tests for assertions. We extract the first system message as :instructions
@@ -49,13 +59,13 @@
4959
;; Stream two content chunks, then a usage chunk, then a finish chunk
5060
(send-sse! ch {:choices [{:delta {:content "Knock"}}]})
5161
(send-sse! ch {:choices [{:delta {:content " knock!"}}]})
52-
(send-sse! ch {:usage {:prompt_tokens 10 :completion_tokens 20}})
62+
(send-usage! ch {:prompt_tokens 10 :completion_tokens 20})
5363
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
5464
(hk/close ch))
5565

5666
(defn ^:private simple-text-1 [ch]
5767
(send-sse! ch {:choices [{:delta {:content "Foo"}}]})
58-
(send-sse! ch {:usage {:prompt_tokens 10 :completion_tokens 5}})
68+
(send-usage! ch {:prompt_tokens 10 :completion_tokens 5})
5969
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
6070
(hk/close ch))
6171

@@ -64,7 +74,7 @@
6474
(send-sse! ch {:choices [{:delta {:content " bar!"}}]})
6575
(send-sse! ch {:choices [{:delta {:content "\n\n"}}]})
6676
(send-sse! ch {:choices [{:delta {:content "Ha!"}}]})
67-
(send-sse! ch {:usage {:prompt_tokens 5 :completion_tokens 15}})
77+
(send-usage! ch {:prompt_tokens 5 :completion_tokens 15})
6878
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
6979
(hk/close ch))
7080

@@ -75,7 +85,7 @@
7585
(send-sse! ch {:choices [{:delta {:content (str "</" *thinking-tag* ">")}}]})
7686
(send-sse! ch {:choices [{:delta {:content "hello"}}]})
7787
(send-sse! ch {:choices [{:delta {:content " there!"}}]})
78-
(send-sse! ch {:usage {:prompt_tokens 10 :completion_tokens 20}})
88+
(send-usage! ch {:prompt_tokens 10 :completion_tokens 20})
7989
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
8090
(hk/close ch))
8191

@@ -86,7 +96,7 @@
8696
(send-sse! ch {:choices [{:delta {:content (str "</" *thinking-tag* ">")}}]})
8797
(send-sse! ch {:choices [{:delta {:content "I'm "}}]})
8898
(send-sse! ch {:choices [{:delta {:content " fine"}}]})
89-
(send-sse! ch {:usage {:prompt_tokens 10 :completion_tokens 20}})
99+
(send-usage! ch {:prompt_tokens 10 :completion_tokens 20})
90100
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
91101
(hk/close ch))
92102

@@ -115,15 +125,15 @@
115125
:function {:arguments "{\"pat"}}]}}]})
116126
(send-sse! ch {:choices [{:delta {:tool_calls [{:index 0
117127
:function {:arguments (str "h\":\"" (h/json-escape-path path) "\"}")}}]}}]})
118-
(send-sse! ch {:usage {:prompt_tokens 5 :completion_tokens 30}})
128+
(send-usage! ch {:prompt_tokens 5 :completion_tokens 30})
119129
(send-sse! ch {:choices [{:delta {} :finish_reason "tool_calls"}]})
120130
(hk/close ch))
121131

122132
(defn ^:private tool-calling-with-thought-signature-1 [ch]
123133
;; Second stage response after tool output
124134
(send-sse! ch {:choices [{:delta {:content "The files I see:\n"}}]})
125135
(send-sse! ch {:choices [{:delta {:content "file1\nfile2\n"}}]})
126-
(send-sse! ch {:usage {:prompt_tokens 5 :completion_tokens 30}})
136+
(send-usage! ch {:prompt_tokens 5 :completion_tokens 30})
127137
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
128138
(hk/close ch))
129139

@@ -132,34 +142,36 @@
132142
(let [body (some-> (slurp (:body req)) (json/parse-string true))
133143
messages (:messages body)
134144
normalized (messages->normalized-input messages)
135-
normalized-body (merge normalized (select-keys body [:tools]))]
145+
normalized-body (merge normalized (select-keys body [:tools]))
146+
include-usage? (boolean (get-in body [:stream_options :include_usage]))]
136147
(hk/as-channel
137148
req
138149
{:on-open (fn [ch]
150+
(binding [*include-usage?* include-usage?]
139151
;; Send initial response headers for SSE
140-
(hk/send! ch {:status 200
141-
:headers {"Content-Type" "text/event-stream; charset=utf-8"
142-
"Cache-Control" "no-cache"
143-
"Connection" "keep-alive"}}
144-
false)
145-
(if (string/includes? (:content (first (:messages body))) llm.mocks/chat-title-generator-str)
146-
(chat-title-text-0 ch)
147-
(do
148-
(llm.mocks/set-req-body! llm.mocks/*case* normalized-body)
149-
(llm.mocks/set-raw-messages! llm.mocks/*case* messages)
150-
(let [has-tool-message? (some #(= "tool" (:role %)) messages)]
151-
(case llm.mocks/*case*
152-
:simple-text-0 (simple-text-0 ch)
153-
:simple-text-1 (simple-text-1 ch)
154-
:simple-text-2 (simple-text-2 ch)
155-
:reasoning-0 (reasoning-text-0 ch)
156-
:reasoning-1 (reasoning-text-1 ch)
157-
:tool-calling-with-thought-signature-0
158-
(if has-tool-message?
159-
(tool-calling-with-thought-signature-1 ch)
160-
(tool-calling-with-thought-signature-0 ch (h/project-path->canon-path "resources")))
161-
;; default fallback
162-
(do
163-
(send-sse! ch {:choices [{:delta {:content "hello"}}]})
164-
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
165-
(hk/close ch)))))))})))
152+
(hk/send! ch {:status 200
153+
:headers {"Content-Type" "text/event-stream; charset=utf-8"
154+
"Cache-Control" "no-cache"
155+
"Connection" "keep-alive"}}
156+
false)
157+
(if (string/includes? (:content (first (:messages body))) llm.mocks/chat-title-generator-str)
158+
(chat-title-text-0 ch)
159+
(do
160+
(llm.mocks/set-req-body! llm.mocks/*case* normalized-body)
161+
(llm.mocks/set-raw-messages! llm.mocks/*case* messages)
162+
(let [has-tool-message? (some #(= "tool" (:role %)) messages)]
163+
(case llm.mocks/*case*
164+
:simple-text-0 (simple-text-0 ch)
165+
:simple-text-1 (simple-text-1 ch)
166+
:simple-text-2 (simple-text-2 ch)
167+
:reasoning-0 (reasoning-text-0 ch)
168+
:reasoning-1 (reasoning-text-1 ch)
169+
:tool-calling-with-thought-signature-0
170+
(if has-tool-message?
171+
(tool-calling-with-thought-signature-1 ch)
172+
(tool-calling-with-thought-signature-0 ch (h/project-path->canon-path "resources")))
173+
;; default fallback
174+
(do
175+
(send-sse! ch {:choices [{:delta {:content "hello"}}]})
176+
(send-sse! ch {:choices [{:delta {} :finish_reason "stop"}]})
177+
(hk/close ch))))))))})))

src/eca/llm_providers/openai_chat.clj

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,11 @@
453453
:stream stream?
454454
:max_completion_tokens 32000}
455455
:temperature temperature
456-
:tools (when (seq tools) (->tools tools)))
456+
:tools (when (seq tools) (->tools tools))
457+
;; Required by the OpenAI streaming spec to receive a final
458+
;; chunk with `usage`. Strict OpenAI-compat servers (e.g. Gemini
459+
;; via `/v1beta/openai`) omit usage without this flag.
460+
:stream_options (when stream? {:include_usage true}))
457461
extra-payload)
458462

459463
;; Atom to accumulate tool call data from streaming chunks.

0 commit comments

Comments (0)