Fix Copilot provider retry loop from stale tool-call atom

ericdallo · eca-agent · ericdallo · commit 51ce4afe85bf · 2026-04-09T10:11:41.000-03:00
Three root causes combined to produce an infinite retry loop that occurred only with the Copilot provider:

1. response.output_item.done used item_id for the atom lookup, but Copilot encrypts IDs differently across SSE events. The lookup therefore missed and created orphan atom entries ({:arguments "..."} with no :id/:full-name).
2. The doseq/dissoc cleanup used response.completed's item_id, which never matched the streaming item_id, so stale entries accumulated.
3. The catch block in base-responses-request! labeled ALL exceptions as "Connection error:", causing internal state-machine errors to be classified as :overloaded and retried up to 10 times.

Fixes:

- response.output_item.done now tries the item_id first and otherwise looks up the existing atom entry by call_id (consistent across Copilot events), instead of relying on item_id alone.
- Replace doseq/dissoc with reset! to clear the entire atom before recursive requests.
- The catch block distinguishes exceptions carrying ex-data, i.e. ex-info ("Internal error"), from all other exceptions such as IO failures ("Connection error") to prevent misclassification.

Adds 4 unit tests covering: the primary path, the fallback path, phantom tool call prevention, and Copilot-style mismatched item IDs.

Refs #398

🤖 Generated with [eca](https://eca.dev)

Co-Authored-By: eca <git@eca.dev>
diff --git a/src/eca/llm_providers/openai.clj b/src/eca/llm_providers/openai.clj
@@ -96,8 +96,10 @@
               (llm-util/log-response logger-tag rid "response" body)
               (response-body->result body)))))
       (catch Exception e
-        (on-error {:exception e
-                   :message (format "Connection error: %s" (or (ex-message e) (.getName (class e))))})))))
+        (let [msg (or (ex-message e) (.getName (class e)))
+              prefix (if (ex-data e) "Internal error" "Connection error")]
+          (on-error {:exception e
+                     :message (format "%s: %s" prefix msg)}))))))
 
 (defn ^:private normalize-messages [messages supports-image?]
   (keep (fn [{:keys [role content] :as msg}]
@@ -199,8 +201,17 @@
                                 (on-reason {:status :finished
                                             :id (-> data :item :id)
                                             :external-id (-> data :item :encrypted_content)}))
-                "function_call" (swap! tool-call-by-item-id* update (-> data :item :id)
-                                       assoc :arguments (-> data :item :arguments))
+                "function_call" (let [done-item-id (-> data :item :id)
+                                      done-call-id (-> data :item :call_id)
+                                      args (-> data :item :arguments)]
+                                  (swap! tool-call-by-item-id*
+                                         (fn [m]
+                                           (if-let [existing-key (or (when (contains? m done-item-id) done-item-id)
+                                                                     (->> m
+                                                                          (some (fn [[k v]]
+                                                                                  (when (= done-call-id (:id v)) k)))))]
+                                             (assoc-in m [existing-key :arguments] args)
+                                             (assoc m done-item-id {:arguments args})))))
                 "web_search_call" (on-server-web-search {:status :finished
                                                          :id (-> data :item :id)
                                                          :output nil})
@@ -277,8 +288,7 @@
                                      :input-cache-read-tokens input-cache-read-tokens}))
                 (if (seq tool-calls)
                   (when-let [{:keys [new-messages tools fresh-api-key provider-auth]} (on-tools-called tool-calls)]
-                    (doseq [tool-call tool-calls]
-                      (swap! tool-call-by-item-id* dissoc (:item-id tool-call)))
+                    (reset! tool-call-by-item-id* {})
                     (base-responses-request!
                      {:rid (llm-util/gen-rid)
                       :body (assoc body
diff --git a/test/eca/llm_providers/openai_test.clj b/test/eca/llm_providers/openai_test.clj
@@ -211,3 +211,229 @@
           [{:role "user" :content [{:type :text :text "Check diagnostics"}]}
            {:role "tool_call" :content {:id "call-1" :full-name "eca__editor_diagnostics" :arguments nil}}]
           true)))))
+
+(defn- base-provider-params [] ; baseline first argument passed to create-response! by the tests below
+  {:model "gpt-test"
+   :user-messages [{:role "user" :content [{:type :text :text "hi"}]}]
+   :instructions "test"
+   :reason? false
+   :supports-image? false
+   :api-key "fake-key"
+   :api-url "http://localhost:1" ; NOTE(review): presumably never contacted, since every test here stubs base-responses-request! — confirm
+   :past-messages []
+   :tools [{:full-name "eca__shell_command" :description "run" :parameters {:type "object"}}] ; same tool name the stubbed stream events report
+   :web-search false
+   :extra-payload {}
+   :extra-headers nil
+   :auth-type :auth/api-key
+   :account-id nil})
+
+(defn- base-callbacks [{:keys [on-prepare-tool-call on-tools-called on-message-received on-error] ; the four callbacks a test may override
+                        :or {on-prepare-tool-call (fn [_])
+                             on-tools-called (fn [_] {:new-messages [] :tools []}) ; non-nil map; the when-let around the follow-up request in openai.clj needs a truthy result
+                             on-message-received (fn [_])
+                             on-error (fn [e] (throw (ex-info "unexpected error in test" e)))}}] ; default: a reported error throws, with e attached as ex-data
+  {:on-message-received on-message-received
+   :on-error on-error
+   :on-prepare-tool-call on-prepare-tool-call
+   :on-tools-called on-tools-called
+   :on-reason (fn [_]) ; the remaining callbacks are fixed no-ops
+   :on-usage-updated (fn [_])
+   :on-server-web-search (fn [_])})
+
+(deftest create-response-tool-calls-via-output-test ; primary path: item id and call_id agree across every event
+  (testing "tool calls in response.completed output trigger callbacks correctly"
+    (let [prepare-calls* (atom [])
+          tools-called* (atom [])
+          requests* (atom [])]
+      (with-redefs [llm-providers.openai/base-responses-request! ; stub the request fn so the test drives on-stream directly
+                    (fn [{:keys [on-stream] :as opts}]
+                      (swap! requests* conj opts)
+                      (when (= 1 (count @requests*)) ; only the first request streams events; later requests are recorded and do nothing
+                        (on-stream "response.output_item.added"
+                                   {:item {:type "function_call"
+                                           :id "item-1"
+                                           :call_id "call-1"
+                                           :name "eca__shell_command"
+                                           :arguments ""}})
+                        (on-stream "response.function_call_arguments.delta"
+                                   {:item_id "item-1"
+                                    :delta "{\"command\":\"ls\"}"})
+                        (on-stream "response.output_item.done"
+                                   {:item {:type "function_call"
+                                           :id "item-1"
+                                           :call_id "call-1"
+                                           :name "eca__shell_command"
+                                           :arguments "{\"command\":\"ls\"}"}})
+                        (on-stream "response.completed"
+                                   {:response {:output [{:type "function_call"
+                                                         :id "item-1"
+                                                         :call_id "call-1"
+                                                         :name "eca__shell_command"
+                                                         :arguments "{\"command\":\"ls\"}"}]
+                                               :usage {:input_tokens 10
+                                                       :output_tokens 5}
+                                               :status "completed"}})))]
+        (llm-providers.openai/create-response!
+         (base-provider-params)
+         (base-callbacks
+          {:on-prepare-tool-call (fn [data] (swap! prepare-calls* conj data)) ; record every prepare notification
+           :on-tools-called (fn [tool-calls]
+                              (swap! tools-called* conj tool-calls)
+                              {:new-messages [] :tools []})}))
+        (is (pos? (count @prepare-calls*)))
+        (is (= "call-1" (:id (first @prepare-calls*)))) ; the reported id is the call_id, not the item id
+        (is (= "eca__shell_command" (:full-name (first @prepare-calls*))))
+        (is (= 1 (count @tools-called*)))
+        (is (match? [{:id "call-1"
+                      :full-name "eca__shell_command"
+                      :arguments {"command" "ls"}}] ; arguments arrive parsed from the JSON string
+                    (first @tools-called*)))
+        (is (= 2 (count @requests*))))))) ; the initial request plus one follow-up after on-tools-called
+
+(deftest create-response-tool-calls-fallback-via-atom-test ; fallback path: response.completed carries no output items
+  (testing "empty output in response.completed still triggers on-tools-called via atom fallback"
+    (let [tools-called* (atom [])
+          requests* (atom [])]
+      (with-redefs [llm-providers.openai/base-responses-request! ; stub the request fn so the test drives on-stream directly
+                    (fn [{:keys [on-stream] :as opts}]
+                      (swap! requests* conj opts)
+                      (when (= 1 (count @requests*)) ; only the first request streams events
+                        (on-stream "response.output_item.added"
+                                   {:item {:type "function_call"
+                                           :id "item-1"
+                                           :call_id "call-1"
+                                           :name "eca__shell_command"
+                                           :arguments ""}})
+                        (on-stream "response.function_call_arguments.delta"
+                                   {:item_id "item-1"
+                                    :delta "{\"command\":\"ls\"}"})
+                        (on-stream "response.output_item.done"
+                                   {:item {:type "function_call"
+                                           :id "item-1"
+                                           :call_id "call-1"
+                                           :name "eca__shell_command"
+                                           :arguments "{\"command\":\"ls\"}"}})
+                        ;; response.completed with EMPTY output — fallback must kick in
+                        (on-stream "response.completed"
+                                   {:response {:output []
+                                               :usage {:input_tokens 10
+                                                       :output_tokens 5}
+                                               :status "completed"}})))]
+        (llm-providers.openai/create-response!
+         (base-provider-params)
+         (base-callbacks
+          {:on-tools-called (fn [tool-calls]
+                              (swap! tools-called* conj tool-calls)
+                              {:new-messages [] :tools []})}))
+        (is (= 1 (count @tools-called*)))
+        (is (match? [{:id "call-1"
+                      :full-name "eca__shell_command"
+                      :arguments {"command" "ls"}}] ; the tool call must come from the streamed events, since :output was empty
+                    (first @tools-called*)))
+        (is (= 2 (count @requests*))))))) ; the initial request plus one follow-up after on-tools-called
+
+(deftest create-response-text-only-no-phantom-calls-test ; regression: a tool-call round followed by a text-only round
+  (testing "text-only final response doesn't produce phantom tool calls from stale atom entries"
+    (let [tools-called* (atom [])
+          finish-received* (atom false)
+          requests* (atom [])]
+      (with-redefs [llm-providers.openai/base-responses-request! ; stub the request fn so the test drives on-stream directly
+                    (fn [{:keys [on-stream] :as opts}]
+                      (swap! requests* conj opts)
+                      (case (count @requests*) ; script the stream by request number
+                        ;; First call: tool call with Copilot-style mismatched item IDs
+                        1 (do
+                            (on-stream "response.output_item.added"
+                                       {:item {:type "function_call"
+                                               :id "stream-added-id"
+                                               :call_id "call-1"
+                                               :name "eca__shell_command"
+                                               :arguments ""}})
+                            (on-stream "response.function_call_arguments.delta"
+                                       {:item_id "stream-added-id"
+                                        :delta "{\"command\":\"ls\"}"})
+                            (on-stream "response.output_item.done"
+                                       {:item {:type "function_call"
+                                               :id "stream-done-id" ; differs from the id used by output_item.added
+                                               :call_id "call-1"
+                                               :name "eca__shell_command"
+                                               :arguments "{\"command\":\"ls\"}"}})
+                            (on-stream "response.completed"
+                                       {:response {:output [{:type "function_call"
+                                                             :id "output-id" ; a third id; only call_id stays constant
+                                                             :call_id "call-1"
+                                                             :name "eca__shell_command"
+                                                             :arguments "{\"command\":\"ls\"}"}]
+                                                   :usage {:input_tokens 10 :output_tokens 5}
+                                                   :status "completed"}}))
+                        ;; Second call: text-only response (no tool calls)
+                        2 (on-stream "response.completed"
+                                     {:response {:output [{:type "message"
+                                                           :id "msg-1"
+                                                           :content [{:text "Done."}]}]
+                                                 :usage {:input_tokens 5 :output_tokens 3}
+                                                 :status "completed"}})
+                        nil))] ; a third or later request streams nothing
+        (llm-providers.openai/create-response!
+         (base-provider-params)
+         (base-callbacks
+          {:on-message-received (fn [msg]
+                                  (when (= :finish (:type msg)) ; only the :finish message matters here
+                                    (reset! finish-received* true)))
+           :on-tools-called (fn [tool-calls]
+                              (swap! tools-called* conj tool-calls)
+                              {:new-messages [] :tools []})}))
+        (is (= 1 (count @tools-called*))
+            "on-tools-called should fire exactly once, not for phantom calls")
+        (is (true? @finish-received*)
+            "text-only response should trigger :finish")
+        (is (= 2 (count @requests*))
+            "should make exactly 2 requests, no retry loop")))))
+
+(deftest create-response-mismatched-item-ids-test ; every event uses a different item id; only call_id is stable
+  (testing "different item IDs across streaming events still produce correct tool calls"
+    (let [tools-called* (atom [])
+          requests* (atom [])]
+      (with-redefs [llm-providers.openai/base-responses-request! ; stub the request fn so the test drives on-stream directly
+                    (fn [{:keys [on-stream] :as opts}]
+                      (swap! requests* conj opts)
+                      (when (= 1 (count @requests*)) ; only the first request streams events
+                        ;; Copilot-style: different encrypted IDs for the same tool call
+                        (on-stream "response.output_item.added"
+                                   {:item {:type "function_call"
+                                           :id "encrypted-added-id"
+                                           :call_id "call-1"
+                                           :name "eca__shell_command"
+                                           :arguments ""}})
+                        (on-stream "response.function_call_arguments.delta"
+                                   {:item_id "encrypted-added-id"
+                                    :delta "{\"command\":\"ls\"}"})
+                        ;; output_item.done uses a DIFFERENT encrypted id
+                        (on-stream "response.output_item.done"
+                                   {:item {:type "function_call"
+                                           :id "encrypted-done-id"
+                                           :call_id "call-1"
+                                           :name "eca__shell_command"
+                                           :arguments "{\"command\":\"ls\"}"}})
+                        ;; response.completed uses yet ANOTHER encrypted id
+                        (on-stream "response.completed"
+                                   {:response {:output [{:type "function_call"
+                                                         :id "encrypted-output-id"
+                                                         :call_id "call-1"
+                                                         :name "eca__shell_command"
+                                                         :arguments "{\"command\":\"ls\"}"}]
+                                               :usage {:input_tokens 10 :output_tokens 5}
+                                               :status "completed"}})))]
+        (llm-providers.openai/create-response!
+         (base-provider-params)
+         (base-callbacks
+          {:on-tools-called (fn [tool-calls]
+                              (swap! tools-called* conj tool-calls)
+                              {:new-messages [] :tools []})}))
+        (is (= 1 (count @tools-called*))) ; one batch, despite three different item ids
+        (is (match? [{:id "call-1"
+                      :full-name "eca__shell_command"
+                      :arguments {"command" "ls"}}] ; a single tool call, identified by its call_id
+                    (first @tools-called*)))
+        (is (= 2 (count @requests*))))))) ; the initial request plus one follow-up after on-tools-called