Skip to content

Commit dee7fec

Browse files
ericdallo and eca-agent committed
Classify LLM provider connection errors with actionable hints
When the JVM cannot validate a custom provider's TLS certificate (common with corporate / private CAs), users were shown a raw PKIX stack trace with no hint about how to fix it — even though ECA already supports `network.caCertFile` and `SSL_CERT_FILE`. Add `llm-util/classify-connection-exception`, which walks the exception cause chain and produces user-friendly messages for SSL/TLS, DNS, connection-refused and timeout failures, pointing users to the relevant config when one applies. The five catch sites in the OpenAI, OpenAI-chat, Anthropic and Ollama providers now share this helper. Closes #457 🤖 Generated with [eca](https://eca.dev) Co-Authored-By: eca-agent <git@eca.dev>
1 parent 17ff358 commit dee7fec

7 files changed

Lines changed: 155 additions & 11 deletions

File tree

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
## Unreleased
44

55
- Improve CPU usage while streaming tool-call arguments by reusing the prompt's tool list.
6+
- Improve connection error messages from LLM providers. #457
67

78
## 0.133.4
89

src/eca/llm_providers/anthropic.clj

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -161,7 +161,7 @@
161161

162162
:else
163163
(on-error {:exception e
164-
:message (format "Connection error: %s" (or (ex-message e) (.getName (class e))))}))))
164+
:message (llm-util/connection-error-message e)}))))
165165
(finally
166166
(stop-fn))))
167167
(do
@@ -170,7 +170,7 @@
170170
{:output-text (:text (last (:content body)))})))))
171171
(catch Exception e
172172
(on-error {:exception e
173-
:message (format "Connection error: %s" (or (ex-message e) (.getName (class e))))})))
173+
:message (llm-util/connection-error-message e)})))
174174
@response*))
175175

176176
(defn ^:private normalize-messages [past-messages supports-image?]

src/eca/llm_providers/ollama.clj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@
9090
{:output-text (:content (:message body))})))))
9191
(catch Exception e
9292
(on-error {:exception e
93-
:message (format "Connection error: %s" (or (ex-message e) (.getName (class e))))})))
93+
:message (llm-util/connection-error-message e)})))
9494
@response*))
9595

9696
(defn ^:private ->tools [tools]

src/eca/llm_providers/openai.clj

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -96,10 +96,10 @@
9696
(llm-util/log-response logger-tag rid "response" body)
9797
(response-body->result body)))))
9898
(catch Exception e
99-
(let [msg (or (ex-message e) (.getName (class e)))
100-
prefix (if (ex-data e) "Internal error" "Connection error")]
101-
(on-error {:exception e
102-
:message (format "%s: %s" prefix msg)}))))))
99+
(on-error {:exception e
100+
:message (if (ex-data e)
101+
(format "Internal error: %s" (or (ex-message e) (.getName (class e))))
102+
(llm-util/connection-error-message e))})))))
103103

104104
(defn ^:private normalize-messages [messages supports-image?]
105105
;; Each history entry maps to one or more provider messages. Switched from

src/eca/llm_providers/openai_chat.clj

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -154,7 +154,7 @@
154154
(response-body->result body on-tools-called-wrapper)))))
155155
(catch Exception e
156156
(on-error {:exception e
157-
:message (format "Connection error: %s" (or (ex-message e) (.getName (class e))))})))))
157+
:message (llm-util/connection-error-message e)})))))
158158

159159
(defn ^:private transform-message
160160
"Transform a single ECA message to OpenAI format. Returns nil for unsupported roles.

src/eca/llm_util.clj

Lines changed: 77 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88
[eca.secrets :as secrets]
99
[eca.shared :as shared])
1010
(:import
11-
[java.io BufferedReader Closeable]))
11+
[java.io BufferedReader Closeable]
12+
[java.net ConnectException SocketTimeoutException UnknownHostException]
13+
[java.net.http HttpConnectTimeoutException]
14+
[javax.net.ssl SSLException]))
1215

1316
(set! *warn-on-reflection* true)
1417

@@ -156,3 +159,76 @@
156159
(some-> (get-in config [:providers (name provider) :urlEnv]) config/get-env)) ;; legacy
157160
shared/normalize-api-url
158161
not-empty))
162+
163+
(defn ^:private cause-chain
  "Lazily yields `e`, then its cause, then that cause's cause, and so on,
  stopping at the first throwable that has no cause."
  [^Throwable e]
  (take-while some?
              (iterate #(.getCause ^Throwable %) e)))
168+
169+
(defn ^:private root-message
  "Text used to describe `e`: its own message, or its fully-qualified class
  name when the message is nil. Only `e` itself is inspected, not its causes."
  [^Throwable e]
  (if-some [text (ex-message e)]
    text
    (.getName (class e))))
171+
172+
(defn classify-connection-exception
  "Walks the cause chain of `e` and classifies common HTTP/TLS failures
  into a user-friendly map: {:kind <keyword> :message <string>}.

  Recognized kinds, in order of precedence (the first match wins):
  - :tls-untrusted   - a message in the chain contains 'PKIX path building failed'
                       (private/corporate CA not trusted)
  - :tls-other       - any other SSLException in the chain
  - :dns             - UnknownHostException
  - :timeout         - HttpConnectTimeoutException / SocketTimeoutException
  - :connect-refused - ConnectException (connection refused, etc.)
  - :unknown         - fallback; keeps the historical 'Connection error: ...' format

  :timeout is tested before :connect-refused because the JDK java.net.http
  client reports a connect timeout as an HttpConnectTimeoutException whose
  cause is a ConnectException; testing ConnectException first would report
  connect timeouts as refused connections.

  The message embeds the text of `e` itself (or its class name when it has
  no message), not the text of the cause that matched."
  [^Throwable e]
  (let [msg (root-message e)
        causes (cause-chain e)
        ;; PKIX failures are matched on message text so they are still found
        ;; when wrapped in a non-SSL exception type.
        pkix? (some (fn [^Throwable c]
                      (some-> (ex-message c)
                              (string/includes? "PKIX path building failed")))
                    causes)
        ssl? (some #(instance? SSLException %) causes)
        dns? (some #(instance? UnknownHostException %) causes)
        timeout? (some #(or (instance? HttpConnectTimeoutException %)
                            (instance? SocketTimeoutException %))
                       causes)
        connect-refused? (some #(instance? ConnectException %) causes)]
    (cond
      pkix?
      {:kind :tls-untrusted
       :message (str "TLS certificate not trusted: PKIX path building failed. "
                     "The server's certificate is signed by a CA not in the JVM truststore "
                     "(common with private/corporate CAs). "
                     "Fix: set `network.caCertFile` in your ECA config or the `SSL_CERT_FILE` "
                     "env var to a PEM bundle containing the missing CA. "
                     "See docs/config/network.md for details. Original error: " msg)}

      ssl?
      {:kind :tls-other
       :message (str "TLS error: " msg
                     ". See docs/config/network.md for trust and mTLS configuration.")}

      dns?
      {:kind :dns
       :message (str "DNS resolution failed: " msg
                     ". Check the provider URL and your network/proxy settings.")}

      ;; Must precede connect-refused?: a connect timeout also carries a
      ;; ConnectException in its cause chain.
      timeout?
      {:kind :timeout
       :message (str "Connection timed out: " msg ".")}

      connect-refused?
      {:kind :connect-refused
       :message (str "Could not connect: " msg
                     ". Check the provider URL and whether the server is reachable. "
                     "Corporate networks may require HTTP_PROXY / HTTPS_PROXY env vars.")}

      :else
      {:kind :unknown
       :message (format "Connection error: %s" msg)})))
229+
230+
(defn connection-error-message
  "Produces the user-facing text for a connection-level exception by taking
  the :message entry of `classify-connection-exception`'s result, which every
  branch of that function populates with a string."
  [^Throwable e]
  (-> e
      classify-connection-exception
      :message))

test/eca/llm_util_test.clj

Lines changed: 69 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77
[eca.secrets :as secrets]
88
[matcher-combinators.test :refer [match?]])
99
(:import
10-
[java.io ByteArrayInputStream]))
10+
[java.io ByteArrayInputStream]
11+
[java.net ConnectException SocketTimeoutException UnknownHostException]
12+
[javax.net.ssl SSLException SSLHandshakeException]))
1113

1214
(deftest event-data-seq-test
1315
(testing "when there is a event line and another data line"
@@ -185,4 +187,69 @@
185187

186188
(testing "returns nil for blank url"
187189
(with-redefs [config/get-env (constantly nil)]
188-
(is (nil? (llm-util/provider-api-url "openai" {:providers {"openai" {:url " "}}}))))))
190+
(is (nil? (llm-util/provider-api-url "openai" {:providers {"openai" {:url " "}}}))))))
191+
192+
(deftest classify-connection-exception-test
  ;; Each case builds a throwable by hand, classifies it, and checks both the
  ;; :kind keyword and the key phrases expected in the user-facing :message.
  (testing "PKIX path building failed -> :tls-untrusted with actionable hint"
    (let [result (llm-util/classify-connection-exception
                  (SSLHandshakeException. "PKIX path building failed: unable to find valid certification path to requested target"))
          text (:message result)]
      (is (= :tls-untrusted (:kind result)))
      (is (re-find #"TLS certificate not trusted" text))
      (is (re-find #"network\.caCertFile" text))
      (is (re-find #"SSL_CERT_FILE" text))
      (is (re-find #"docs/config/network\.md" text))))

  (testing "PKIX detected even when wrapped in a non-SSL outer exception"
    (let [inner (Exception. "PKIX path building failed: unable to find valid certification path to requested target")
          outer (RuntimeException. "wrapper" inner)
          result (llm-util/classify-connection-exception outer)]
      (is (= :tls-untrusted (:kind result)))
      (is (re-find #"network\.caCertFile" (:message result)))))

  (testing "Generic SSLException (no PKIX) -> :tls-other"
    (let [result (llm-util/classify-connection-exception
                  (SSLException. "handshake_failure"))
          text (:message result)]
      (is (= :tls-other (:kind result)))
      (is (re-find #"TLS error" text))
      (is (re-find #"docs/config/network\.md" text))))

  (testing "UnknownHostException -> :dns"
    (let [result (llm-util/classify-connection-exception
                  (UnknownHostException. "no-such-host.example"))]
      (is (= :dns (:kind result)))
      (is (re-find #"DNS resolution failed" (:message result)))))

  (testing "ConnectException (e.g. Connection refused) -> :connect-refused"
    (let [result (llm-util/classify-connection-exception
                  (ConnectException. "Connection refused"))
          text (:message result)]
      (is (= :connect-refused (:kind result)))
      (is (re-find #"Could not connect" text))
      (is (re-find #"HTTP_PROXY" text))))

  (testing "SocketTimeoutException -> :timeout"
    (let [result (llm-util/classify-connection-exception
                  (SocketTimeoutException. "Read timed out"))]
      (is (= :timeout (:kind result)))
      (is (re-find #"Connection timed out" (:message result)))))

  (testing "Unknown exception falls back to legacy 'Connection error:' format"
    (let [result (llm-util/classify-connection-exception (Exception. "boom"))]
      (is (= :unknown (:kind result)))
      (is (= "Connection error: boom" (:message result)))))

  (testing "Exception with nil message uses class name as fallback"
    (let [result (llm-util/classify-connection-exception (Exception.))]
      (is (= :unknown (:kind result)))
      (is (re-find #"Connection error: java\.lang\.Exception" (:message result))))))
246+
247+
(deftest connection-error-message-test
  ;; Covers the thin wrapper: it surfaces the classifier's :message and
  ;; yields a string both with and without an exception message.
  (testing "returns the :message from classify-connection-exception"
    (let [pkix-error (SSLHandshakeException. "PKIX path building failed: ...")
          text (llm-util/connection-error-message pkix-error)]
      (is (re-find #"TLS certificate not trusted" text))))
  (testing "is non-nil for any exception"
    (let [with-message (Exception. "x")
          without-message (Exception.)]
      (is (string? (llm-util/connection-error-message with-message)))
      (is (string? (llm-util/connection-error-message without-message))))))
255+

0 commit comments

Comments
 (0)