Skip to content

Commit ebb6a68

Browse files
brycelelbachclaude
andcommitted
fix: produce friendly error when api.ngc.nvidia.com is unreachable
Network failures (DNS timeout, dial refused) during login or token refresh used to surface a multi-line stack trace plus repeated resty WARN/ERROR blocks. Detect transport-level errors at the auth boundary and render them as a short, actionable message with a directive. Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent a295d3a commit ebb6a68

7 files changed

Lines changed: 420 additions & 1 deletion

File tree

pkg/auth/kas.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ import (
66
"fmt"
77
"io"
88
"net/http"
9+
"net/url"
910
"os"
1011
"strings"
1112
"time"
@@ -16,6 +17,14 @@ import (
1617
"github.com/google/uuid"
1718
)
1819

20+
func hostFromURL(rawURL string) string {
21+
parsed, err := url.Parse(rawURL)
22+
if err != nil || parsed == nil {
23+
return ""
24+
}
25+
return parsed.Host
26+
}
27+
1928
var _ OAuth = KasAuthenticator{}
2029

2130
const CredentialProviderKAS entity.CredentialProvider = "kas"
@@ -103,7 +112,7 @@ func (a KasAuthenticator) MakeLoginCall(id, email string) (LoginCallResponse, er
103112
client := &http.Client{}
104113
resp, err := client.Do(req)
105114
if err != nil {
106-
return LoginCallResponse{}, breverrors.WrapAndTrace(err)
115+
return LoginCallResponse{}, breverrors.WrapAndTrace(breverrors.WrapNetworkError(err, hostFromURL(a.BaseURL)))
107116
}
108117
defer resp.Body.Close() //nolint:errcheck // fine
109118

@@ -219,6 +228,9 @@ func (a KasAuthenticator) retrieveIDToken(sessionKey, deviceID string) (string,
219228

220229
tokenResp, err := client.Do(tokenReq)
221230
if err != nil {
231+
if breverrors.IsNetworkError(err) {
232+
return "", breverrors.WrapNetworkError(err, hostFromURL(a.BaseURL))
233+
}
222234
return "", fmt.Errorf("error sending token request: %v", err)
223235
}
224236
defer tokenResp.Body.Close() //nolint:errcheck // fine

pkg/auth/kas_test.go

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
package auth
2+
3+
import (
4+
stderrors "errors"
5+
"net/http"
6+
"net/http/httptest"
7+
"net/url"
8+
"testing"
9+
10+
breverrors "github.com/brevdev/brev-cli/pkg/errors"
11+
"github.com/stretchr/testify/assert"
12+
)
13+
14+
func Test_hostFromURL(t *testing.T) {
15+
cases := []struct {
16+
in string
17+
want string
18+
}{
19+
{"https://api.ngc.nvidia.com", "api.ngc.nvidia.com"},
20+
{"https://api.ngc.nvidia.com/token", "api.ngc.nvidia.com"},
21+
{"http://localhost:8080", "localhost:8080"},
22+
{"", ""},
23+
{"://nonsense", ""},
24+
}
25+
for _, tc := range cases {
26+
t.Run(tc.in, func(t *testing.T) {
27+
assert.Equal(t, tc.want, hostFromURL(tc.in))
28+
})
29+
}
30+
}
31+
32+
// closedServerURL returns a URL that is guaranteed to fail to connect: an
33+
// httptest.Server that has already been Close()d.
34+
func closedServerURL(t *testing.T) string {
35+
t.Helper()
36+
server := httptest.NewServer(http.HandlerFunc(func(http.ResponseWriter, *http.Request) {}))
37+
server.Close()
38+
return server.URL
39+
}
40+
41+
// MakeLoginCall against an unreachable host should surface a friendly
42+
// *NetworkError instead of the raw transport error, and the host should
43+
// match the BaseURL we tried to reach.
44+
func TestMakeLoginCall_NetworkError(t *testing.T) {
45+
baseURL := closedServerURL(t)
46+
auth := KasAuthenticator{BaseURL: baseURL}
47+
48+
_, err := auth.MakeLoginCall("device-id", "user@example.com")
49+
if !assert.Error(t, err) {
50+
return
51+
}
52+
53+
var netErr *breverrors.NetworkError
54+
if !assert.True(t, stderrors.As(err, &netErr), "expected *NetworkError, got %T: %v", err, err) {
55+
return
56+
}
57+
58+
expectedHost, _ := url.Parse(baseURL)
59+
assert.Equal(t, expectedHost.Host, netErr.Host)
60+
assert.Contains(t, netErr.Error(), "internet connection")
61+
}
62+
63+
// retrieveIDToken against an unreachable host should surface a friendly
64+
// *NetworkError. This is the path hit by `brev shell` when the cached
65+
// access token has expired and api.ngc.nvidia.com is unreachable.
66+
func TestRetrieveIDToken_NetworkError(t *testing.T) {
67+
baseURL := closedServerURL(t)
68+
auth := KasAuthenticator{BaseURL: baseURL}
69+
70+
_, err := auth.retrieveIDToken("session-key", "device-id")
71+
if !assert.Error(t, err) {
72+
return
73+
}
74+
75+
var netErr *breverrors.NetworkError
76+
if !assert.True(t, stderrors.As(err, &netErr), "expected *NetworkError, got %T: %v", err, err) {
77+
return
78+
}
79+
80+
expectedHost, _ := url.Parse(baseURL)
81+
assert.Equal(t, expectedHost.Host, netErr.Host)
82+
}
83+
84+
// retrieveIDToken against a server that returns HTTP 4xx/5xx (i.e. a
85+
// non-network error) should NOT be classified as a network error.
86+
func TestRetrieveIDToken_NonNetworkErrorUnchanged(t *testing.T) {
87+
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) {
88+
w.WriteHeader(http.StatusInternalServerError)
89+
_, _ = w.Write([]byte(`{"requestStatus":{"statusCode":"INTERNAL_ERROR"}}`))
90+
}))
91+
defer server.Close()
92+
93+
auth := KasAuthenticator{BaseURL: server.URL}
94+
_, err := auth.retrieveIDToken("session-key", "device-id")
95+
if !assert.Error(t, err) {
96+
return
97+
}
98+
99+
var netErr *breverrors.NetworkError
100+
assert.False(t, stderrors.As(err, &netErr), "HTTP error should not be classified as a network error: %v", err)
101+
assert.Contains(t, err.Error(), "status code: 500")
102+
}

pkg/cmd/cmderrors/cmderrors.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,11 @@ func DisplayAndHandleError(err error) {
3333
case breverrors.ValidationError:
3434
// do not report error
3535
prettyErr = (t.Yellow(errors.Cause(err).Error()))
36+
case *breverrors.NetworkError:
37+
// network failure is a user-facing condition, not a bug — show
38+
// a friendly message and skip Sentry reporting.
39+
netErr, _ := errors.Cause(err).(*breverrors.NetworkError)
40+
prettyErr = t.Yellow(netErr.Error()) + "\n" + t.Yellow(netErr.Directive())
3641
case breverrors.WorkspaceNotRunning: // report error to track when this occurs, but don't print stacktrace to user unless in dev mode
3742
er.ReportError(err)
3843
prettyErr = (t.Yellow(errors.Cause(err).Error()))
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,68 @@
11
package cmderrors
2+
3+
import (
4+
"bytes"
5+
"io"
6+
"os"
7+
"testing"
8+
9+
breverrors "github.com/brevdev/brev-cli/pkg/errors"
10+
"github.com/stretchr/testify/assert"
11+
"github.com/stretchr/testify/require"
12+
)
13+
14+
// captureStderr runs fn while collecting everything written to os.Stderr.
15+
func captureStderr(t *testing.T, fn func()) string {
16+
t.Helper()
17+
orig := os.Stderr
18+
r, w, err := os.Pipe()
19+
require.NoError(t, err)
20+
os.Stderr = w
21+
22+
done := make(chan struct{})
23+
var buf bytes.Buffer
24+
go func() {
25+
_, _ = io.Copy(&buf, r)
26+
close(done)
27+
}()
28+
29+
fn()
30+
31+
// Close the writer before reading buf so the io.Copy goroutine sees
32+
// EOF and returns. Without this, the deferred close ran after the
33+
// return statement and buf was always empty.
34+
_ = w.Close()
35+
<-done
36+
_ = r.Close()
37+
os.Stderr = orig
38+
39+
return buf.String()
40+
}
41+
42+
// A NetworkError wrapped through WrapAndTrace should render as a short,
43+
// friendly message — no stack trace, no "github.com/brevdev/..." lines.
44+
func TestDisplayAndHandleError_NetworkErrorIsFriendly(t *testing.T) {
45+
netErr := &breverrors.NetworkError{Host: "api.ngc.nvidia.com"}
46+
wrapped := breverrors.WrapAndTrace(breverrors.WrapAndTrace(netErr))
47+
48+
out := captureStderr(t, func() {
49+
DisplayAndHandleError(wrapped)
50+
})
51+
52+
assert.Contains(t, out, "api.ngc.nvidia.com")
53+
assert.Contains(t, out, "internet connection")
54+
// The hallmark of the old convoluted output: stack trace lines pointing
55+
// at github.com/brevdev/brev-cli source paths. The friendly message
56+
// must not include them.
57+
assert.NotContains(t, out, "github.com/brevdev/brev-cli/pkg/auth")
58+
assert.NotContains(t, out, "[error]")
59+
}
60+
61+
// A non-network error should still render through the default red path.
62+
func TestDisplayAndHandleError_PlainError(t *testing.T) {
63+
err := breverrors.New("something else broke")
64+
out := captureStderr(t, func() {
65+
DisplayAndHandleError(err)
66+
})
67+
assert.Contains(t, out, "something else broke")
68+
}

pkg/errors/errors.go

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ package errors
22

33
import (
44
"fmt"
5+
"net"
6+
"net/url"
57
"runtime"
68
"strconv"
79
"time"
@@ -130,6 +132,87 @@ func (d *DeclineToLoginError) Directive() string { return "log in to run this co
130132

131133
var NetworkErrorMessage = "possible internet connection problem"
132134

135+
// NetworkError is a user-facing error for transport-level failures (DNS
136+
// lookup failures, dial timeouts, connection refused, etc.) when reaching a
137+
// remote service. It hides the underlying stacktrace and produces a short,
138+
// actionable message suitable for end users.
139+
type NetworkError struct {
140+
// Host is the host the CLI was trying to reach (e.g. "api.ngc.nvidia.com").
141+
// Empty if no host could be derived from the original error.
142+
Host string
143+
// Cause is the underlying transport error.
144+
Cause error
145+
}
146+
147+
func (e *NetworkError) Error() string {
148+
if e.Host != "" {
149+
return fmt.Sprintf("Could not reach %s — check your internet connection and try again", e.Host)
150+
}
151+
return "Could not reach the network — check your internet connection and try again"
152+
}
153+
154+
func (e *NetworkError) Directive() string {
155+
if e.Host != "" {
156+
return fmt.Sprintf("Verify you can resolve %s and that no firewall or proxy is blocking it. If the host is reachable, the service may be temporarily unavailable.", e.Host)
157+
}
158+
return "Verify your internet connection. If the network is healthy, the service may be temporarily unavailable."
159+
}
160+
161+
func (e *NetworkError) Unwrap() error { return e.Cause }
162+
163+
// IsNetworkError reports whether err (or any error in its chain) is a
164+
// transport-level network failure such as a DNS lookup failure, dial
165+
// timeout, or connection refusal.
166+
func IsNetworkError(err error) bool {
167+
if err == nil {
168+
return false
169+
}
170+
var dnsErr *net.DNSError
171+
if stderrors.As(err, &dnsErr) {
172+
return true
173+
}
174+
var opErr *net.OpError
175+
if stderrors.As(err, &opErr) {
176+
return true
177+
}
178+
var netErr net.Error
179+
if stderrors.As(err, &netErr) {
180+
return true
181+
}
182+
return false
183+
}
184+
185+
// HostFromURLError returns the host from a *url.Error in err's chain, or ""
186+
// if no URL is available. Useful when wrapping HTTP client errors.
187+
func HostFromURLError(err error) string {
188+
var urlErr *url.Error
189+
if !stderrors.As(err, &urlErr) {
190+
return ""
191+
}
192+
parsed, perr := url.Parse(urlErr.URL)
193+
if perr != nil || parsed == nil {
194+
return ""
195+
}
196+
return parsed.Host
197+
}
198+
199+
// WrapNetworkError returns a *NetworkError wrapping err if err is a
200+
// transport-level network failure; otherwise it returns err unchanged. The
201+
// fallbackHost is used only if the host cannot be derived from err.
202+
func WrapNetworkError(err error, fallbackHost string) error {
203+
if err == nil {
204+
return nil
205+
}
206+
if !IsNetworkError(err) {
207+
return err
208+
}
209+
host := HostFromURLError(err)
210+
if host == "" {
211+
host = fallbackHost
212+
}
213+
return &NetworkError{Host: host, Cause: err}
214+
}
215+
133216
type CredentialsFileNotFound struct{}
134217

135218
func (e *CredentialsFileNotFound) Directive() string {

0 commit comments

Comments
 (0)