|
| 1 | +// SPDX-FileCopyrightText: Copyright 2025 Stacklok, Inc. |
| 2 | +// SPDX-License-Identifier: Apache-2.0 |
| 3 | + |
| 4 | +package registry |
| 5 | + |
| 6 | +import ( |
| 7 | + "context" |
| 8 | + "errors" |
| 9 | + "fmt" |
| 10 | + "net/http" |
| 11 | + "net/http/httptest" |
| 12 | + "sync/atomic" |
| 13 | + "testing" |
| 14 | + |
| 15 | + "github.com/stretchr/testify/require" |
| 16 | + |
| 17 | + "github.com/stacklok/toolhive/pkg/registry/api" |
| 18 | + "github.com/stacklok/toolhive/pkg/registry/auth" |
| 19 | +) |
| 20 | + |
| 21 | +// TestCachedProvider_AuthErrorNotMaskedByStaleCache reproduces a bug in |
| 22 | +// refreshCache() at provider_cached.go:116. |
| 23 | +// |
| 24 | +// Scenario: a user has an existing cache populated from a previous successful |
| 25 | +// fetch. Later, their registry returns 401 (token revoked, credentials |
| 26 | +// rejected server-side). The CLI should surface the auth error, because the |
| 27 | +// user's authorization state may have changed since the cache was populated. |
| 28 | +// |
| 29 | +// Current behavior: refreshCache() treats ANY error from the upstream fetch |
| 30 | +// as a signal to fall back to stale cached data, including authentication |
| 31 | +// errors. The CLI silently prints stale registry contents with no error. |
| 32 | +// |
| 33 | +// This test covers the 401/403 branch (server-side rejection). |
| 34 | +func TestCachedProvider_AuthErrorNotMaskedByStaleCache(t *testing.T) { |
| 35 | + t.Parallel() |
| 36 | + |
| 37 | + var returnUnauthorized atomic.Bool |
| 38 | + |
| 39 | + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { |
| 40 | + if returnUnauthorized.Load() { |
| 41 | + w.WriteHeader(http.StatusUnauthorized) |
| 42 | + return |
| 43 | + } |
| 44 | + w.Header().Set("Content-Type", "application/json") |
| 45 | + w.WriteHeader(http.StatusOK) |
| 46 | + _, _ = w.Write([]byte(`{"servers":[],"metadata":{"next_cursor":""}}`)) |
| 47 | + })) |
| 48 | + t.Cleanup(srv.Close) |
| 49 | + |
| 50 | + // allowPrivateIp=true because httptest binds to 127.0.0.1 |
| 51 | + // usePersistent=false exercises only the in-memory cache path |
| 52 | + // tokenSource=nil so the constructor's validation probe runs against the healthy server |
| 53 | + provider, err := NewCachedAPIRegistryProvider(srv.URL, true, false, nil) |
| 54 | + require.NoError(t, err, "constructor should succeed while server is healthy") |
| 55 | + |
| 56 | + // Populate the in-memory cache with a successful fetch. |
| 57 | + _, err = provider.ListServers() |
| 58 | + require.NoError(t, err, "first fetch should succeed and populate cache") |
| 59 | + |
| 60 | + // Flip the server to 401 — equivalent to the registry now rejecting |
| 61 | + // the user's credentials server-side. |
| 62 | + returnUnauthorized.Store(true) |
| 63 | + |
| 64 | + // Force a refresh — mirrors what happens when the in-memory cache TTL |
| 65 | + // expires (default 1h) and the next `thv registry list` triggers a |
| 66 | + // fresh fetch. The upstream API call will fail with 401. |
| 67 | + err = provider.ForceRefresh() |
| 68 | + |
| 69 | + require.Error(t, err, |
| 70 | + "expected auth error to propagate on refresh; got nil — "+ |
| 71 | + "stale cache is masking the auth failure (bug)") |
| 72 | + require.True(t, |
| 73 | + errors.Is(err, api.ErrRegistryUnauthorized) || |
| 74 | + errors.Is(err, auth.ErrRegistryAuthRequired), |
| 75 | + "expected ErrRegistryUnauthorized or ErrRegistryAuthRequired; got: %v", err) |
| 76 | +} |
| 77 | + |
| 78 | +// failingTokenSource is a test double for auth.TokenSource. |
| 79 | +// It returns an empty token (no Authorization header added) when fail is |
| 80 | +// false, and returns an error wrapped like oauthTokenSource.Token() would |
| 81 | +// when its browser OAuth flow fails, when fail is true. |
| 82 | +type failingTokenSource struct { |
| 83 | + fail atomic.Bool |
| 84 | +} |
| 85 | + |
| 86 | +// Token mirrors the exact error-wrapping oauthTokenSource.Token() applies |
| 87 | +// when its browser flow times out / is cancelled. See: |
| 88 | +// - pkg/registry/auth/oauth_token_source.go:61-67 (outer wrap) |
| 89 | +// - pkg/registry/auth/oauth_token_source.go:110-113 (inner wrap) |
| 90 | +// |
| 91 | +// The point of matching the wrapping is to verify that the eventual |
| 92 | +// refreshCache() fallback behavior does not depend on the specific error |
| 93 | +// type — any error causes stale cache to be served. |
| 94 | +func (s *failingTokenSource) Token(_ context.Context) (string, error) { |
| 95 | + if s.fail.Load() { |
| 96 | + inner := fmt.Errorf("oauth flow start failed: %w", |
| 97 | + errors.New("authorization timed out waiting for browser callback")) |
| 98 | + return "", fmt.Errorf("oauth flow failed: %w", inner) |
| 99 | + } |
| 100 | + // No auth header needed — the httptest server does not validate credentials. |
| 101 | + return "", nil |
| 102 | +} |
| 103 | + |
| 104 | +// TestCachedProvider_OAuthFlowFailureNotMaskedByStaleCache reproduces the |
| 105 | +// same masking bug via the OAuth-browser-flow-failure code path — the one |
| 106 | +// actually described in the bug report: |
| 107 | +// |
| 108 | +// "thv registry list sent me a URL to OAuth. I did nothing. After a minute |
| 109 | +// it exited the OAuth flow and went to another OAuth flow. I also did |
| 110 | +// nothing. In another minute it exited that OAuth flow and then it |
| 111 | +// showed me everything in the registry." |
| 112 | +// |
| 113 | +// This path differs from the 401/403 path because the error returned by |
| 114 | +// oauthTokenSource.Token() when the browser flow times out is a generic |
| 115 | +// wrapped error — it does NOT match errors.Is(err, auth.ErrRegistryAuthRequired) |
| 116 | +// or errors.Is(err, api.ErrRegistryUnauthorized). It is propagated through: |
| 117 | +// |
| 118 | +// oauthTokenSource.Token ("oauth flow failed: ...") |
| 119 | +// -> auth.Transport.RoundTrip ("failed to get auth token: ...") |
| 120 | +// -> api.Client.fetchServersPage ("failed to fetch servers: ...") |
| 121 | +// -> APIRegistryProvider.GetRegistry (wrapped into *UnavailableError) |
| 122 | +// -> refreshCache (swallowed; stale cache returned with nil err) |
| 123 | +// |
| 124 | +// A sentinel-check fix like |
| 125 | +// |
| 126 | +// if errors.Is(err, auth.ErrRegistryAuthRequired) || |
| 127 | +// errors.Is(err, api.ErrRegistryUnauthorized) { return nil, err } |
| 128 | +// |
| 129 | +// will NOT catch this path — the OAuth browser-flow error carries neither |
| 130 | +// sentinel. A correct fix has to classify token-acquisition failures as |
| 131 | +// auth errors before they are flattened into UnavailableError. |
| 132 | +func TestCachedProvider_OAuthFlowFailureNotMaskedByStaleCache(t *testing.T) { |
| 133 | + t.Parallel() |
| 134 | + |
| 135 | + // Server always serves a valid empty list. All failures in this test |
| 136 | + // come from the token source, not the server — simulating an OAuth |
| 137 | + // flow that never completes while the registry itself is reachable. |
| 138 | + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { |
| 139 | + w.Header().Set("Content-Type", "application/json") |
| 140 | + w.WriteHeader(http.StatusOK) |
| 141 | + _, _ = w.Write([]byte(`{"servers":[],"metadata":{"next_cursor":""}}`)) |
| 142 | + })) |
| 143 | + t.Cleanup(srv.Close) |
| 144 | + |
| 145 | + ts := &failingTokenSource{} |
| 146 | + |
| 147 | + // Non-nil tokenSource causes NewAPIRegistryProvider to skip its |
| 148 | + // construction-time validation probe (provider_api.go:57) — matching |
| 149 | + // the real OAuth-configured code path where the probe is skipped |
| 150 | + // because a browser flow cannot complete within 10 seconds. |
| 151 | + provider, err := NewCachedAPIRegistryProvider(srv.URL, true, false, ts) |
| 152 | + require.NoError(t, err, "constructor should succeed (probe skipped when tokenSource != nil)") |
| 153 | + |
| 154 | + // Populate the in-memory cache with a successful fetch. |
| 155 | + // ts.fail == false, so Token() returns ("", nil) and the request passes |
| 156 | + // through the auth transport without an Authorization header. |
| 157 | + _, err = provider.ListServers() |
| 158 | + require.NoError(t, err, "first fetch should succeed and populate cache") |
| 159 | + |
| 160 | + // Simulate the OAuth browser flow failing / timing out on the next |
| 161 | + // token acquisition. |
| 162 | + ts.fail.Store(true) |
| 163 | + |
| 164 | + err = provider.ForceRefresh() |
| 165 | + |
| 166 | + // DESIRED: an OAuth-flow failure must not be hidden by stale cache. |
| 167 | + // CURRENT BUG: refreshCache returns cached data with nil error. |
| 168 | + require.Error(t, err, |
| 169 | + "expected OAuth flow failure to propagate on refresh; got nil — "+ |
| 170 | + "stale cache is masking the OAuth failure (bug, "+ |
| 171 | + "matches user-reported scenario)") |
| 172 | +} |
0 commit comments