Skip to content

Commit a15feae

Browse files
committed
Fall back to file cache on read when keyring is unreachable
Mirrors the login-path fallback for the read path. When the resolver returns (mode=Secure, source=Default) and the keyring probes as definitively unreachable, ResolveCache now returns the file cache instead of the keyring. This means users upgrading on systems without a usable keyring (e.g. Linux containers without a D-Bus session bus) keep reading from their pre-upgrade token-cache.json without any manual configuration. Two behaviours intentionally differ from the login-path fallback: 1. The probe is read-only (ProbeKeyringRead does a Get on a non-existent account name, treating keyring.ErrNotFound as "reachable"). The login-path probe still uses the write+delete cycle because login needs to validate write capability before committing tokens. 2. The read-path fallback does NOT persist auth_storage = plaintext to [__settings__]. A read failure could be transient (e.g. a momentarily broken D-Bus connection) and we don't have strong enough evidence to pin the user out of the keyring permanently. Pinning stays the responsibility of the login command, which has the stronger write-probe signal and an explicit user action. Timeouts in the read probe stay on the keyring (same logic as the login path): a locked keyring being unlocked is the most common timeout case, and a misdiagnosed hang fails the actual Lookup, which is preferable to silently routing reads to a different backend. Explicit-secure callers (env var or config) get the keyring cache regardless of probe result, so an explicit "I want secure" request surfaces the unreachability instead of being silently downgraded. Co-authored-by: Isaac
1 parent 03c3d02 commit a15feae

5 files changed

Lines changed: 280 additions & 16 deletions

File tree

NEXT_CHANGELOG.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
### Notable Changes
66

7-
* Breaking change: OAuth tokens for interactive logins (`auth_type = databricks-cli`) are now stored in the OS-native secure store by default (Keychain on macOS, Credential Manager on Windows, Secret Service on Linux) instead of `~/.databricks/token-cache.json`. After upgrading, run `databricks auth login` once per profile to re-authenticate; cached tokens from older versions are not migrated. To keep the previous file-backed storage, set `DATABRICKS_AUTH_STORAGE=plaintext` or add `auth_storage = plaintext` under `[__settings__]` in `~/.databrickscfg` (the env var takes precedence over the config setting), then re-run `databricks auth login`.
7+
* Breaking change: OAuth tokens for interactive logins (`auth_type = databricks-cli`) are now stored in the OS-native secure store by default (Keychain on macOS, Credential Manager on Windows, Secret Service on Linux) instead of `~/.databricks/token-cache.json`. After upgrading, run `databricks auth login` once per profile to re-authenticate; cached tokens from older versions are not migrated. To keep the previous file-backed storage, set `DATABRICKS_AUTH_STORAGE=plaintext` or add `auth_storage = plaintext` under `[__settings__]` in `~/.databrickscfg` (the env var takes precedence over the config setting), then re-run `databricks auth login`. On systems where the OS keyring is not reachable (e.g. Linux containers without a D-Bus session bus), the CLI transparently falls back to the file cache when reading tokens so legacy `token-cache.json` entries remain accessible without manual configuration.
88

99
### CLI
1010

libs/auth/storage/cache.go

Lines changed: 76 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -16,17 +16,19 @@ import (
1616
// so unit tests can inject stubs without hitting the real OS keyring or
1717
// filesystem. Production code uses defaultCacheFactories().
1818
type cacheFactories struct {
19-
newFile func(context.Context) (cache.TokenCache, error)
20-
newKeyring func() cache.TokenCache
21-
probeKeyring func() error
19+
newFile func(context.Context) (cache.TokenCache, error)
20+
newKeyring func() cache.TokenCache
21+
probeKeyring func() error
22+
probeKeyringRead func() error
2223
}
2324

2425
// defaultCacheFactories returns the production factory set.
2526
func defaultCacheFactories() cacheFactories {
2627
return cacheFactories{
27-
newFile: func(ctx context.Context) (cache.TokenCache, error) { return NewFileTokenCache(ctx) },
28-
newKeyring: NewKeyringCache,
29-
probeKeyring: ProbeKeyring,
28+
newFile: func(ctx context.Context) (cache.TokenCache, error) { return NewFileTokenCache(ctx) },
29+
newKeyring: NewKeyringCache,
30+
probeKeyring: ProbeKeyring,
31+
probeKeyringRead: ProbeKeyringRead,
3032
}
3133
}
3234

@@ -37,11 +39,18 @@ func defaultCacheFactories() cacheFactories {
3739
// override is usually the command-level flag value. Pass "" when the command
3840
// has no flag; precedence then falls through to env -> config -> default.
3941
//
42+
// When the resolver returns (mode=Secure, source=Default) and the OS
43+
// keyring is definitively unreachable (a non-timeout probe error), reads
44+
// fall back to the plaintext file cache so post-upgrade users with legacy
45+
// token-cache.json entries are not stranded. Unlike the login path, this
46+
// fallback does not persist auth_storage = plaintext to [__settings__];
47+
// pinning happens only on successful login.
48+
//
4049
// Every CLI code path that calls u2m.NewPersistentAuth must route the result
4150
// through u2m.WithTokenCache, otherwise the SDK defaults to the file cache
4251
// and splits the user's tokens across two backends.
4352
func ResolveCache(ctx context.Context, override StorageMode) (cache.TokenCache, StorageMode, error) {
44-
inner, mode, err := resolveCacheWith(ctx, override, defaultCacheFactories())
53+
inner, mode, err := resolveCacheForReadWith(ctx, override, defaultCacheFactories())
4554
if err != nil {
4655
return nil, "", err
4756
}
@@ -89,8 +98,11 @@ func WrapForOAuthArgument(tokenCache cache.TokenCache, mode StorageMode, arg u2m
8998
return NewDualWritingTokenCache(tokenCache, arg)
9099
}
91100

92-
// resolveCacheWith is the pure form of ResolveCache. It takes the factory
93-
// set as a parameter so tests can inject stubs.
101+
// resolveCacheWith is the pure form of ResolveCache without the read-path
102+
// fallback. Takes the factory set as a parameter so tests can inject stubs.
103+
// Used directly by ResolveCacheForLogin (which has its own fallback rules)
104+
// and indirectly by ResolveCache (which adds the read-path fallback in
105+
// resolveCacheForReadWith).
94106
func resolveCacheWith(ctx context.Context, override StorageMode, f cacheFactories) (cache.TokenCache, StorageMode, error) {
95107
mode, err := ResolveStorageMode(ctx, override)
96108
if err != nil {
@@ -110,6 +122,61 @@ func resolveCacheWith(ctx context.Context, override StorageMode, f cacheFactorie
110122
}
111123
}
112124

125+
// resolveCacheForReadWith is the pure form of ResolveCache. It applies the
126+
// read-path fallback: when mode is secure-from-default and the keyring
127+
// probes as definitively unavailable, return the file cache instead.
128+
// Timeouts keep the keyring (could be transient).
129+
func resolveCacheForReadWith(ctx context.Context, override StorageMode, f cacheFactories) (cache.TokenCache, StorageMode, error) {
130+
mode, source, err := ResolveStorageModeWithSource(ctx, override)
131+
if err != nil {
132+
return nil, "", err
133+
}
134+
return applyReadFallback(ctx, mode, source.Explicit(), f)
135+
}
136+
137+
// applyReadFallback realizes the read-path fallback. Mirrors
138+
// applyLoginFallback but:
139+
//
140+
// - Uses a read-only probe (ProbeKeyringRead) so calls do not write to
141+
// the keyring on every CLI invocation.
142+
// - Does not persist auth_storage = plaintext. Pinning happens only on
143+
// successful login, where the write-probe gives us stronger evidence
144+
// that the keyring is truly unavailable on this machine.
145+
//
146+
// Explicit secure is honored: callers who asked for secure get the keyring
147+
// cache even if the probe fails, so the actual Lookup error surfaces the
148+
// unreachability instead of silently using a different backend.
149+
func applyReadFallback(ctx context.Context, mode StorageMode, explicit bool, f cacheFactories) (cache.TokenCache, StorageMode, error) {
150+
switch mode {
151+
case StorageModePlaintext:
152+
c, err := f.newFile(ctx)
153+
if err != nil {
154+
return nil, "", fmt.Errorf("open file token cache: %w", err)
155+
}
156+
return c, mode, nil
157+
case StorageModeSecure:
158+
if explicit {
159+
return f.newKeyring(), mode, nil
160+
}
161+
if probeErr := f.probeKeyringRead(); probeErr != nil {
162+
var timeoutErr *TimeoutError
163+
if errors.As(probeErr, &timeoutErr) {
164+
log.Debugf(ctx, "keyring read probe timed out (%v); staying on keyring", probeErr)
165+
return f.newKeyring(), mode, nil
166+
}
167+
log.Debugf(ctx, "secure storage unavailable on read path (%v), using file cache", probeErr)
168+
fileCache, fileErr := f.newFile(ctx)
169+
if fileErr != nil {
170+
return nil, "", fmt.Errorf("open file token cache: %w", fileErr)
171+
}
172+
return fileCache, StorageModePlaintext, nil
173+
}
174+
return f.newKeyring(), mode, nil
175+
default:
176+
return nil, "", fmt.Errorf("unsupported storage mode %q", string(mode))
177+
}
178+
}
179+
113180
// resolveCacheForLoginWith is the pure form of ResolveCacheForLogin. It takes
114181
// the factory set as a parameter so tests can inject stubs.
115182
func resolveCacheForLoginWith(ctx context.Context, override StorageMode, f cacheFactories) (cache.TokenCache, StorageMode, error) {

libs/auth/storage/cache_test.go

Lines changed: 115 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,10 @@ func (stubCache) Lookup(string) (*oauth2.Token, error) { return nil, cache.ErrNo
2828
func fakeFactories(t *testing.T) cacheFactories {
2929
t.Helper()
3030
return cacheFactories{
31-
newFile: func(context.Context) (cache.TokenCache, error) { return stubCache{source: "file"}, nil },
32-
newKeyring: func() cache.TokenCache { return stubCache{source: "keyring"} },
33-
probeKeyring: func() error { return nil },
31+
newFile: func(context.Context) (cache.TokenCache, error) { return stubCache{source: "file"}, nil },
32+
newKeyring: func() cache.TokenCache { return stubCache{source: "keyring"} },
33+
probeKeyring: func() error { return nil },
34+
probeKeyringRead: func() error { return nil },
3435
}
3536
}
3637

@@ -111,9 +112,10 @@ func TestResolveCache_FileFactoryErrorPropagates(t *testing.T) {
111112
ctx := t.Context()
112113
boom := errors.New("disk full")
113114
factories := cacheFactories{
114-
newFile: func(context.Context) (cache.TokenCache, error) { return nil, boom },
115-
newKeyring: func() cache.TokenCache { return stubCache{source: "keyring"} },
116-
probeKeyring: func() error { return nil },
115+
newFile: func(context.Context) (cache.TokenCache, error) { return nil, boom },
116+
newKeyring: func() cache.TokenCache { return stubCache{source: "keyring"} },
117+
probeKeyring: func() error { return nil },
118+
probeKeyringRead: func() error { return nil },
117119
}
118120

119121
_, _, err := resolveCacheWith(ctx, StorageModePlaintext, factories)
@@ -122,6 +124,113 @@ func TestResolveCache_FileFactoryErrorPropagates(t *testing.T) {
122124
assert.ErrorIs(t, err, boom)
123125
}
124126

127+
// applyReadFallback mirrors applyLoginFallback's logic on the read path:
128+
// keyring is probed read-only, definitive failures fall through to the file
129+
// cache so legacy plaintext tokens stay reachable, timeouts stay on the
130+
// keyring, and explicit-secure is honored even when the probe fails.
131+
132+
func TestApplyReadFallback_PlaintextSkipsProbe(t *testing.T) {
133+
hermetic(t)
134+
ctx := t.Context()
135+
probed := false
136+
f := fakeFactories(t)
137+
f.probeKeyringRead = func() error {
138+
probed = true
139+
return nil
140+
}
141+
142+
got, mode, err := applyReadFallback(ctx, StorageModePlaintext, false, f)
143+
144+
require.NoError(t, err)
145+
assert.Equal(t, StorageModePlaintext, mode)
146+
assert.Equal(t, "file", got.(stubCache).source)
147+
assert.False(t, probed, "probe must not run when mode is already plaintext")
148+
}
149+
150+
func TestApplyReadFallback_ExplicitSecureSkipsProbe(t *testing.T) {
151+
hermetic(t)
152+
ctx := t.Context()
153+
probed := false
154+
f := fakeFactories(t)
155+
f.probeKeyringRead = func() error {
156+
probed = true
157+
return errors.New("unreachable")
158+
}
159+
160+
got, mode, err := applyReadFallback(ctx, StorageModeSecure, true, f)
161+
162+
require.NoError(t, err)
163+
assert.Equal(t, StorageModeSecure, mode)
164+
assert.Equal(t, "keyring", got.(stubCache).source)
165+
assert.False(t, probed, "probe must not run when user is explicit about secure mode")
166+
}
167+
168+
func TestApplyReadFallback_DefaultSecure_ProbeOK_UsesKeyring(t *testing.T) {
169+
hermetic(t)
170+
ctx := t.Context()
171+
172+
got, mode, err := applyReadFallback(ctx, StorageModeSecure, false, fakeFactories(t))
173+
174+
require.NoError(t, err)
175+
assert.Equal(t, StorageModeSecure, mode)
176+
assert.Equal(t, "keyring", got.(stubCache).source)
177+
}
178+
179+
func TestApplyReadFallback_DefaultSecure_ProbeFail_FallsBack(t *testing.T) {
180+
hermetic(t)
181+
ctx := t.Context()
182+
configPath := env.Get(ctx, "DATABRICKS_CONFIG_FILE")
183+
184+
f := fakeFactories(t)
185+
f.probeKeyringRead = func() error { return errors.New("no keyring") }
186+
187+
got, mode, err := applyReadFallback(ctx, StorageModeSecure, false, f)
188+
189+
require.NoError(t, err)
190+
assert.Equal(t, StorageModePlaintext, mode)
191+
assert.Equal(t, "file", got.(stubCache).source)
192+
193+
// Read-path fallback must NOT pin: pinning is reserved for login,
194+
// where the write-probe gives stronger evidence of unavailability.
195+
persisted, gerr := databrickscfg.GetConfiguredAuthStorage(ctx, configPath)
196+
require.NoError(t, gerr)
197+
assert.Equal(t, "", persisted, "read-path fallback must not persist auth_storage")
198+
}
199+
200+
// A timeout could mean a locked keyring that will work once the user unlocks
201+
// it. Stay on the keyring so the actual Lookup surfaces the real outcome
202+
// rather than silently routing reads to the file cache.
203+
func TestApplyReadFallback_DefaultSecure_ProbeTimeout_StaysOnKeyring(t *testing.T) {
204+
cases := []struct {
205+
name string
206+
probeErr error
207+
}{
208+
{"bare TimeoutError", &TimeoutError{Op: "get"}},
209+
{"wrapped TimeoutError", fmt.Errorf("get: %w", &TimeoutError{Op: "get"})},
210+
}
211+
212+
for _, tc := range cases {
213+
t.Run(tc.name, func(t *testing.T) {
214+
hermetic(t)
215+
ctx := t.Context()
216+
configPath := env.Get(ctx, "DATABRICKS_CONFIG_FILE")
217+
218+
f := fakeFactories(t)
219+
f.probeKeyringRead = func() error { return tc.probeErr }
220+
221+
got, mode, err := applyReadFallback(ctx, StorageModeSecure, false, f)
222+
223+
require.NoError(t, err)
224+
assert.Equal(t, StorageModeSecure, mode)
225+
assert.Equal(t, "keyring", got.(stubCache).source)
226+
227+
persisted, gerr := databrickscfg.GetConfiguredAuthStorage(ctx, configPath)
228+
require.NoError(t, gerr)
229+
assert.Equal(t, "", persisted, "probe timeout must not persist anything")
230+
})
231+
}
232+
}
233+
125234
func TestResolveCacheForLogin_PlaintextSkipsProbe(t *testing.T) {
126235
hermetic(t)
127236
ctx := t.Context()

libs/auth/storage/keyring.go

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,9 @@ func NewKeyringCache() cache.TokenCache {
9292
// cycle within the standard timeout. *TimeoutError means the keyring
9393
// was unresponsive (locked or hung, indistinguishable here); other
9494
// errors are definitive failures.
95+
//
96+
// Used by the login path, where we want to validate both read and write
97+
// capability before committing to the keyring backend.
9598
func ProbeKeyring() error {
9699
return probeWithBackend(zalandoBackend{}, defaultKeyringTimeout)
97100
}
@@ -113,6 +116,35 @@ func probeWithBackend(backend keyringBackend, timeout time.Duration) error {
113116
return nil
114117
}
115118

119+
// ProbeKeyringRead returns nil if the OS keyring accepted a Get for a
120+
// non-existent account within the standard timeout (i.e. the backend is
121+
// reachable and responded with keyring.ErrNotFound). *TimeoutError means
122+
// the keyring was unresponsive; other errors are definitive failures.
123+
//
124+
// Used by the read path so probing does not write to the keyring. A
125+
// successful probe is indistinguishable from the user not having an
126+
// entry for that probe account; we treat both as "reachable".
127+
func ProbeKeyringRead() error {
128+
return probeReadWithBackend(zalandoBackend{}, defaultKeyringTimeout)
129+
}
130+
131+
func probeReadWithBackend(backend keyringBackend, timeout time.Duration) error {
132+
c := &keyringCache{
133+
backend: backend,
134+
timeout: timeout,
135+
keyringSvcName: keyringServiceName,
136+
}
137+
account := keyringProbeAccountPrefix + uuid.NewString()
138+
err := c.withTimeout("get", func() error {
139+
_, gerr := c.backend.Get(c.keyringSvcName, account)
140+
return gerr
141+
})
142+
if errors.Is(err, keyring.ErrNotFound) {
143+
return nil
144+
}
145+
return err
146+
}
147+
116148
// Store stores t under key. Nil t deletes the entry; deleting a missing
117149
// entry is not an error.
118150
func (k *keyringCache) Store(key string, t *oauth2.Token) error {

libs/auth/storage/keyring_test.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -296,3 +296,59 @@ func TestProbeKeyring(t *testing.T) {
296296
})
297297
}
298298
}
299+
300+
func TestProbeKeyringRead(t *testing.T) {
301+
boom := errors.New("backend boom")
302+
cases := []struct {
303+
name string
304+
getErr error
305+
getBlock bool
306+
timeout time.Duration
307+
wantErr error
308+
wantTimeout bool
309+
}{
310+
{
311+
// keyring.ErrNotFound is the success signal: the backend
312+
// responded that no entry exists for our probe account,
313+
// which means it is reachable.
314+
name: "ErrNotFound counts as reachable",
315+
getErr: keyring.ErrNotFound,
316+
timeout: 100 * time.Millisecond,
317+
},
318+
{
319+
name: "other backend error propagates",
320+
getErr: boom,
321+
timeout: 100 * time.Millisecond,
322+
wantErr: boom,
323+
},
324+
{
325+
name: "get times out",
326+
getBlock: true,
327+
timeout: 50 * time.Millisecond,
328+
wantTimeout: true,
329+
},
330+
}
331+
332+
for _, tc := range cases {
333+
t.Run(tc.name, func(t *testing.T) {
334+
backend := newFakeBackend()
335+
backend.getErr = tc.getErr
336+
backend.getBlock = tc.getBlock
337+
338+
err := probeReadWithBackend(backend, tc.timeout)
339+
340+
switch {
341+
case tc.wantErr != nil:
342+
require.Error(t, err)
343+
assert.ErrorIs(t, err, tc.wantErr)
344+
case tc.wantTimeout:
345+
require.Error(t, err)
346+
var timeoutErr *TimeoutError
347+
assert.ErrorAs(t, err, &timeoutErr)
348+
default:
349+
require.NoError(t, err)
350+
assert.Empty(t, backend.items, "read probe must not write to the keyring")
351+
}
352+
})
353+
}
354+
}

0 commit comments

Comments
 (0)