From 6cc96a5adb0dd727ad7df94e57b620b34a8508d8 Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 7 May 2026 18:39:06 +0200 Subject: [PATCH 1/4] auth: keep keyring backend on probe timeout during login A locked-but-reachable OS keyring surfaces as a probe TimeoutError. Today that's treated the same as a genuinely unavailable keyring: login commits to plaintext, even though the user is mid-typing in the unlock dialog and the keyring is about to be unlocked. Distinguish *TimeoutError from other probe errors. On timeout, stay on keyring regardless of whether secure was explicit; the unlock continues in parallel with the OAuth flow and the final Store completes against an unlocked keyring. Other probe errors keep current behavior. --- NEXT_CHANGELOG.md | 2 ++ cmd/auth/login.go | 10 +++++---- libs/auth/storage/cache.go | 20 ++++++++++++++++- libs/auth/storage/cache_test.go | 40 +++++++++++++++++++++++++++++++++ libs/auth/storage/keyring.go | 21 +++++++++++------ 5 files changed, 81 insertions(+), 12 deletions(-) diff --git a/NEXT_CHANGELOG.md b/NEXT_CHANGELOG.md index 409a875ad1..a0b41a2dd8 100644 --- a/NEXT_CHANGELOG.md +++ b/NEXT_CHANGELOG.md @@ -4,6 +4,8 @@ ### CLI +* `auth login` no longer falls back to plaintext when the OS keyring is reachable but locked. The unlock prompt shown by the probe now runs in parallel with the OAuth flow, and the token is stored in the keyring once the user has typed their password. + ### Bundles * Fixed `--force-pull` on `bundle summary` and `bundle open` so the flag bypasses the local state cache and reads state from the workspace. diff --git a/cmd/auth/login.go b/cmd/auth/login.go index c4d0851011..b6d0b3cb64 100644 --- a/cmd/auth/login.go +++ b/cmd/auth/login.go @@ -146,10 +146,12 @@ a new profile is created. ctx := cmd.Context() profileName := cmd.Flag("profile").Value.String() - // Resolve the cache before the browser step so a missing/locked keyring - // surfaces here rather than after the user completes OAuth. When secure - // is selected but the keyring is unreachable, this silently falls back - // to plaintext and persists auth_storage = plaintext for next time. + // Resolve the cache before the browser step. The probe also acts as + // a side-channel that triggers the OS keyring unlock prompt early, + // so the user can answer it while OAuth is running. A genuinely + // unavailable keyring still surfaces here rather than after OAuth; + // a locked keyring (probe times out) is non-fatal and login stays + // on the keyring backend. tokenCache, mode, err := storage.ResolveCacheForLogin(ctx, "") if err != nil { return err diff --git a/libs/auth/storage/cache.go b/libs/auth/storage/cache.go index 8e8b779afe..37db38b78f 100644 --- a/libs/auth/storage/cache.go +++ b/libs/auth/storage/cache.go @@ -2,6 +2,7 @@ package storage import ( "context" + "errors" "fmt" "github.com/databricks/cli/libs/databrickscfg" @@ -54,8 +55,13 @@ func ResolveCache(ctx context.Context, override StorageMode) (cache.TokenCache, // 2. When the user explicitly asked for secure (override, env var, or // config) but the keyring is unreachable, return an error. An explicit // "I want secure" is honored strictly: never silently downgrade. +// 3. When the probe times out, the keyring is reachable but locked — the +// OS unlock prompt is on screen and the user is mid-typing. Stay on +// keyring regardless of explicit. The unlock continues in parallel +// with the OAuth flow, and by the time the final Store runs the +// keyring will be unlocked. // -// Both rules are dormant today: the resolver default is plaintext, so +// Rules 1 and 2 are dormant today: the resolver default is plaintext, so // (mode=Secure, explicit=false) is unreachable. They activate when the // default flips to secure (MS4 / cli-ga). Wiring lands now so MS4 is a // single-line default flip plus pin-on-success additions. @@ -133,6 +139,18 @@ func applyLoginFallback(ctx context.Context, mode StorageMode, explicit bool, f return c, mode, nil case StorageModeSecure: if probeErr := f.probeKeyring(); probeErr != nil { + // A timeout means the keyring is reachable but locked: the OS + // unlock prompt is on screen and the user is mid-typing. Stay on + // keyring regardless of explicit; by the time OAuth finishes the + // prompt has been answered and the final Store will succeed + // against an unlocked keyring. Mirrors gh CLI's accidentally- + // similar flow where AuthTokenWriteable's early keyring.Get + // triggers the same dialog without committing to plaintext. + var timeoutErr *TimeoutError + if errors.As(probeErr, &timeoutErr) { + log.Debugf(ctx, "keyring probe timed out (%v); user is likely unlocking, staying on keyring", probeErr) + return f.newKeyring(), mode, nil + } if explicit { return nil, "", fmt.Errorf("secure storage was requested but the OS keyring is not reachable: %w", probeErr) } diff --git a/libs/auth/storage/cache_test.go b/libs/auth/storage/cache_test.go index 34d3f38c13..db059299dc 100644 --- a/libs/auth/storage/cache_test.go +++ b/libs/auth/storage/cache_test.go @@ -3,6 +3,7 @@ package storage import ( "context" "errors" + "fmt" "os" "path/filepath" "testing" @@ -232,6 +233,45 @@ func TestApplyLoginFallback_ExplicitSecure_ProbeFail_Errors(t *testing.T) { assert.Equal(t, "", persisted, "explicit-secure error must not write config") } +// A locked keyring with a slow user surfaces as TimeoutError. We want login +// to stay on the keyring so the final Store lands there once the user has +// finished unlocking, regardless of whether secure was explicit. Cover both +// the bare TimeoutError (in case probe wraps thinner in the future) and the +// real wrapped form returned by probeWithBackend. +func TestApplyLoginFallback_ProbeTimeout_StaysOnKeyring(t *testing.T) { + cases := []struct { + name string + explicit bool + probeErr error + }{ + {"default-secure, bare TimeoutError", false, &TimeoutError{Op: "set"}}, + {"default-secure, wrapped TimeoutError", false, fmt.Errorf("write: %w", &TimeoutError{Op: "set"})}, + {"explicit-secure, bare TimeoutError", true, &TimeoutError{Op: "set"}}, + {"explicit-secure, wrapped TimeoutError", true, fmt.Errorf("write: %w", &TimeoutError{Op: "set"})}, + } + + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + hermetic(t) + ctx := t.Context() + configPath := env.Get(ctx, "DATABRICKS_CONFIG_FILE") + + f := fakeFactories(t) + f.probeKeyring = func() error { return tc.probeErr } + + got, mode, err := applyLoginFallback(ctx, StorageModeSecure, tc.explicit, f) + + require.NoError(t, err) + assert.Equal(t, StorageModeSecure, mode) + assert.Equal(t, "keyring", got.(stubCache).source) + + persisted, gerr := databrickscfg.GetConfiguredAuthStorage(ctx, configPath) + require.NoError(t, gerr) + assert.Equal(t, "", persisted, "probe timeout must not persist plaintext fallback") + }) + } +} + func TestWrapForOAuthArgument(t *testing.T) { const ( host = "https://example.com" diff --git a/libs/auth/storage/keyring.go b/libs/auth/storage/keyring.go index 4f00b88152..caf92fa63e 100644 --- a/libs/auth/storage/keyring.go +++ b/libs/auth/storage/keyring.go @@ -88,14 +88,21 @@ func NewKeyringCache() cache.TokenCache { } } -// ProbeKeyring returns nil if the OS keyring is reachable and accepts a -// write+delete cycle within the standard timeout. A non-nil error means the -// keyring cannot be used in this environment (no backend, headless Linux -// session waiting on a UI prompt, locked keychain refusing access, etc.). +// ProbeKeyring returns nil if a write+delete cycle completed within the +// standard timeout. Callers distinguish two non-nil shapes via errors.As: // -// Used by databricks auth login to decide whether to silently fall back to -// plaintext storage before opening the browser, so the user does not -// complete an OAuth flow only to fail at the final Store call. +// - *TimeoutError: the keyring is reachable but locked. On Linux this +// usually means the GUI unlock prompt is up and the user has not +// finished typing yet. The login path treats this as "stay on keyring" +// because the prompt continues in parallel with OAuth and the final +// Store will succeed against the by-then-unlocked keyring. +// - any other error: the keyring is genuinely unavailable (no daemon, +// headless session with no secret service, ...). Login falls back to +// plaintext now rather than failing after OAuth. +// +// Probing also has a useful side effect: triggering the unlock prompt up +// front, before the browser step. The user can answer it while OAuth is in +// flight instead of after. func ProbeKeyring() error { return probeWithBackend(zalandoBackend{}, defaultKeyringTimeout) } From 2ec22bf0a7e9dafe52dad513fa9b6f7574a8cbeb Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 7 May 2026 18:56:18 +0200 Subject: [PATCH 2/4] auth: clarify timeout is indeterminate in probe contract The wrapper produces TimeoutError for any operation that misses the deadline, not only locked-but-reachable keyrings. The login path's optimistic stay-on-keyring is still right (cost of guessing wrong is one wasted OAuth ceremony, not a silently-plaintext token), but the docstring and inline comment overclaimed by labeling timeout as 'reachable but locked'. --- libs/auth/storage/cache.go | 16 ++++++++-------- libs/auth/storage/keyring.go | 22 ++++++++++++++-------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/libs/auth/storage/cache.go b/libs/auth/storage/cache.go index 37db38b78f..9e669115ae 100644 --- a/libs/auth/storage/cache.go +++ b/libs/auth/storage/cache.go @@ -139,16 +139,16 @@ func applyLoginFallback(ctx context.Context, mode StorageMode, explicit bool, f return c, mode, nil case StorageModeSecure: if probeErr := f.probeKeyring(); probeErr != nil { - // A timeout means the keyring is reachable but locked: the OS - // unlock prompt is on screen and the user is mid-typing. Stay on - // keyring regardless of explicit; by the time OAuth finishes the - // prompt has been answered and the final Store will succeed - // against an unlocked keyring. Mirrors gh CLI's accidentally- - // similar flow where AuthTokenWriteable's early keyring.Get - // triggers the same dialog without committing to plaintext. + // Timeout is indeterminate: usually a locked keyring with the + // GUI unlock prompt up and the user typing, but a hung or slow + // daemon produces the same error. Stay on keyring optimistically. + // If the user is unlocking, the final Store at end of OAuth lands + // in the now-unlocked keyring. If the keyring is genuinely stuck, + // the final Store also times out and login fails late, same + // outcome as failing here, but with no silently-plaintext token. var timeoutErr *TimeoutError if errors.As(probeErr, &timeoutErr) { - log.Debugf(ctx, "keyring probe timed out (%v); user is likely unlocking, staying on keyring", probeErr) + log.Debugf(ctx, "keyring probe timed out (%v); staying on keyring optimistically", probeErr) return f.newKeyring(), mode, nil } if explicit { diff --git a/libs/auth/storage/keyring.go b/libs/auth/storage/keyring.go index caf92fa63e..3bcca0a629 100644 --- a/libs/auth/storage/keyring.go +++ b/libs/auth/storage/keyring.go @@ -91,14 +91,20 @@ func NewKeyringCache() cache.TokenCache { // ProbeKeyring returns nil if a write+delete cycle completed within the // standard timeout. Callers distinguish two non-nil shapes via errors.As: // -// - *TimeoutError: the keyring is reachable but locked. On Linux this -// usually means the GUI unlock prompt is up and the user has not -// finished typing yet. The login path treats this as "stay on keyring" -// because the prompt continues in parallel with OAuth and the final -// Store will succeed against the by-then-unlocked keyring. -// - any other error: the keyring is genuinely unavailable (no daemon, -// headless session with no secret service, ...). Login falls back to -// plaintext now rather than failing after OAuth. +// - *TimeoutError: indeterminate. The keyring did not respond within +// the timeout. The common cause on Linux is a locked collection +// waiting on a GUI unlock prompt with the user mid-typing, but a +// hung or slow daemon produces the same shape. The login path +// optimistically stays on keyring: if the user is unlocking, the +// prompt continues in parallel with OAuth and the final Store +// succeeds against an unlocked keyring; if the keyring is genuinely +// stuck, the final Store also times out and login fails late +// instead of early. The cost of guessing wrong is one wasted OAuth +// ceremony, not a silently-plaintext token. +// - any other error: the keyring returned a definitive failure (no +// daemon, headless session with no secret service, dismissed prompt, +// ...). Login falls back to plaintext now rather than failing after +// OAuth. // // Probing also has a useful side effect: triggering the unlock prompt up // front, before the browser step. The user can answer it while OAuth is in From 1330327c875003c3b7648aac1fd70d841f83e03f Mon Sep 17 00:00:00 2001 From: simon Date: Thu, 7 May 2026 19:00:03 +0200 Subject: [PATCH 3/4] auth: trim verbose comments around keyring probe The doc and inline comments narrated the trade-off in too much detail. Comments should explain WHY when non-obvious, not walk through every branch. The decision and its trade-off are clear in the code; the comment just needs to flag that the timeout case is intentional and not a silent downgrade. --- cmd/auth/login.go | 10 ++++------ libs/auth/storage/cache.go | 21 ++++++++------------- libs/auth/storage/keyring.go | 25 ++++--------------------- 3 files changed, 16 insertions(+), 40 deletions(-) diff --git a/cmd/auth/login.go b/cmd/auth/login.go index b6d0b3cb64..814a4fe505 100644 --- a/cmd/auth/login.go +++ b/cmd/auth/login.go @@ -146,12 +146,10 @@ a new profile is created. ctx := cmd.Context() profileName := cmd.Flag("profile").Value.String() - // Resolve the cache before the browser step. The probe also acts as - // a side-channel that triggers the OS keyring unlock prompt early, - // so the user can answer it while OAuth is running. A genuinely - // unavailable keyring still surfaces here rather than after OAuth; - // a locked keyring (probe times out) is non-fatal and login stays - // on the keyring backend. + // Resolve the cache before the browser step so an unavailable + // keyring surfaces here rather than after OAuth. The probe also + // triggers the OS unlock prompt, which the user can answer during + // OAuth. tokenCache, mode, err := storage.ResolveCacheForLogin(ctx, "") if err != nil { return err diff --git a/libs/auth/storage/cache.go b/libs/auth/storage/cache.go index 9e669115ae..e60a0bb997 100644 --- a/libs/auth/storage/cache.go +++ b/libs/auth/storage/cache.go @@ -55,11 +55,9 @@ func ResolveCache(ctx context.Context, override StorageMode) (cache.TokenCache, // 2. When the user explicitly asked for secure (override, env var, or // config) but the keyring is unreachable, return an error. An explicit // "I want secure" is honored strictly: never silently downgrade. -// 3. When the probe times out, the keyring is reachable but locked — the -// OS unlock prompt is on screen and the user is mid-typing. Stay on -// keyring regardless of explicit. The unlock continues in parallel -// with the OAuth flow, and by the time the final Store runs the -// keyring will be unlocked. +// 3. When the probe times out, stay on keyring regardless of explicit. +// The timeout is ambiguous (locked vs hung); a misdiagnosis fails +// the final Store rather than silently downgrading to plaintext. // // Rules 1 and 2 are dormant today: the resolver default is plaintext, so // (mode=Secure, explicit=false) is unreachable. They activate when the @@ -139,16 +137,13 @@ func applyLoginFallback(ctx context.Context, mode StorageMode, explicit bool, f return c, mode, nil case StorageModeSecure: if probeErr := f.probeKeyring(); probeErr != nil { - // Timeout is indeterminate: usually a locked keyring with the - // GUI unlock prompt up and the user typing, but a hung or slow - // daemon produces the same error. Stay on keyring optimistically. - // If the user is unlocking, the final Store at end of OAuth lands - // in the now-unlocked keyring. If the keyring is genuinely stuck, - // the final Store also times out and login fails late, same - // outcome as failing here, but with no silently-plaintext token. + // Stay on keyring on timeout: a locked keyring being unlocked + // during OAuth is the common case, and a misdiagnosed hang + // fails the final Store anyway, which is better than a + // silent plaintext downgrade. var timeoutErr *TimeoutError if errors.As(probeErr, &timeoutErr) { - log.Debugf(ctx, "keyring probe timed out (%v); staying on keyring optimistically", probeErr) + log.Debugf(ctx, "keyring probe timed out (%v); staying on keyring", probeErr) return f.newKeyring(), mode, nil } if explicit { diff --git a/libs/auth/storage/keyring.go b/libs/auth/storage/keyring.go index 3bcca0a629..8ce87525b3 100644 --- a/libs/auth/storage/keyring.go +++ b/libs/auth/storage/keyring.go @@ -88,27 +88,10 @@ func NewKeyringCache() cache.TokenCache { } } -// ProbeKeyring returns nil if a write+delete cycle completed within the -// standard timeout. Callers distinguish two non-nil shapes via errors.As: -// -// - *TimeoutError: indeterminate. The keyring did not respond within -// the timeout. The common cause on Linux is a locked collection -// waiting on a GUI unlock prompt with the user mid-typing, but a -// hung or slow daemon produces the same shape. The login path -// optimistically stays on keyring: if the user is unlocking, the -// prompt continues in parallel with OAuth and the final Store -// succeeds against an unlocked keyring; if the keyring is genuinely -// stuck, the final Store also times out and login fails late -// instead of early. The cost of guessing wrong is one wasted OAuth -// ceremony, not a silently-plaintext token. -// - any other error: the keyring returned a definitive failure (no -// daemon, headless session with no secret service, dismissed prompt, -// ...). Login falls back to plaintext now rather than failing after -// OAuth. -// -// Probing also has a useful side effect: triggering the unlock prompt up -// front, before the browser step. The user can answer it while OAuth is in -// flight instead of after. +// ProbeKeyring returns nil if the OS keyring accepted a write+delete +// cycle within the standard timeout. *TimeoutError means the keyring +// was unresponsive (locked or hung, indistinguishable here); other +// errors are definitive failures. func ProbeKeyring() error { return probeWithBackend(zalandoBackend{}, defaultKeyringTimeout) } From 89bc483b747c85d923e5887f1d8280eff8b9e7d0 Mon Sep 17 00:00:00 2001 From: simon Date: Fri, 8 May 2026 15:23:07 +0200 Subject: [PATCH 4/4] auth: clarify dormant rule comments in storage resolver Drops internal milestone jargon (MS4 / cli-ga) from doc comments and aligns wording with the package doc in mode.go (which already says "at GA"). No behavior change. Co-authored-by: Isaac --- libs/auth/storage/cache.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/libs/auth/storage/cache.go b/libs/auth/storage/cache.go index e60a0bb997..069156c0d3 100644 --- a/libs/auth/storage/cache.go +++ b/libs/auth/storage/cache.go @@ -61,8 +61,7 @@ func ResolveCache(ctx context.Context, override StorageMode) (cache.TokenCache, // // Rules 1 and 2 are dormant today: the resolver default is plaintext, so // (mode=Secure, explicit=false) is unreachable. They activate when the -// default flips to secure (MS4 / cli-ga). Wiring lands now so MS4 is a -// single-line default flip plus pin-on-success additions. +// default flips to secure at GA. // // Login-specific. Read paths (auth token, bundle commands) keep the original // keyring error so they don't silently mint plaintext copies of tokens that @@ -124,7 +123,7 @@ func resolveCacheForLoginWith(ctx context.Context, override StorageMode, f cache // // Pin-on-success across modes (locking in the first working behavior to // insulate users from keyring flakiness) is intentionally not implemented -// here. It lands with MS4 alongside the default flip; pinning before the +// here. It lands at GA alongside the default flip; pinning before the // flip would freeze every default user into plaintext and make the flip a // no-op for them. func applyLoginFallback(ctx context.Context, mode StorageMode, explicit bool, f cacheFactories) (cache.TokenCache, StorageMode, error) { @@ -172,7 +171,7 @@ func applyLoginFallback(ctx context.Context, mode StorageMode, explicit bool, f // Only called on the (mode=Secure, explicit=false) probe-failure branch. That // branch is unreachable today (resolver default is plaintext), so this is // dormant infrastructure: it activates when the default flips to secure -// (MS4) and lets default-on-broken-keyring users avoid a 3s probe on every +// at GA and lets default-on-broken-keyring users avoid a 3s probe on every // command. func persistPlaintextFallback(ctx context.Context) error { configPath := env.Get(ctx, "DATABRICKS_CONFIG_FILE")