From a65ec779f94041323a2058e118ec629a038fca79 Mon Sep 17 00:00:00 2001 From: James Sadler Date: Thu, 18 Jun 2026 11:29:34 +1000 Subject: [PATCH 1/2] fix: correct access-key token expiry interpretation in vendored stack-auth (CIP-3233) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit stack-auth's AccessKeyRefresher computed expires_at as `now + auth_resp.expiry`, but CTS /api/authorise returns `expiry` as an ABSOLUTE Unix epoch (the JWT `exp` claim), not a relative duration. The sum landed ~decades in the future, so AutoRefresh never considered the token expired and never refreshed it; ZeroKMS enforced the real ~15-min exp, so encrypt/decrypt failed ~15 min after startup until the pod restarted. Use the value as-is: `expires_at: auth_resp.expiry`. Also corrects the access-key test fixtures, which mocked `expiry` as a small relative value (e.g. 3600) and thereby hid the bug — they now model an absolute epoch (now + N) like the real CTS. Adds a regression test asserting an absolute `expiry` yields expires_in ~= the intended TTL (fails under the pre-fix `now + expiry` arithmetic). This is the actual root cause of the customer's 15-minute failures; the 2.2.3 CancelGuard backport (CIP-3159) is unrelated hardening and did not help. Confirmed against a live production token: response.expiry == JWT exp (absolute), exp - iat == 900. --- CHANGELOG.md | 4 + vendor/stack-auth/src/access_key_refresher.rs | 76 ++++++++++++++++--- 2 files changed, 71 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8abd6201..8656c386 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] +### Fixed + +- **ZeroKMS authentication failures ~15 minutes after startup (access keys)**: Fixed the root cause of access tokens never being renewed when authenticating with an access key. The token's lifetime was misread, so renewal never triggered and every encrypt/decrypt operation began failing (`ZeroKMS error: Request not authorized`, "Could not decrypt data") roughly 15 minutes — the token lifetime — after connecting, recovering only on restart. Tokens now renew correctly ahead of expiry. This resolves the remaining cases not addressed by the 2.2.3 fix. + ## [2.2.3] - 2026-06-17 ### Fixed diff --git a/vendor/stack-auth/src/access_key_refresher.rs b/vendor/stack-auth/src/access_key_refresher.rs index f424cb04..396babcc 100644 --- a/vendor/stack-auth/src/access_key_refresher.rs +++ b/vendor/stack-auth/src/access_key_refresher.rs @@ -1,5 +1,4 @@ use std::sync::Arc; -use std::time::{SystemTime, UNIX_EPOCH}; use url::Url; @@ -67,15 +66,17 @@ impl Refresher for AccessKeyRefresher { } let auth_resp: AuthoriseResponse = resp.json().await?; - let now = SystemTime::now() - .duration_since(UNIX_EPOCH) - .unwrap_or_default() - .as_secs(); Ok(Token { access_token: auth_resp.access_token, token_type: "Bearer".to_string(), - expires_at: now + auth_resp.expiry, + // CTS `/api/authorise` returns `expiry` as an ABSOLUTE Unix epoch (it is + // the JWT `exp` claim), NOT a relative duration. The previous `now + expiry` + // pushed the local expiry decades into the future, so `AutoRefresh` never + // considered the token expired and never refreshed it — the token then + // silently died at its real (~15 min) `exp` and every request failed until + // the process restarted. Use the value as-is. See CIP-3233. + expires_at: auth_resp.expiry, refresh_token: None, region: None, client_id: None, @@ -107,10 +108,17 @@ mod tests { use std::sync::Arc; use std::time::{SystemTime, UNIX_EPOCH}; - fn auth_response_json(access: &str, expiry: u64) -> serde_json::Value { + /// Build a mock `/api/authorise` response. CTS returns `expiry` as an + /// ABSOLUTE Unix epoch (the JWT `exp` claim), so model that faithfully: the + /// token is valid for `expires_in_secs` from now. + fn auth_response_json(access: &str, expires_in_secs: u64) -> serde_json::Value { + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(); serde_json::json!({ "accessToken": access, - "expiry": expiry + "expiry": now + expires_in_secs }) } @@ -146,6 +154,50 @@ mod tests { } } + // ---- Regression: CTS `expiry` is an absolute epoch (CIP-3233) ---- + + /// CTS `/api/authorise` returns `expiry` as an ABSOLUTE Unix epoch (the JWT + /// `exp` claim), not a relative duration. The refresher must use it as-is. + /// + /// Pre-fix (`expires_at = now + expiry`), this token's `expires_at` lands + /// ~decades in the future, so `is_expired()` is never true — the token never + /// refreshes and silently dies at its real ~15-minute `exp`. The assertion + /// below fails under the pre-fix arithmetic (`expires_in()` ≈ 1.7e9) and + /// passes with the fix (`expires_in()` ≈ 900). + #[tokio::test] + async fn access_key_expiry_is_absolute_epoch_not_relative() { + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(); + let absolute_expiry = now + 900; // a 15-minute token, as an absolute epoch + + let mut mocks = MockSet::new(); + mocks.mock(move |when, then| { + when.post().path("/api/authorise"); + then.json(serde_json::json!({ + "accessToken": "tok", + "expiry": absolute_expiry + })); + }); + let server = start_server(mocks).await; + + let refresher = + AccessKeyRefresher::new(SecretToken::new("CSAKid.secret"), server.url(""), None); + let token = refresher.refresh(&()).await.unwrap(); + + assert!( + token.expires_in() <= 1000, + "expires_in should be ~900s (absolute `expiry` used as-is); got {} \ + — pre-fix `now + expiry` yields ~1.7e9", + token.expires_in() + ); + assert!( + !token.is_expired(), + "a fresh 15-minute token must not be reported as already expired" + ); + } + // ---- Initial auth tests ---- #[tokio::test] @@ -405,9 +457,15 @@ mod tests { state.counting.enter(); tokio::time::sleep(state.delay).await; state.counting.exit(); + // CTS returns `expiry` as an absolute epoch (JWT `exp`); model a token + // valid for 1 hour from now. + let now = SystemTime::now() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs(); axum::Json(serde_json::json!({ "accessToken": "refreshed-token", - "expiry": 3600 + "expiry": now + 3600 })) } From a239ebc1eca994d2bf99d8ec8bc7d798426b82fd Mon Sep 17 00:00:00 2001 From: James Sadler Date: Thu, 18 Jun 2026 11:30:29 +1000 Subject: [PATCH 2/2] chore: prepare v2.2.4 release Patch release carrying the access-key token-expiry fix (CIP-3233): bump workspace version 2.2.3 -> 2.2.4 and promote the Unreleased CHANGELOG entry to [2.2.4]. --- CHANGELOG.md | 5 ++++- Cargo.lock | 6 +++--- Cargo.toml | 2 +- 3 files changed, 8 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8656c386..db3602a8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). ## [Unreleased] +## [2.2.4] - 2026-06-18 + ### Fixed - **ZeroKMS authentication failures ~15 minutes after startup (access keys)**: Fixed the root cause of access tokens never being renewed when authenticating with an access key. The token's lifetime was misread, so renewal never triggered and every encrypt/decrypt operation began failing (`ZeroKMS error: Request not authorized`, "Could not decrypt data") roughly 15 minutes — the token lifetime — after connecting, recovering only on restart. Tokens now renew correctly ahead of expiry. This resolves the remaining cases not addressed by the 2.2.3 fix. @@ -271,7 +273,8 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/). - Integration with CipherStash ZeroKMS. - Encrypt Query Language (EQL) for indexing and searching encrypted data. -[Unreleased]: https://github.com/cipherstash/proxy/compare/v2.2.3...HEAD +[Unreleased]: https://github.com/cipherstash/proxy/compare/v2.2.4...HEAD +[2.2.4]: https://github.com/cipherstash/proxy/compare/v2.2.3...v2.2.4 [2.2.3]: https://github.com/cipherstash/proxy/compare/v2.2.2...v2.2.3 [2.2.2]: https://github.com/cipherstash/proxy/compare/v2.2.1...v2.2.2 [2.2.1]: https://github.com/cipherstash/proxy/compare/v2.2.0-alpha.1...v2.2.1 diff --git a/Cargo.lock b/Cargo.lock index bf816df2..d2e0c684 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -835,7 +835,7 @@ dependencies = [ [[package]] name = "cipherstash-proxy" -version = "2.2.3" +version = "2.2.4" dependencies = [ "arc-swap", "async-trait", @@ -1527,7 +1527,7 @@ dependencies = [ [[package]] name = "eql-mapper-macros" -version = "2.2.3" +version = "2.2.4" dependencies = [ "pretty_assertions", "proc-macro2", @@ -4212,7 +4212,7 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "showcase" -version = "2.2.3" +version = "2.2.4" dependencies = [ "rand 0.9.2", "rustls", diff --git a/Cargo.toml b/Cargo.toml index e35976ea..5cbfa718 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,7 +5,7 @@ members = ["packages/*"] exclude = ["vendor/stack-auth"] [workspace.package] -version = "2.2.3" +version = "2.2.4" edition = "2021" [profile.dev]