Skip to content

Commit 3aa759a

Browse files
emilk and claude committed
Cache Azure user delegation key to avoid GetUserDelegationKey throttling
The Azure SAS signer fetched a fresh user delegation key (a `GetUserDelegationKey` network round-trip) on every signing request. Under load, Azure throttles these requests with HTTP 503, which surfaced as `QueryDataset` Internal errors in redap.

Cache a long-lived (6h) user delegation key on `AzureClient` and reuse it to mint many short-lived SAS tokens, refreshing only when it no longer covers the requested window.

Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 93f6f88 commit 3aa759a

1 file changed

Lines changed: 118 additions & 8 deletions

File tree

src/azure/client.rs

Lines changed: 118 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ use http::{
4646
use rand::RngExt;
4747
use serde::{Deserialize, Serialize};
4848
use std::collections::HashMap;
49-
use std::sync::Arc;
49+
use std::sync::{Arc, Mutex};
5050
use std::time::Duration;
5151
use url::Url;
5252

@@ -662,16 +662,54 @@ async fn parse_blob_batch_delete_body(
662662
Ok(results)
663663
}
664664

665+
/// Lifetime requested for each newly fetched user delegation key (6 hours).
///
/// Only the key is long-lived: the SAS tokens signed with it keep their own,
/// shorter windows. A longer key lifetime simply means fewer
/// `GetUserDelegationKey` calls. Azure caps the key lifetime at 7 days.
const USER_DELEGATION_KEY_VALIDITY: Duration = Duration::from_secs(6 * 3600);
670+
671+
/// Safety margin kept between a SAS expiry and the cached key's own expiry
/// (5 minutes).
///
/// A cached key stops being reused once a requested SAS expiry comes within
/// this margin of the key's expiry, so the key is rotated before it actually
/// lapses and clock skew between us and Azure is absorbed.
const USER_DELEGATION_KEY_REFRESH_MARGIN: Duration = Duration::from_secs(300);
674+
675+
/// A user delegation key plus the signing key derived from it, cached so we can
676+
/// sign many SAS URLs without re-fetching the key on every request.
677+
#[derive(Debug, Clone)]
678+
struct CachedDelegationKey {
679+
signing_key: AzureAccessKey,
680+
key: UserDelegationKey,
681+
valid_from: DateTime<Utc>,
682+
valid_until: DateTime<Utc>,
683+
}
684+
685+
impl CachedDelegationKey {
686+
/// Whether this key is valid for the entire `[start, expiry]` SAS window,
687+
/// keeping a refresh margin so we rotate before it lapses.
688+
fn covers(&self, start: DateTime<Utc>, expiry: DateTime<Utc>) -> bool {
689+
self.valid_from <= start && expiry + USER_DELEGATION_KEY_REFRESH_MARGIN <= self.valid_until
690+
}
691+
}
692+
665693
#[derive(Debug)]
666694
pub(crate) struct AzureClient {
667695
config: AzureConfig,
668696
client: HttpClient,
697+
/// Cached user delegation key used to sign SAS URLs.
698+
///
699+
/// Fetching a key is a network round-trip (`GetUserDelegationKey`), so we
700+
/// request a long-lived key once and reuse it to mint many short-lived SAS
701+
/// tokens, refreshing only when it no longer covers the requested window.
702+
delegation_key_cache: Mutex<Option<CachedDelegationKey>>,
669703
}
670704

671705
impl AzureClient {
672706
/// create a new instance of [AzureClient]
673707
pub(crate) fn new(config: AzureConfig, client: HttpClient) -> Self {
674-
Self { config, client }
708+
Self {
709+
config,
710+
client,
711+
delegation_key_cache: Mutex::new(None),
712+
}
675713
}
676714

677715
/// Returns the config
@@ -1019,16 +1057,15 @@ impl AzureClient {
10191057
let signed_expiry = signed_start + expires_in;
10201058
match credential.as_deref() {
10211059
Some(AzureCredential::BearerToken(_)) => {
1022-
let key = self
1023-
.get_user_delegation_key(&signed_start, &signed_expiry)
1060+
let cached = self
1061+
.cached_user_delegation_key(signed_start, signed_expiry)
10241062
.await?;
1025-
let signing_key = AzureAccessKey::try_new(&key.value)?;
10261063
Ok(AzureSigner::new(
1027-
signing_key,
1064+
cached.signing_key,
10281065
self.config.account.clone(),
10291066
signed_start,
10301067
signed_expiry,
1031-
Some(key),
1068+
Some(cached.key),
10321069
))
10331070
}
10341071
Some(AzureCredential::AccessKey(key)) => Ok(AzureSigner::new(
@@ -1043,6 +1080,51 @@ impl AzureClient {
10431080
}
10441081
}
10451082

1083+
/// Return a user delegation key that is valid for the whole `[start, expiry]`
1084+
/// SAS window, reusing the cached key when possible.
1085+
///
1086+
/// `GetUserDelegationKey` is a network round-trip and Azure throttles it
1087+
/// (HTTP 503) under load, so we fetch a longer-lived key once and reuse it
1088+
/// to sign many short-lived SAS URLs.
1089+
async fn cached_user_delegation_key(
1090+
&self,
1091+
start: DateTime<Utc>,
1092+
expiry: DateTime<Utc>,
1093+
) -> Result<CachedDelegationKey> {
1094+
// Fast path: a cached key that still covers the requested window.
1095+
if let Some(cached) = self.delegation_key_cache.lock().unwrap().clone() {
1096+
if cached.covers(start, expiry) {
1097+
return Ok(cached);
1098+
}
1099+
}
1100+
1101+
// Slow path: fetch a fresh, long-lived key. We may race other callers
1102+
// here and fetch more than once, but that is rare and self-correcting.
1103+
let key_start = start;
1104+
let key_expiry = expiry.max(start + USER_DELEGATION_KEY_VALIDITY);
1105+
let key = self
1106+
.get_user_delegation_key(&key_start, &key_expiry)
1107+
.await?;
1108+
let signing_key = AzureAccessKey::try_new(&key.value)?;
1109+
1110+
// Trust the validity window Azure actually granted (it may clamp it).
1111+
let valid_from = DateTime::parse_from_rfc3339(&key.signed_start)
1112+
.map(|t| t.with_timezone(&Utc))
1113+
.unwrap_or(key_start);
1114+
let valid_until = DateTime::parse_from_rfc3339(&key.signed_expiry)
1115+
.map(|t| t.with_timezone(&Utc))
1116+
.unwrap_or(key_expiry);
1117+
1118+
let cached = CachedDelegationKey {
1119+
signing_key,
1120+
key,
1121+
valid_from,
1122+
valid_until,
1123+
};
1124+
*self.delegation_key_cache.lock().unwrap() = Some(cached.clone());
1125+
Ok(cached)
1126+
}
1127+
10461128
#[cfg(test)]
10471129
pub(crate) async fn get_blob_tagging(&self, path: &Path) -> Result<HttpResponse> {
10481130
let credential = self.get_credential().await?;
@@ -1385,7 +1467,7 @@ impl BlockList {
13851467
}
13861468
}
13871469

1388-
#[derive(Debug, Clone, PartialEq, Deserialize)]
1470+
#[derive(Debug, Clone, Default, PartialEq, Deserialize)]
13891471
#[serde(rename_all = "PascalCase")]
13901472
pub(crate) struct UserDelegationKey {
13911473
pub signed_oid: String,
@@ -1594,6 +1676,34 @@ mod tests {
15941676
quick_xml::de::from_str(S).unwrap();
15951677
}
15961678

1679+
/// Exercises `CachedDelegationKey::covers` against a key valid from
/// 00:00 to 06:00 UTC, including the inclusive boundaries on both ends.
#[test]
fn test_cached_delegation_key_covers() {
    let at = |s: &str| DateTime::parse_from_rfc3339(s).unwrap().with_timezone(&Utc);

    let cached = CachedDelegationKey {
        signing_key: AzureAccessKey::try_new("Zm9vYmFy").unwrap(),
        key: UserDelegationKey::default(),
        valid_from: at("2026-06-25T00:00:00Z"),
        valid_until: at("2026-06-25T06:00:00Z"),
    };

    // SAS window comfortably inside the key validity: reuse.
    assert!(cached.covers(at("2026-06-25T01:00:00Z"), at("2026-06-25T02:00:00Z")));

    // SAS starts exactly when the key becomes valid (inclusive bound): reuse.
    assert!(cached.covers(at("2026-06-25T00:00:00Z"), at("2026-06-25T01:00:00Z")));

    // SAS starts before the key is valid: refetch.
    assert!(!cached.covers(at("2026-06-24T23:59:00Z"), at("2026-06-25T00:30:00Z")));

    // SAS expiry past the key expiry: refetch.
    assert!(!cached.covers(at("2026-06-25T05:00:00Z"), at("2026-06-25T06:30:00Z")));

    // SAS expiry within the refresh margin of the key expiry: refetch.
    assert!(!cached.covers(at("2026-06-25T05:00:00Z"), at("2026-06-25T05:58:00Z")));

    // SAS expiry exactly one refresh margin before the key expiry
    // (inclusive bound): reuse.
    assert!(cached.covers(at("2026-06-25T05:00:00Z"), at("2026-06-25T05:55:00Z")));

    // One second past that boundary: refetch.
    assert!(!cached.covers(at("2026-06-25T05:00:00Z"), at("2026-06-25T05:55:01Z")));
}
1706+
15971707
#[cfg(feature = "reqwest")]
15981708
#[tokio::test]
15991709
async fn test_build_bulk_delete_body() {

0 commit comments

Comments
 (0)