Skip to content

Commit aec29ba

Browse files
authored
azdls.sas-token.<account> support for vended credentials (#27)
* added load_table_credentials * Support catalog credentials (both during load and during * Draft azdls.sas-token. prefix support * cargo fmt * cargo fmt * fix test - we have to validate scheme, to ensure at least there's one of these two. But we allow both. * . * . * comment
1 parent 346b704 commit aec29ba

1 file changed

Lines changed: 107 additions & 19 deletions

File tree

crates/iceberg/src/io/storage_azdls.rs

Lines changed: 107 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ pub(crate) fn azdls_config_parse(mut properties: HashMap<String, String>) -> Res
8080
config.account_key = Some(account_key);
8181
}
8282

83-
if let Some(sas_token) = properties.remove(ADLS_SAS_TOKEN) {
83+
if let Some(sas_token) = find_sas_token(&properties, config.account_name.as_deref()) {
8484
config.sas_token = Some(sas_token);
8585
}
8686

@@ -103,6 +103,38 @@ pub(crate) fn azdls_config_parse(mut properties: HashMap<String, String>) -> Res
103103
Ok(config)
104104
}
105105

106+
/// Finds the appropriate SAS token from properties based on account name.
107+
///
108+
/// Strategy:
109+
/// 1. If account name is known, search for keys matching `adls.sas-token.<account_name>` prefix
110+
/// 2. If not found, fall back to searching for keys matching `adls.sas-token` prefix
111+
/// 3. Return the shortest matching key (least specific)
112+
/// 4. Trim leading '?' from the token if present
113+
fn find_sas_token(
114+
properties: &HashMap<String, String>,
115+
account_name: Option<&str>,
116+
) -> Option<String> {
117+
// Helper function to search for token with a given prefix
118+
let find_with_prefix = |prefix: &str| {
119+
properties
120+
.iter()
121+
.filter(|(key, _)| key.as_str() == prefix || key.starts_with(&format!("{}.", prefix)))
122+
.min_by_key(|(key, _)| key.len())
123+
.map(|(_, value)| value.strip_prefix('?').unwrap_or(value).to_string())
124+
};
125+
126+
// Try account-specific prefix first if account name is known, then fall back to base
127+
if let Some(account) = account_name {
128+
let account_prefix = format!("{}.{}", ADLS_SAS_TOKEN, account);
129+
if let Some(token) = find_with_prefix(&account_prefix) {
130+
return Some(token);
131+
}
132+
}
133+
134+
// Fall back to base prefix (adls.sas-token)
135+
find_with_prefix(ADLS_SAS_TOKEN)
136+
}
137+
106138
/// Builds an OpenDAL operator from the AzdlsConfig and path.
107139
///
108140
/// The path is expected to include the scheme in a format like:
@@ -331,24 +363,19 @@ fn validate_storage_and_scheme(
331363
scheme_str: &str,
332364
) -> Result<AzureStorageScheme> {
333365
let scheme = scheme_str.parse::<AzureStorageScheme>()?;
334-
match scheme {
335-
AzureStorageScheme::Abfss | AzureStorageScheme::Abfs => {
336-
ensure_data_valid!(
337-
storage_service == "dfs",
338-
"AzureStoragePath: Unexpected storage service for abfs[s]: {}",
339-
storage_service
340-
);
341-
Ok(scheme)
342-
}
343-
AzureStorageScheme::Wasbs | AzureStorageScheme::Wasb => {
344-
ensure_data_valid!(
345-
storage_service == "blob",
346-
"AzureStoragePath: Unexpected storage service for wasb[s]: {}",
347-
storage_service
348-
);
349-
Ok(scheme)
350-
}
351-
}
366+
// Azure actually is oblivious to what we use for the scheme here.
367+
// It actually supports both dfs and blob endpoints for all storage kinds.
368+
// We should route those to different OpenDAL operators, but given that we don't
369+
// do that today but map both schemes/endpoints to the same ADLS OpenDAL operator
370+
// (which uses dfs endpoint), we might as well accept wasb URL for dfs endpoint,
371+
// and abfs URL for blob endpoint. Especially since some implementations (e.g. Snowflake)
372+
// always use abfs in URL, regardless of the endpoint.
373+
ensure_data_valid!(
374+
storage_service == "dfs" || storage_service == "blob",
375+
"AzureStoragePath: Unexpected storage service for abfs[s]: {}",
376+
storage_service
377+
);
378+
Ok(scheme)
352379
}
353380

354381
#[cfg(test)]
@@ -423,6 +450,67 @@ mod tests {
423450
..Default::default()
424451
}),
425452
),
453+
(
454+
"account-specific SAS token with full domain",
455+
HashMap::from([
456+
(
457+
super::ADLS_ACCOUNT_NAME.to_string(),
458+
"azteststorage".to_string(),
459+
),
460+
(
461+
"adls.sas-token.azteststorage.blob.core.windows.net".to_string(),
462+
"token-full".to_string(),
463+
),
464+
(
465+
"adls.sas-token.azteststorage".to_string(),
466+
"token-account".to_string(),
467+
),
468+
]),
469+
Some(AzdlsConfig {
470+
account_name: Some("azteststorage".to_string()),
471+
sas_token: Some("token-account".to_string()), // Should pick the shorter one
472+
..Default::default()
473+
}),
474+
),
475+
(
476+
"account-specific SAS token with only full domain",
477+
HashMap::from([
478+
(
479+
super::ADLS_ACCOUNT_NAME.to_string(),
480+
"myaccount".to_string(),
481+
),
482+
(
483+
"adls.sas-token.myaccount.blob.core.windows.net".to_string(),
484+
"token-specific".to_string(),
485+
),
486+
]),
487+
Some(AzdlsConfig {
488+
account_name: Some("myaccount".to_string()),
489+
sas_token: Some("token-specific".to_string()),
490+
..Default::default()
491+
}),
492+
),
493+
(
494+
"SAS token without account name picks shortest",
495+
HashMap::from([
496+
(
497+
super::ADLS_SAS_TOKEN.to_string(),
498+
"token-generic".to_string(),
499+
),
500+
(
501+
"adls.sas-token.someaccount".to_string(),
502+
"token-account".to_string(),
503+
),
504+
(
505+
"adls.sas-token.someaccount.blob.core.windows.net".to_string(),
506+
"token-specific".to_string(),
507+
),
508+
]),
509+
Some(AzdlsConfig {
510+
sas_token: Some("token-generic".to_string()), // Should pick the shortest one
511+
..Default::default()
512+
}),
513+
),
426514
];
427515

428516
for (name, properties, expected) in test_cases {

0 commit comments

Comments
 (0)