diff --git a/sdk/cosmos/azure_data_cosmos/CHANGELOG.md b/sdk/cosmos/azure_data_cosmos/CHANGELOG.md index e93f437fb9..6c70c789a5 100644 --- a/sdk/cosmos/azure_data_cosmos/CHANGELOG.md +++ b/sdk/cosmos/azure_data_cosmos/CHANGELOG.md @@ -4,10 +4,16 @@ ### Features Added +- `CosmosClient::database_client` and `DatabaseClient::container_client` now accept anything convertible into the new `ResourceIdentity` type, so databases and containers can be addressed either by name (a `&str`/`String`, as before) or by resource ID (RID) via the new `ResourceId` newtype. Addressing modes cannot be mixed across the hierarchy: a RID-addressed database yields only RID-addressed containers, and a name-addressed database yields only name-addressed containers; mixing them fails fast with a client-side error. `DatabaseClient` gains `name()` and `rid()` accessors (its existing `id()` now returns the `ResourceIdentity`), and skips the extra database read when resolving throughput for a RID-addressed database. ([#4613](https://github.com/Azure/azure-sdk-for-rust/pull/4613)) + ### Breaking Changes +- `CosmosClient::database_client` and `DatabaseClient::container_client` now take `impl Into<ResourceIdentity>` instead of `&str`. Passing a string literal, `&str`, `String`, or `&String` is unchanged (it still selects name addressing), but call sites that previously relied on deref coercion to `&str` — e.g. passing a `&Cow<str>`, `&Box<str>`, or another `Deref<Target = str>` smart string — no longer compile against the generic bound and must dereference explicitly (`&*value` or `value.as_ref()`). `DatabaseClient::id()` now returns `&ResourceIdentity` instead of `&str`; use `DatabaseClient::name()` to get the name of a name-addressed database. These are compile-time-only source changes with no runtime behavior difference. ([#4613](https://github.com/Azure/azure-sdk-for-rust/pull/4613)) + ### Bugs Fixed +- RID-addressed data-plane requests are now authenticated correctly. 
The driver signs RID-addressed databases, containers, and their feeds/children over the lowercased resource RID (matching the service's `is_name_based = false` rule) and sends the RID raw in the request URL instead of percent-encoding it. Previously the driver always signed the full name-style resource link and percent-encoded the RID, which made the gateway reject RID reads (e.g. `401` for a database or container read by RID). Reading a database or container by RID, listing/querying containers under a RID database, and querying items, reading throughput, and creating items on a RID-addressed container now work end-to-end. ([#4613](https://github.com/Azure/azure-sdk-for-rust/pull/4613)) + ### Other Changes ## 0.36.0 (2026-06-19) @@ -49,7 +55,6 @@ - The `allow_invalid_certificates` Cargo feature has been removed. The capability is now in the default feature set but requires explicit opt-in via `CosmosRuntimeBuilder::with_connection_pool(ConnectionPoolOptionsBuilder::new().with_server_certificate_validation(ServerCertificateValidation::RequiredUnlessEmulator).build())`. The new `RequiredUnlessEmulator` policy is not a blanket "disable validation" knob — it validates the server certificate normally and only relaxes validation for detected Cosmos DB emulator hosts (via `AccountEndpoint` + `Region` heuristics, or the `AZURE_COSMOS_EMULATOR_HOST` environment variable). See the driver CHANGELOG for the underlying `EmulatorServerCertValidation` → `ServerCertificateValidation` rename. - Per-account driver caching has been removed from the underlying runtime — each `CosmosClient::build(...)` now constructs a fresh `CosmosDriver`. Clients sharing the same `CosmosRuntime` continue to share transport pools, sampler, account cache, etc.; only the per-account `CosmosDriver` instance is no longer reused. 
([#4588](https://github.com/Azure/azure-sdk-for-rust/pull/4588)) - ### Bugs Fixed - `403/1008 (DatabaseAccountNotFound)` and `403/3 (WriteForbidden)` now trigger an account-topology refresh and retry against the refreshed endpoints instead of bubbling up. ([#4590](https://github.com/Azure/azure-sdk-for-rust/pull/4590)) diff --git a/sdk/cosmos/azure_data_cosmos/src/clients/container_client.rs b/sdk/cosmos/azure_data_cosmos/src/clients/container_client.rs index 9a9476d219..819a941c57 100644 --- a/sdk/cosmos/azure_data_cosmos/src/clients/container_client.rs +++ b/sdk/cosmos/azure_data_cosmos/src/clients/container_client.rs @@ -12,7 +12,7 @@ use crate::{ Precondition, QueryOptions, ReadContainerOptions, ReadFeedRangesOptions, ReplaceContainerOptions, SessionToken, ThroughputOptions, }, - PartitionKey, Query, + PartitionKey, Query, ResourceIdentity, }; use super::ThroughputPoller; @@ -33,21 +33,72 @@ pub struct ContainerClient { impl ContainerClient { pub(crate) async fn new( context: ClientContext, - container_id: &str, - database_id: &str, + database: &ResourceIdentity, + container: ResourceIdentity, ) -> crate::Result { - // Eagerly resolve immutable container metadata from the driver. - let container_ref = context - .driver - .resolve_container(database_id, container_id) - .await - .map_err(|e| { - azure_data_cosmos_driver::error::CosmosErrorBuilder::from_error(e) - .with_context(format!( - "failed to resolve container metadata for '{database_id}/{container_id}'" - )) + // The container's addressing mode must match the database's: name-with-name + // or RID-with-RID. Mixing the two is not supported by the service routing. 
+ let container_ref = match (database, &container) { + (ResourceIdentity::Name(db_name), ResourceIdentity::Name(container_name)) => context + .driver + .resolve_container(db_name, container_name) + .await + .map_err(|e| { + azure_data_cosmos_driver::error::CosmosErrorBuilder::from_error(e) + .with_context(format!( + "failed to resolve container metadata for '{db_name}/{container_name}'" + )) + .build() + })?, + (ResourceIdentity::Rid(db_rid), ResourceIdentity::Rid(container_rid)) => { + let resolved = context + .driver + .resolve_container_by_rid(container_rid.as_str()) + .await + .map_err(|e| { + azure_data_cosmos_driver::error::CosmosErrorBuilder::from_error(e) + .with_context(format!( + "failed to resolve container metadata for RID '{}'", + container_rid.as_str() + )) + .build() + })?; + + // The parent database RID is derived from the container RID, not + // taken from this `DatabaseClient`. Reject a container whose parent + // database does not match the addressed database so callers can't + // accidentally reach into a different database. 
+ if resolved.database_rid() != db_rid.as_str() { + return Err(azure_data_cosmos_driver::error::CosmosError::builder() + .with_status( + azure_data_cosmos_driver::error::CosmosStatus::CLIENT_INVALID_RESOURCE_ID, + ) + .with_message(format!( + "container RID '{}' belongs to database '{}', not the addressed database '{}'", + container_rid.as_str(), + resolved.database_rid(), + db_rid.as_str() + )) + .build() + .into()); + } + + resolved + } + (ResourceIdentity::Name(_), ResourceIdentity::Rid(_)) + | (ResourceIdentity::Rid(_), ResourceIdentity::Name(_)) => { + return Err(azure_data_cosmos_driver::error::CosmosError::builder() + .with_status( + azure_data_cosmos_driver::error::CosmosStatus::CLIENT_MIXED_NAME_RID_ADDRESSING, + ) + .with_message( + "database and container must use the same addressing mode: \ + address both by name or both by RID", + ) .build() - })?; + .into()); + } + }; Ok(Self { container_ref, diff --git a/sdk/cosmos/azure_data_cosmos/src/clients/cosmos_client.rs b/sdk/cosmos/azure_data_cosmos/src/clients/cosmos_client.rs index db25d62ae1..ee1f2f5999 100644 --- a/sdk/cosmos/azure_data_cosmos/src/clients/cosmos_client.rs +++ b/sdk/cosmos/azure_data_cosmos/src/clients/cosmos_client.rs @@ -7,7 +7,7 @@ use crate::{ models::DatabaseProperties, models::ResourceResponse, options::{CreateDatabaseOptions, QueryDatabasesOptions}, - Query, + Query, ResourceIdentity, }; use azure_core::http::Url; use azure_data_cosmos_driver::models::CosmosOperation; @@ -95,12 +95,29 @@ impl CosmosClient { CosmosClientBuilder::new() } - /// Gets a [`DatabaseClient`] that can be used to access the database with the specified ID. + /// Gets a [`DatabaseClient`] that can be used to access the database with the + /// specified identity. + /// + /// The database may be addressed either by name or by [`ResourceId`](crate::ResourceId) + /// (RID). 
Anything that converts into a [`ResourceIdentity`](crate::ResourceIdentity) + /// is accepted — a `&str`/`String` selects name addressing, a `ResourceId` + /// selects RID addressing. /// /// # Arguments - /// * `id` - The ID of the database. - pub fn database_client(&self, id: &str) -> DatabaseClient { - DatabaseClient::new(self.context.clone(), id) + /// * `database` - The name or RID of the database. + /// + /// # Examples + /// + /// ```rust,no_run + /// # use azure_data_cosmos::{CosmosClient, ResourceId}; + /// # let client: CosmosClient = panic!("this is a non-running example"); + /// // By name: + /// let db = client.database_client("my-database"); + /// // By RID: + /// let db = client.database_client(ResourceId::from("abc123==")); + /// ``` + pub fn database_client(&self, database: impl Into) -> DatabaseClient { + DatabaseClient::new(self.context.clone(), database.into()) } /// Gets the endpoint of the database account this client is connected to. diff --git a/sdk/cosmos/azure_data_cosmos/src/clients/database_client.rs b/sdk/cosmos/azure_data_cosmos/src/clients/database_client.rs index 42dea72dd2..b83c9fb268 100644 --- a/sdk/cosmos/azure_data_cosmos/src/clients/database_client.rs +++ b/sdk/cosmos/azure_data_cosmos/src/clients/database_client.rs @@ -10,7 +10,7 @@ use crate::{ CreateContainerOptions, DeleteDatabaseOptions, QueryContainersOptions, ReadDatabaseOptions, ThroughputOptions, }, - Query, + Query, ResourceId, ResourceIdentity, }; use azure_data_cosmos_driver::models::{CosmosOperation, DatabaseReference}; @@ -20,43 +20,68 @@ use super::ThroughputPoller; /// /// You can get a `DatabaseClient` by calling [`CosmosClient::database_client()`](crate::CosmosClient::database_client()). 
pub struct DatabaseClient { - database_id: String, + identity: ResourceIdentity, context: ClientContext, database_ref: DatabaseReference, } impl DatabaseClient { - pub(crate) fn new(context: ClientContext, database_id: &str) -> Self { - let database_id = database_id.to_string(); - let database_ref = - DatabaseReference::from_name(context.driver.account().clone(), database_id.clone()); + pub(crate) fn new(context: ClientContext, identity: ResourceIdentity) -> Self { + let account = context.driver.account().clone(); + let database_ref = match &identity { + ResourceIdentity::Name(name) => { + DatabaseReference::from_name(account, name.clone().into_owned()) + } + ResourceIdentity::Rid(rid) => { + DatabaseReference::from_rid(account, rid.as_str().to_owned()) + } + }; Self { - database_id, + identity, context, database_ref, } } - /// Gets a [`ContainerClient`] that can be used to access the collection with the specified name. + /// Gets a [`ContainerClient`] that can be used to access the container with the + /// specified identity. /// /// This method eagerly resolves immutable container metadata (resource ID and partition key /// definition) from the service, so the returned client is ready for immediate use without /// per-operation cache lookups. /// + /// The container's addressing mode must match this database's: a name-addressed + /// database accepts only name-addressed containers, and a RID-addressed database + /// accepts only [`ResourceId`](crate::ResourceId)-addressed containers. + /// /// # Arguments - /// * `name` - The name of the container. + /// * `container` - The name or RID of the container. /// /// # Errors /// - /// Returns an error if the container does not exist or the metadata cannot be resolved. 
- pub async fn container_client(&self, name: &str) -> crate::Result { - ContainerClient::new(self.context.clone(), name, &self.database_id).await + /// Returns an error if the container does not exist, the metadata cannot be + /// resolved, or the addressing mode does not match this database's. + pub async fn container_client( + &self, + container: impl Into, + ) -> crate::Result { + ContainerClient::new(self.context.clone(), &self.identity, container.into()).await + } + + /// Returns the identity (name or RID) used to construct this client. + pub fn id(&self) -> &ResourceIdentity { + &self.identity + } + + /// Returns the database name, or `None` if this client was addressed by RID. + pub fn name(&self) -> Option<&str> { + self.identity.as_name() } - /// Returns the identifier of the Cosmos database. - pub fn id(&self) -> &str { - &self.database_id + /// Returns the database RID, or `None` if this client was addressed by name. + pub fn rid(&self) -> Option<&ResourceId> { + self.identity.as_rid() } /// Reads the properties of the database. @@ -208,6 +233,17 @@ impl DatabaseClient { )) } + /// Returns the database RID, using the client's identity directly when it is + /// already RID-addressed, or reading the database from the service to obtain + /// the `_rid` when addressed by name. + async fn resource_id(&self) -> crate::Result { + if let Some(rid) = self.rid() { + return Ok(rid.as_str().to_owned()); + } + let db = self.read(None).await?.into_model()?; + resource_id_or_error(db.system_properties.resource_id, "database") + } + /// Reads database throughput properties, if any. /// /// This will return `None` if the database does not have a throughput offer configured. @@ -219,9 +255,7 @@ impl DatabaseClient { options: Option, ) -> crate::Result> { let options = options.unwrap_or_default(); - // We need to get the RID for the database. 
- let db = self.read(None).await?.into_model()?; - let resource_id = resource_id_or_error(db.system_properties.resource_id, "database")?; + let resource_id = self.resource_id().await?; offers_client::find_offer( &self.context.driver, @@ -263,9 +297,7 @@ impl DatabaseClient { options: Option, ) -> crate::Result { let options = options.unwrap_or_default(); - // We need to get the RID for the database. - let db = self.read(None).await?.into_model()?; - let resource_id = resource_id_or_error(db.system_properties.resource_id, "database")?; + let resource_id = self.resource_id().await?; offers_client::begin_replace( self.context.driver.clone(), @@ -312,7 +344,8 @@ mod tests { fn _assert_futures_are_send() { fn assert_send(_: T) {} let client: &DatabaseClient = todo!(); - assert_send(client.container_client(todo!())); + let container_identity: ResourceIdentity = todo!(); + assert_send(client.container_client(container_identity)); assert_send(client.read(todo!())); assert_send(client.query_containers(Query::from("SELECT * FROM c"), todo!())); assert_send(client.create_container(todo!(), todo!())); diff --git a/sdk/cosmos/azure_data_cosmos/src/lib.rs b/sdk/cosmos/azure_data_cosmos/src/lib.rs index b44fedf1f5..9eb3e54202 100644 --- a/sdk/cosmos/azure_data_cosmos/src/lib.rs +++ b/sdk/cosmos/azure_data_cosmos/src/lib.rs @@ -17,6 +17,7 @@ pub use error::{CosmosError, CosmosStatus, Result, SubStatusCode}; pub use feed::{FeedScope, Query}; pub use models::{PartitionKey, TransactionalBatch}; pub use options::RoutingStrategy; +pub use resource_identity::{ResourceId, ResourceIdentity}; pub use runtime::{CosmosRuntime, CosmosRuntimeBuilder}; // ========================================================================= @@ -42,6 +43,7 @@ mod constants; mod credential; mod driver_bridge; mod region_proximity; +mod resource_identity; mod runtime; mod session_helpers; diff --git a/sdk/cosmos/azure_data_cosmos/src/resource_identity.rs b/sdk/cosmos/azure_data_cosmos/src/resource_identity.rs 
new file mode 100644 index 0000000000..5c0587f841 --- /dev/null +++ b/sdk/cosmos/azure_data_cosmos/src/resource_identity.rs @@ -0,0 +1,204 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +//! Public types for addressing Cosmos DB databases and containers by name or RID. + +use std::borrow::Cow; + +/// A Cosmos DB resource identifier (RID). +/// +/// RIDs are stable, Base64-encoded identifiers assigned by Cosmos DB. Unlike a +/// user-provided name, a RID does not change when a resource is renamed, so it +/// can be used to address a database or container regardless of its current name. +/// +/// Use [`ResourceId`] together with [`ResourceIdentity`] to obtain RID-addressed +/// clients via [`CosmosClient::database_client`](crate::CosmosClient::database_client) +/// and [`DatabaseClient::container_client`](crate::clients::DatabaseClient::container_client). +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub struct ResourceId(Cow<'static, str>); + +impl ResourceId { + /// Creates a resource identifier from a static string without allocating. + pub const fn from_static(rid: &'static str) -> Self { + Self(Cow::Borrowed(rid)) + } + + /// Returns the RID as a string slice. + pub fn as_str(&self) -> &str { + &self.0 + } +} + +impl From<&str> for ResourceId { + fn from(rid: &str) -> Self { + Self(Cow::Owned(rid.to_owned())) + } +} + +impl From for ResourceId { + fn from(rid: String) -> Self { + Self(Cow::Owned(rid)) + } +} + +impl From<&String> for ResourceId { + fn from(rid: &String) -> Self { + Self(Cow::Owned(rid.clone())) + } +} + +impl AsRef for ResourceId { + fn as_ref(&self) -> &str { + self.as_str() + } +} + +impl std::fmt::Display for ResourceId { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.write_str(&self.0) + } +} + +/// Identifies a Cosmos DB database or container, either by user-provided name or +/// by [`ResourceId`] (RID). 
+/// +/// This is the parameter type accepted by +/// [`CosmosClient::database_client`](crate::CosmosClient::database_client) and +/// [`DatabaseClient::container_client`](crate::clients::DatabaseClient::container_client). +/// Both accept `impl Into<ResourceIdentity>`, so a plain `&str`/`String` selects +/// name addressing and a [`ResourceId`] selects RID addressing: +/// +/// ```rust +/// use azure_data_cosmos::{ResourceId, ResourceIdentity}; +/// +/// let by_name: ResourceIdentity = "my-database".into(); +/// let by_rid: ResourceIdentity = ResourceId::from("abc123==").into(); +/// ``` +/// +/// Name and RID addressing cannot be mixed across the database/container +/// hierarchy: a RID-addressed database yields only RID-addressed containers, and +/// a name-addressed database yields only name-addressed containers. +/// +/// This type does not carry a lifetime parameter (per the repository's guidance +/// against lifetimes in public types); converting from a borrowed `&str` +/// allocates an owned copy. +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[non_exhaustive] +pub enum ResourceIdentity { + /// Address the resource by its user-provided name. + Name(Cow<'static, str>), + /// Address the resource by its [`ResourceId`] (RID). + Rid(ResourceId), +} + +impl ResourceIdentity { + /// Returns the name when this identity addresses a resource by name. + pub(crate) fn as_name(&self) -> Option<&str> { + match self { + Self::Name(name) => Some(name), + Self::Rid(_) => None, + } + } + + /// Returns the RID when this identity addresses a resource by RID. + pub(crate) fn as_rid(&self) -> Option<&ResourceId> { + match self { + Self::Rid(rid) => Some(rid), + Self::Name(_) => None, + } + } + + /// Returns `true` if this identity addresses a resource by RID. 
+ pub fn is_rid(&self) -> bool { + matches!(self, Self::Rid(_)) + } +} + +impl From<&str> for ResourceIdentity { + fn from(name: &str) -> Self { + Self::Name(Cow::Owned(name.to_owned())) + } +} + +impl From for ResourceIdentity { + fn from(name: String) -> Self { + Self::Name(Cow::Owned(name)) + } +} + +impl From<&String> for ResourceIdentity { + fn from(name: &String) -> Self { + Self::Name(Cow::Owned(name.clone())) + } +} + +impl From for ResourceIdentity { + fn from(rid: ResourceId) -> Self { + Self::Rid(rid) + } +} + +impl From<&ResourceId> for ResourceIdentity { + fn from(rid: &ResourceId) -> Self { + Self::Rid(rid.clone()) + } +} + +impl From<&ResourceIdentity> for ResourceIdentity { + fn from(identity: &ResourceIdentity) -> Self { + identity.clone() + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn str_converts_to_name() { + let id: ResourceIdentity = "mydb".into(); + assert_eq!(id.as_name(), Some("mydb")); + assert!(id.as_rid().is_none()); + assert!(!id.is_rid()); + } + + #[test] + fn string_converts_to_name() { + let id: ResourceIdentity = String::from("mydb").into(); + assert_eq!(id.as_name(), Some("mydb")); + } + + #[test] + fn resource_id_converts_to_rid() { + let id: ResourceIdentity = ResourceId::from("abc123").into(); + assert!(id.is_rid()); + assert_eq!(id.as_rid().map(|r| r.as_str()), Some("abc123")); + assert!(id.as_name().is_none()); + } + + #[test] + fn resource_id_from_static_does_not_allocate() { + const RID: ResourceId = ResourceId::from_static("static-rid"); + assert_eq!(RID.as_str(), "static-rid"); + } + + #[test] + fn resource_id_display_matches_str() { + let rid = ResourceId::from("xyz=="); + assert_eq!(rid.to_string(), "xyz=="); + } + + #[test] + fn identity_ref_round_trips_preserving_addressing() { + let by_name: ResourceIdentity = "mydb".into(); + let name_again: ResourceIdentity = (&by_name).into(); + assert_eq!(name_again, by_name); + assert_eq!(name_again.as_name(), Some("mydb")); + + let by_rid: ResourceIdentity 
= ResourceId::from("abc123==").into(); + let rid_again: ResourceIdentity = (&by_rid).into(); + assert_eq!(rid_again, by_rid); + assert!(rid_again.is_rid()); + assert_eq!(rid_again.as_rid().map(|r| r.as_str()), Some("abc123==")); + } +} diff --git a/sdk/cosmos/azure_data_cosmos/tests/emulator_tests/cosmos_containers.rs b/sdk/cosmos/azure_data_cosmos/tests/emulator_tests/cosmos_containers.rs index 8e57394e19..f85d9237bf 100644 --- a/sdk/cosmos/azure_data_cosmos/tests/emulator_tests/cosmos_containers.rs +++ b/sdk/cosmos/azure_data_cosmos/tests/emulator_tests/cosmos_containers.rs @@ -89,7 +89,7 @@ pub async fn container_crud_simple() -> Result<(), Box> { } assert_eq!(vec![properties.id.clone()], ids); - let container_client = db_client.container_client(&properties.id).await?; + let container_client = db_client.container_client(&*properties.id).await?; let mut updated_indexing_policy = IndexingPolicy::default(); updated_indexing_policy.automatic = false; updated_indexing_policy.indexing_mode = Some(IndexingMode::None); diff --git a/sdk/cosmos/azure_data_cosmos/tests/emulator_tests/cosmos_rid_addressing.rs b/sdk/cosmos/azure_data_cosmos/tests/emulator_tests/cosmos_rid_addressing.rs new file mode 100644 index 0000000000..ea5df0a44a --- /dev/null +++ b/sdk/cosmos/azure_data_cosmos/tests/emulator_tests/cosmos_rid_addressing.rs @@ -0,0 +1,289 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +// Use the shared test framework declared in `tests/emulator_tests/mod.rs`. 
+use super::framework; + +use std::error::Error; + +use azure_core::Uuid; +use azure_data_cosmos::{ + clients::ContainerClient, + feed::FeedScope, + models::{ContainerProperties, ThroughputProperties}, + options::CreateContainerOptions, + CosmosStatus, Query, ResourceId, +}; +use futures::TryStreamExt; +use serde::{Deserialize, Serialize}; + +use framework::{TestClient, TestOptions}; + +#[derive(Clone, Debug, Deserialize, Serialize, PartialEq, Eq)] +struct RidItem { + id: String, + pk: String, + value: i32, +} + +/// Collects every item produced by a query into a `Vec`, draining the pager. +async fn collect_items( + container: &ContainerClient, + query: Query, + scope: FeedScope, +) -> Result, Box> { + let mut pager = container.query_items::(query, scope, None).await?; + let mut items = Vec::new(); + while let Some(item) = pager.try_next().await? { + items.push(item); + } + Ok(items) +} + +/// Exercises the full RID-addressing flow end to end: create a database and +/// container by name, discover their service-assigned `_rid`s, then re-address +/// both purely by RID and confirm every read/write/query operation resolves to +/// the same resources. 
+#[tokio::test] +#[cfg_attr( + not(any(test_category = "emulator", test_category = "emulator_vnext")), + ignore = "requires test_category 'emulator' or 'emulator_vnext'" +)] +#[cfg_attr( + test_category = "emulator_vnext", + ignore = "skipped on vnext emulator: RID addressing not verified" +)] +pub async fn database_and_container_addressed_by_rid() -> Result<(), Box> { + TestClient::run_with_unique_db( + async |run_context, db_client| { + let container_name = format!("rid-container-{}", Uuid::new_v4()); + let name_container = run_context + .create_container( + db_client, + ContainerProperties::new(container_name.clone(), "/pk".into()), + Some( + CreateContainerOptions::default() + .with_throughput(ThroughputProperties::manual(400)), + ), + ) + .await?; + + // Capture the service-assigned RIDs (`_rid`) for both the database + // and the container — these are what callers would address by. + let db_rid = db_client + .read(None) + .await? + .into_model()? + .system_properties + .resource_id + .expect("database read should return a _rid"); + let container_rid = name_container + .read(None) + .await? + .into_model()? + .system_properties + .resource_id + .expect("container read should return a _rid"); + + // Re-address the same database purely by RID. + let rid_db_client = run_context + .client() + .database_client(ResourceId::from(db_rid.clone())); + assert_eq!( + Some(db_rid.as_str()), + rid_db_client.rid().map(ResourceId::as_str), + "RID-addressed db client should expose the RID" + ); + assert!( + rid_db_client.name().is_none(), + "RID-addressed db client should not expose a name" + ); + + // ...and the container by RID under that RID-addressed database. + let rid_container = rid_db_client + .container_client(ResourceId::from(container_rid.clone())) + .await?; + + // Reading by RID resolves back to the same container. + let read_back = rid_container.read(None).await?.into_model()?; + assert_eq!(container_name, read_back.id); + + // Throughput is reachable by RID. 
+ let throughput = rid_container + .read_throughput(None) + .await? + .expect("throughput should be present"); + assert_eq!(Some(400), throughput.throughput()); + + // Create an item through the RID-addressed container. + let item = RidItem { + id: format!("item-{}", Uuid::new_v4()), + pk: "pk-1".to_string(), + value: 7, + }; + rid_container + .create_item(&item.pk, &item.id, &item, None) + .await?; + + // Point-read it back by RID. + let fetched: RidItem = rid_container + .read_item(&item.pk, &item.id, None) + .await? + .into_model()?; + assert_eq!(item, fetched); + + // Single-partition query against the RID-addressed container. + let single = collect_items( + &rid_container, + Query::from("SELECT * FROM c WHERE c.id = @id").with_parameter("@id", &item.id)?, + FeedScope::partition(&item.pk), + ) + .await?; + assert_eq!(vec![item.clone()], single); + + // Cross-partition query against the RID-addressed container. + let cross = collect_items( + &rid_container, + Query::from("SELECT * FROM c WHERE c.id = @id").with_parameter("@id", &item.id)?, + FeedScope::full_container(), + ) + .await?; + assert_eq!(vec![item.clone()], cross); + + // Listing containers under the RID-addressed database also works. + let mut container_ids = Vec::new(); + let mut container_pager = rid_db_client + .query_containers( + Query::from("SELECT * FROM root r WHERE r.id = @id") + .with_parameter("@id", &container_name)?, + None, + ) + .await?; + while let Some(c) = container_pager.try_next().await? { + container_ids.push(c.id); + } + assert_eq!(vec![container_name.clone()], container_ids); + + Ok(()) + }, + Some(TestOptions::for_emulator()), + ) + .await +} + +/// A database addressed by name and a container addressed by RID (or any other +/// mix) must be rejected before any network call: addressing modes cannot be +/// mixed. 
+#[tokio::test] +#[cfg_attr( + not(any(test_category = "emulator", test_category = "emulator_vnext")), + ignore = "requires test_category 'emulator' or 'emulator_vnext'" +)] +#[cfg_attr( + test_category = "emulator_vnext", + ignore = "skipped on vnext emulator: RID addressing not verified" +)] +pub async fn mixed_name_and_rid_addressing_is_rejected() -> Result<(), Box> { + TestClient::run_with_unique_db( + async |run_context, db_client| { + let container_name = format!("rid-mixed-{}", Uuid::new_v4()); + let name_container = run_context + .create_container( + db_client, + ContainerProperties::new(container_name.clone(), "/pk".into()), + None, + ) + .await?; + let container_rid = name_container + .read(None) + .await? + .into_model()? + .system_properties + .resource_id + .expect("container read should return a _rid"); + + // `db_client` is name-addressed; addressing the container by RID + // mixes the two modes and must be rejected. + let Err(err) = db_client + .container_client(ResourceId::from(container_rid)) + .await + else { + panic!("expected mixed name/RID addressing to be rejected"); + }; + assert_eq!(CosmosStatus::CLIENT_MIXED_NAME_RID_ADDRESSING, err.status()); + + Ok(()) + }, + Some(TestOptions::for_emulator()), + ) + .await +} + +/// A container RID that belongs to a different database than the one addressed +/// must be rejected, so callers cannot accidentally reach into another database. +#[tokio::test] +#[cfg_attr( + not(any(test_category = "emulator", test_category = "emulator_vnext")), + ignore = "requires test_category 'emulator' or 'emulator_vnext'" +)] +#[cfg_attr( + test_category = "emulator_vnext", + ignore = "skipped on vnext emulator: RID addressing not verified" +)] +pub async fn container_rid_from_another_database_is_rejected() -> Result<(), Box> { + TestClient::run_with_unique_db( + async |run_context, db_client| { + // db1 is the unique database created by the harness. + let db1_rid = db_client + .read(None) + .await? + .into_model()? 
+ .system_properties + .resource_id + .expect("db1 read should return a _rid"); + + // db2 + a container in db2, created out of band. + let db2_name = format!("rid-otherdb-{}", Uuid::new_v4()); + let _ = run_context + .client() + .create_database(&db2_name, None) + .await?; + let db2_client = run_context.client().database_client(db2_name.as_str()); + let container2_name = format!("rid-otherc-{}", Uuid::new_v4()); + let container2 = run_context + .create_container( + &db2_client, + ContainerProperties::new(container2_name.clone(), "/pk".into()), + None, + ) + .await?; + let container2_rid = container2 + .read(None) + .await? + .into_model()? + .system_properties + .resource_id + .expect("container2 read should return a _rid"); + + // Address db1 by RID but hand it a container RID that belongs to db2. + let rid_db1_client = run_context + .client() + .database_client(ResourceId::from(db1_rid)); + let result = rid_db1_client + .container_client(ResourceId::from(container2_rid)) + .await; + + // Clean up db2 regardless of the assertion outcome below. 
+ db2_client.delete(None).await?; + + let Err(err) = result else { + panic!("expected a container RID from another database to be rejected"); + }; + assert_eq!(CosmosStatus::CLIENT_INVALID_RESOURCE_ID, err.status()); + + Ok(()) + }, + Some(TestOptions::for_emulator()), + ) + .await +} diff --git a/sdk/cosmos/azure_data_cosmos/tests/emulator_tests/mod.rs b/sdk/cosmos/azure_data_cosmos/tests/emulator_tests/mod.rs index 1f18fb34b5..0778220a09 100644 --- a/sdk/cosmos/azure_data_cosmos/tests/emulator_tests/mod.rs +++ b/sdk/cosmos/azure_data_cosmos/tests/emulator_tests/mod.rs @@ -12,6 +12,7 @@ mod cosmos_patch; mod cosmos_proxy; mod cosmos_query; mod cosmos_response_metadata; +mod cosmos_rid_addressing; #[path = "../framework/mod.rs"] mod framework; diff --git a/sdk/cosmos/azure_data_cosmos/tests/framework/test_client.rs b/sdk/cosmos/azure_data_cosmos/tests/framework/test_client.rs index d85e8187be..898159bb51 100644 --- a/sdk/cosmos/azure_data_cosmos/tests/framework/test_client.rs +++ b/sdk/cosmos/azure_data_cosmos/tests/framework/test_client.rs @@ -855,7 +855,7 @@ impl TestRunContext { { Ok(response) => { let created = response.into_model()?; - return db_client.container_client(&created.id).await; + return db_client.container_client(&*created.id).await; } Err(e) if e.status().status_code() == StatusCode::TooManyRequests => { println!( @@ -867,7 +867,7 @@ impl TestRunContext { } Err(e) if e.status().status_code() == StatusCode::Conflict => { // Container already exists, delete and recreate it, then return a client - let container_client = db_client.container_client(&properties.id).await?; + let container_client = db_client.container_client(&*properties.id).await?; container_client.delete(None).await?; // recreate @@ -875,7 +875,7 @@ impl TestRunContext { .create_container(properties.clone(), options.clone()) .await?; let created = response.into_model()?; - return db_client.container_client(&created.id).await; + return db_client.container_client(&*created.id).await; } 
Err(e) => return Err(e), } @@ -912,7 +912,7 @@ impl TestRunContext { let satellite_client = Self::create_client_with_preferred_region(SATELLITE_REGION).await?; - let container_id = &created_properties.id; + let container_id: &str = &created_properties.id; // Wait for hub region client to successfully resolve and read the container. // Both `container_client()` (which resolves metadata via the driver) and diff --git a/sdk/cosmos/azure_data_cosmos_driver/CHANGELOG.md b/sdk/cosmos/azure_data_cosmos_driver/CHANGELOG.md index b32711074e..c9875f41db 100644 --- a/sdk/cosmos/azure_data_cosmos_driver/CHANGELOG.md +++ b/sdk/cosmos/azure_data_cosmos_driver/CHANGELOG.md @@ -8,6 +8,8 @@ ### Bugs Fixed +- RID-addressed requests are now signed and routed correctly. The driver signs them over the lowercased resource RID (the leaf for point reads, the parent for feeds), matching the service's `is_name_based = false` rule, and sends the RID raw in the request URL path. Name-addressed paths continue to be percent-encoded. Previously the driver signed the full name-style resource link and percent-encoded the RID for every request, so RID reads (for example resolving a database or container by RID) were rejected with `401 Unauthorized`. ([#4613](https://github.com/Azure/azure-sdk-for-rust/pull/4613)) + ### Other Changes ## 0.5.0 (2026-06-19) diff --git a/sdk/cosmos/azure_data_cosmos_driver/src/driver/cache/container_cache.rs b/sdk/cosmos/azure_data_cosmos_driver/src/driver/cache/container_cache.rs index 41511d4ef9..8bd57909ee 100644 --- a/sdk/cosmos/azure_data_cosmos_driver/src/driver/cache/container_cache.rs +++ b/sdk/cosmos/azure_data_cosmos_driver/src/driver/cache/container_cache.rs @@ -25,12 +25,14 @@ struct ContainerNameKey { } impl ContainerNameKey { - fn from_container(c: &ContainerReference) -> Self { - Self { + /// Builds a name key from a container reference, or `None` if the container + /// was addressed by RID (no database name is available to key on). 
+ fn from_container(c: &ContainerReference) -> Option { + Some(Self { account_endpoint: c.account().endpoint().as_str().to_owned(), - db_name: c.database_name().to_owned(), + db_name: c.database_name()?.to_owned(), container_name: c.name().to_owned(), - } + }) } } @@ -105,6 +107,29 @@ impl ContainerCache { self.get_or_fetch_impl(&self.by_name, key, fetch_fn).await } + /// Looks up a container by RID, fetching if not cached. + /// + /// On a cache miss, calls `fetch_fn` to resolve the container from the + /// service. The resolved (RID-addressed) reference is populated into the + /// by-RID cache; the by-name cache is left untouched because a RID-addressed + /// reference has no database name to key on. + pub(crate) async fn get_or_fetch_by_rid( + &self, + account_endpoint: &str, + container_rid: &str, + fetch_fn: F, + ) -> crate::error::Result> + where + F: FnOnce() -> Fut, + Fut: std::future::Future>, + { + let key = ContainerRidKey { + account_endpoint: account_endpoint.to_owned(), + container_rid: container_rid.to_owned(), + }; + self.get_or_fetch_impl(&self.by_rid, key, fetch_fn).await + } + /// Returns a cached container looked up by name, or `None` if not cached. #[allow(dead_code)] // Used in tests; will be called from production code once lookup-by-name is wired up. pub(crate) async fn get_by_name( @@ -188,17 +213,20 @@ impl ContainerCache { /// Inserts a known-resolved container reference into both caches. /// /// If an entry already exists under either key, the existing entry is - /// preserved (first-write-wins). + /// preserved (first-write-wins). RID-addressed references are inserted only + /// into the by-RID cache, since they carry no database name to key on. 
pub(crate) async fn put(&self, container: ContainerReference) { let name_key = ContainerNameKey::from_container(&container); let rid_key = ContainerRidKey::from_container(&container); - let container_for_rid = container.clone(); - self.by_name - .get_or_insert_with(name_key, || async { Ok(container) }) - .await; + if let Some(name_key) = name_key { + let container_for_name = container.clone(); + self.by_name + .get_or_insert_with(name_key, || async { Ok(container_for_name) }) + .await; + } self.by_rid - .get_or_insert_with(rid_key, || async { Ok(container_for_rid) }) + .get_or_insert_with(rid_key, || async { Ok(container) }) .await; } } @@ -252,6 +280,16 @@ mod tests { ) } + fn test_container_by_rid(db_rid: &str, container_rid: &str) -> ContainerReference { + ContainerReference::new_by_rid( + test_account(), + db_rid.to_owned(), + "testcontainer".to_owned(), + container_rid.to_owned(), + &test_container_props(), + ) + } + // --- get_or_fetch_by_name --- #[tokio::test] @@ -334,6 +372,64 @@ mod tests { assert!(cache.get_by_rid(ACCOUNT_ENDPOINT, &rid).await.is_some()); } + // --- get_or_fetch_by_rid --- + + #[tokio::test] + async fn fetch_by_rid_caches_without_name_index() { + let cache = ContainerCache::new(); + let counter = Arc::new(AtomicUsize::new(0)); + + let container = test_container_by_rid("db_rid", "coll_rid"); + let container_clone = container.clone(); + let counter_clone = counter.clone(); + + let resolved = cache + .get_or_fetch_by_rid(ACCOUNT_ENDPOINT, "coll_rid", || async move { + counter_clone.fetch_add(1, Ordering::SeqCst); + Ok(container_clone) + }) + .await + .unwrap(); + + assert!(resolved.is_by_rid()); + assert_eq!(counter.load(Ordering::SeqCst), 1); + + // Retrievable by RID. + assert!(cache + .get_by_rid(ACCOUNT_ENDPOINT, "coll_rid") + .await + .is_some()); + + // A second fetch is served from cache, not the factory. 
+ let counter2 = counter.clone(); + cache + .get_or_fetch_by_rid(ACCOUNT_ENDPOINT, "coll_rid", || async move { + counter2.fetch_add(1, Ordering::SeqCst); + Ok(test_container_by_rid("db_rid", "coll_rid")) + }) + .await + .unwrap(); + assert_eq!(counter.load(Ordering::SeqCst), 1); + } + + #[tokio::test] + async fn put_rid_only_skips_name_cache() { + let cache = ContainerCache::new(); + let container = test_container_by_rid("db_rid", "coll_rid"); + + cache.put(container).await; + + // The by-RID index is populated, the by-name index is not (no db name). + assert!(cache + .get_by_rid(ACCOUNT_ENDPOINT, "coll_rid") + .await + .is_some()); + assert!(cache + .get_by_name(ACCOUNT_ENDPOINT, "db_rid", "testcontainer") + .await + .is_none()); + } + // --- different containers --- #[tokio::test] @@ -378,8 +474,8 @@ mod tests { .await .unwrap(); - assert_eq!(r1.database_name(), "db1"); - assert_eq!(r2.database_name(), "db2"); + assert_eq!(r1.database_name(), Some("db1")); + assert_eq!(r2.database_name(), Some("db2")); } // --- get returns none --- diff --git a/sdk/cosmos/azure_data_cosmos_driver/src/driver/cosmos_driver.rs b/sdk/cosmos/azure_data_cosmos_driver/src/driver/cosmos_driver.rs index e6c4ec53cc..a6b937c2c7 100644 --- a/sdk/cosmos/azure_data_cosmos_driver/src/driver/cosmos_driver.rs +++ b/sdk/cosmos/azure_data_cosmos_driver/src/driver/cosmos_driver.rs @@ -1094,8 +1094,89 @@ impl CosmosDriver { )) } - /// Creates a new driver instance. + /// Fetches a container's metadata from the service addressing it purely by RID. /// + /// The parent database RID is derived from the container RID's encoded byte + /// layout (the first 4 decoded bytes), so no `read_database` round-trip is + /// needed. The read response supplies the container's name and partition key. + /// The resulting [`ContainerReference`] is RID-addressed (it carries no + /// database name).
+ async fn fetch_container_by_rid( + &self, + container_rid: &str, + ) -> crate::error::Result<ContainerReference> { + // A container RID decodes to at least 8 bytes: the first 4 identify the + // parent database, the next 4 the container. Anything shorter (e.g. a + // bare database RID) is not a container RID — fail fast rather than + // issuing a request that the service would reject. + let decoded = crate::models::resource_id::decode_rid(container_rid).map_err(|e| { + crate::error::CosmosError::builder() + .with_status(crate::error::CosmosStatus::CLIENT_INVALID_RESOURCE_ID) + .with_message(format!("invalid container RID '{container_rid}'")) + .with_source(e) + .build() + })?; + if decoded.len() < 8 { + return Err(crate::error::CosmosError::builder() + .with_status(crate::error::CosmosStatus::CLIENT_INVALID_RESOURCE_ID) + .with_message(format!( + "'{container_rid}' is not a container RID (decodes to {} bytes; a container RID requires at least 8)", + decoded.len() + )) + .with_source(std::io::Error::other("container RID too short")) + .build()); + } + let db_rid = crate::models::resource_id::ResourceId::new( + crate::models::resource_id::encode_rid(&decoded[0..4]), + ); + + let options = OperationOptions::default(); + + let container_result = self + .execute_singleton_operation( + CosmosOperation::read_container_by_rid( + self.account().clone(), + db_rid.as_str().to_owned(), + container_rid.to_owned(), + ), + options, + ) + .await?; + let container_headers = container_result.headers().clone(); + let container_diagnostics = container_result.diagnostics(); + let container_props: ContainerProperties = + container_result.into_body().into_single().map_err(|e| { + crate::error::CosmosError::builder() + .with_status(crate::error::CosmosStatus::SERIALIZATION_RESPONSE_BODY_INVALID) + .with_message("failed to deserialize container response") + .with_response_parts(crate::models::CosmosResponsePayload::new( + crate::models::ResponseBody::NoPayload, + container_headers.clone(), + )) +
.with_diagnostics(container_diagnostics.clone()) + .with_source(e) + .build() + })?; + + // Prefer the authoritative RID echoed back by the service; fall back to + // the caller-supplied RID if the response omits it. + let resolved_rid = container_props + .system_properties + .rid + .clone() + .unwrap_or_else(|| container_rid.to_owned()); + + Ok(ContainerReference::new_by_rid( + self.account().clone(), + db_rid.as_str().to_owned(), + container_props.id.clone().into_owned(), + resolved_rid, + &container_props, + )) + } + + /// Creates a new driver instance. + /// /// This is internal - use [`CosmosDriverRuntime::create_driver()`] instead. pub(crate) fn new( runtime: Arc, @@ -1827,6 +1908,17 @@ impl CosmosDriver { operation: CosmosOperation, options: OperationOptions, ) -> crate::error::Result> { + // Reject mixed name/RID addressing before any work is done. This is the + // release-mode guard backing the debug-time addressing assertion: a + // reference that mixes a name-addressed parent with a RID-addressed child + // (or vice versa) signs and routes inconsistently and the gateway rejects + // it with an opaque 401. Failing here turns that into a deterministic + // client-side error for references built through any path, not just the + // SDK's ContainerClient. The check is a cheap in-memory field comparison + // and is a no-op for every consistently-addressed reference, so it does + // not change the request flow or issue any additional network calls. + operation.resource_reference().validate_addressing()?; + // PATCH is a virtual operation type: dispatch it to the dedicated // Read-Modify-Write handler before any of the standard pipeline steps // run, because the handler issues its own Read/Replace operations @@ -2141,6 +2233,38 @@ impl CosmosDriver { Ok(resolved.as_ref().clone()) } + /// Resolves a container by its RID. + /// + /// Attempts to resolve from `ContainerCache` (by-RID index) first.
On a cache + /// miss, fetches metadata from the service addressing the container by RID and + /// populates the cache. The returned [`ContainerReference`] is RID-addressed + /// (it carries no database name). + pub async fn resolve_container_by_rid( + &self, + container_rid: &str, + ) -> crate::error::Result<ContainerReference> { + let endpoint = self.account().endpoint().as_str().to_owned(); + let container_rid_owned = container_rid.to_owned(); + + let resolved = self + .runtime + .container_cache() + .get_or_fetch_by_rid(&endpoint, container_rid, || async move { + self.fetch_container_by_rid(&container_rid_owned) + .await + .map_err(|err| { + crate::error::CosmosErrorBuilder::from_error(err) + .with_context(format!( + "resolve container by rid (container_rid='{container_rid_owned}')" + )) + .build() + }) + }) + .await?; + + Ok(resolved.as_ref().clone()) + } + /// Plans the execution of a Cosmos DB operation. /// /// For trivial operations (non-query or single-partition), returns a diff --git a/sdk/cosmos/azure_data_cosmos_driver/src/driver/pipeline/operation_pipeline.rs b/sdk/cosmos/azure_data_cosmos_driver/src/driver/pipeline/operation_pipeline.rs index 8d212c8553..3da02525d8 100644 --- a/sdk/cosmos/azure_data_cosmos_driver/src/driver/pipeline/operation_pipeline.rs +++ b/sdk/cosmos/azure_data_cosmos_driver/src/driver/pipeline/operation_pipeline.rs @@ -27,9 +27,9 @@ use crate::{ transport::CosmosTransport, }, models::{ - cosmos_headers::QUERY_CONTENT_TYPE, request_header_names, AccountEndpoint, ActivityId, - CosmosOperation, CosmosResponse, Credential, DefaultConsistencyLevel, - EffectivePartitionKey, OperationType, SessionToken, SubStatusCode, + cosmos_headers::QUERY_CONTENT_TYPE, encode_path_segments, request_header_names, + AccountEndpoint, ActivityId, CosmosOperation, CosmosResponse, Credential, + DefaultConsistencyLevel, EffectivePartitionKey, OperationType, SessionToken, SubStatusCode, }, options::{ HedgeThreshold, OperationOptionsView, ReadConsistencyStrategy, Region, @@
-1269,7 +1269,17 @@ fn build_transport_request( } else { format!("/{}", request_path) }; - base.set_path(&normalized); + // Name-based paths are percent-encoded so the gateway reconstructs the + // same resource link we signed (names may contain reserved characters). + // RID-based paths must be sent raw: encoding the `=` padding of a base64 + // RID makes the gateway treat the segment as a name and reject the + // RID-based signature. The authorization signature is derived from + // `paths` below (a lowercased RID for RID-addressed requests). + if paths.is_rid_based() { + base.set_path(&normalized); + } else { + base.set_path(&encode_path_segments(&normalized)); + } base }; diff --git a/sdk/cosmos/azure_data_cosmos_driver/src/driver/routing/session_container.rs b/sdk/cosmos/azure_data_cosmos_driver/src/driver/routing/session_container.rs index 0f72193e40..451eb74951 100644 --- a/sdk/cosmos/azure_data_cosmos_driver/src/driver/routing/session_container.rs +++ b/sdk/cosmos/azure_data_cosmos_driver/src/driver/routing/session_container.rs @@ -33,11 +33,15 @@ struct SessionContainerInner { name_to_rid: HashMap, } -/// Returns the `dbs/{db}/colls/{coll}` name path from a [`ContainerReference`], -/// reusing the pre-computed `name_based_path` by skipping the leading `/`. +/// Returns the container path used as the session-token index key, reusing the +/// pre-computed path by skipping the leading `/`. +/// +/// For name-addressed containers this is `dbs/{db}/colls/{coll}`; for RID-addressed +/// containers it is the RID-based path. Either way it is stable per container and +/// consistent between `set` and `resolve`, which is all the name→RID fallback needs. fn name_path(container: &ContainerReference) -> &str { - // name_based_path() returns "/dbs/{db}/colls/{coll}"; skip the leading '/'. - &container.name_based_path()[1..] + // base_path() returns "/dbs/.../colls/..."; skip the leading '/'. + &container.base_path()[1..] 
} impl SessionContainer { diff --git a/sdk/cosmos/azure_data_cosmos_driver/src/error/cosmos_status.rs b/sdk/cosmos/azure_data_cosmos_driver/src/error/cosmos_status.rs index 4af9df72f3..faa1d582bd 100644 --- a/sdk/cosmos/azure_data_cosmos_driver/src/error/cosmos_status.rs +++ b/sdk/cosmos/azure_data_cosmos_driver/src/error/cosmos_status.rs @@ -516,6 +516,7 @@ impl SubStatusCode { 20303 => Some("ServiceReturnedOfferWithoutId"), 20304 => Some("ClientThroughputPollerIncomplete"), 20305 => Some("ClientTopologyResolutionFailed"), + 20306 => Some("ServiceReturnedObjectWithoutRid"), // SDK Server-side codes (21xxx) - consistent across .NET and Java 21001 => Some("NameCacheIsStaleExceededRetryLimit"), @@ -1274,6 +1275,17 @@ impl SubStatusCode { /// operations. pub const CLIENT_CONTINUATION_TOKEN_NON_QUERY_OPERATION: SubStatusCode = SubStatusCode(20117); + /// A caller-supplied resource RID could not be parsed as a valid Cosmos DB + /// RID (20118). RIDs are Base64-encoded byte sequences; this is raised when + /// the bytes cannot be decoded or are too short to extract the expected + /// resource hierarchy. + pub const CLIENT_INVALID_RESOURCE_ID: SubStatusCode = SubStatusCode(20118); + + /// Name-based and RID-based addressing were mixed across the + /// database/container hierarchy (20119). A RID-addressed database requires a + /// RID-addressed container and vice versa. + pub const CLIENT_MIXED_NAME_RID_ADDRESSING: SubStatusCode = SubStatusCode(20119); + // ----- 20150-20199: SDK configuration / setup errors ----- /// Two fault-injection rules registered with the same id (20150). @@ -2037,6 +2049,19 @@ impl CosmosStatus { sub_status: Some(SubStatusCode::CLIENT_CONTINUATION_TOKEN_NON_QUERY_OPERATION), }; + /// 400 / 20118 — caller-supplied resource RID could not be parsed. 
+ pub const CLIENT_INVALID_RESOURCE_ID: CosmosStatus = CosmosStatus { + status_code: StatusCode::BadRequest, + sub_status: Some(SubStatusCode::CLIENT_INVALID_RESOURCE_ID), + }; + + /// 400 / 20119 — name-based and RID-based addressing were mixed across the + /// database/container hierarchy. + pub const CLIENT_MIXED_NAME_RID_ADDRESSING: CosmosStatus = CosmosStatus { + status_code: StatusCode::BadRequest, + sub_status: Some(SubStatusCode::CLIENT_MIXED_NAME_RID_ADDRESSING), + }; + // Configuration / setup (HTTP 400, sub-status 20150-20199) /// 400 / 20150 — duplicate fault-injection rule id. diff --git a/sdk/cosmos/azure_data_cosmos_driver/src/models/cosmos_operation.rs b/sdk/cosmos/azure_data_cosmos_driver/src/models/cosmos_operation.rs index 519ceac986..6454f0d3b3 100644 --- a/sdk/cosmos/azure_data_cosmos_driver/src/models/cosmos_operation.rs +++ b/sdk/cosmos/azure_data_cosmos_driver/src/models/cosmos_operation.rs @@ -444,6 +444,26 @@ impl CosmosOperation { Self::new(OperationType::Read, resource_ref, None) } + /// Reads a container's properties by database and container RID. + /// + /// Like [`read_container_by_name`](Self::read_container_by_name) but addresses + /// the container by RID. Taking the raw `db_rid` and `container_rid` (rather + /// than a pre-built [`DatabaseReference`]) makes a mixed name/RID path + /// unrepresentable: the parent database reference is always constructed + /// RID-based here, so the request path is guaranteed to be fully RID-based + /// (`/dbs/{db_rid}/colls/{container_rid}`). 
+ pub fn read_container_by_rid( + account: AccountReference, + db_rid: impl Into>, + container_rid: impl Into>, + ) -> Self { + let database = DatabaseReference::from_rid(account, db_rid.into()); + let resource_ref: CosmosResourceReference = CosmosResourceReference::from(database) + .with_resource_type(ResourceType::DocumentCollection) + .with_rid(container_rid.into()); + Self::new(OperationType::Read, resource_ref, None) + } + // ===== Data Plane Factory Methods ===== /// Creates a new item (document) in a container. diff --git a/sdk/cosmos/azure_data_cosmos_driver/src/models/cosmos_resource_reference.rs b/sdk/cosmos/azure_data_cosmos_driver/src/models/cosmos_resource_reference.rs index 0cd2302653..7384b49718 100644 --- a/sdk/cosmos/azure_data_cosmos_driver/src/models/cosmos_resource_reference.rs +++ b/sdk/cosmos/azure_data_cosmos_driver/src/models/cosmos_resource_reference.rs @@ -39,11 +39,20 @@ pub(crate) struct ResourcePaths { /// For feed: `parent.len()` → signing link = `buf[1..signing_end]` /// Always `>= 1` when `buf` is non-empty (skips the leading `/`). signing_end: usize, - /// Signing link override for offer resources. + /// Signing link override. /// - /// Offers use a lowercased RID that is unrelated to the URL path, so it - /// cannot be derived as a sub-slice of `buf`. + /// Used for resources whose signing link is a lowercased RID that is + /// unrelated to the URL path and so cannot be derived as a sub-slice of + /// `buf`. This applies to offers and to any RID-addressed resource (where + /// the master-key signature is computed over the lowercased leaf/parent RID + /// only, matching `is_name_based = false` semantics in the service). signing_override: Option, + /// When `true`, the request path is RID-addressed and must be sent to the + /// gateway **raw** (no percent-encoding). Encoding the `=` padding of a + /// base64 RID makes the gateway treat the segment as a name and reject the + /// RID-based signature. 
Name-addressed paths keep `false` so their segments + /// are percent-encoded as usual. + rid_based: bool, } impl ResourcePaths { @@ -52,6 +61,7 @@ impl ResourcePaths { buf: String::new(), signing_end: 0, signing_override: None, + rid_based: false, } } @@ -60,6 +70,12 @@ impl ResourcePaths { &self.buf } + /// Returns `true` if the request path is RID-addressed and must be sent raw + /// (without percent-encoding the path segments). + pub(crate) fn is_rid_based(&self) -> bool { + self.rid_based + } + /// Returns the signing link (path without the leading `/`, used for auth). pub(crate) fn signing_link(&self) -> &str { if let Some(ref s) = self.signing_override { @@ -204,7 +220,8 @@ impl CosmosResourceReference { ResourcePaths { buf, signing_end, - signing_override: None, + signing_override: self.rid_signing_override(true), + rid_based: self.is_rid_addressed(), } } @@ -248,6 +265,9 @@ impl CosmosResourceReference { /// [`link_for_signing`](Self::link_for_signing) and /// [`request_path`](Self::request_path) separately. pub(crate) fn compute_paths(&self) -> ResourcePaths { + #[cfg(debug_assertions)] + self.debug_assert_addressing_consistent(); + if self.resource_type == ResourceType::DatabaseAccount { return ResourcePaths::empty(); } @@ -264,6 +284,9 @@ impl CosmosResourceReference { buf, signing_end: 1, signing_override, + // Offers keep percent-encoding (raw RID is not required for the + // offer signing scheme, which already signs the lowercased RID). + rid_based: false, }; } @@ -283,7 +306,8 @@ impl CosmosResourceReference { ResourcePaths { buf, signing_end, - signing_override: None, + signing_override: self.rid_signing_override(true), + rid_based: self.is_rid_addressed(), } } else { // Non-feed: request_path == signing_link (modulo the leading '/'). 
@@ -292,13 +316,199 @@ impl CosmosResourceReference { ResourcePaths { buf, signing_end, - signing_override: None, + signing_override: self.rid_signing_override(false), + rid_based: self.is_rid_addressed(), } } } // ===== Private Helpers ===== + /// Returns whether this reference's database/container parent chain is + /// RID-addressed (`Some(true)`), name-addressed (`Some(false)`), or has no + /// parent chain to classify (`None`, e.g. account-level references). + fn parent_chain_is_rid(&self) -> Option<bool> { + if let Some(ref container) = self.container { + return Some(container.is_by_rid()); + } + if let Some(ref db) = self.database { + return Some(db.is_by_rid()); + } + None + } + + /// Detects mixed name/RID addressing across this reference's database/ + /// container parent chain (and a database/container leaf addressed directly + /// by `id`). Returns a human-readable description of the conflict, or `None` + /// when the addressing is consistent. + /// + /// Under the service's no-mix rule a RID-addressed database can only contain + /// RID-addressed containers, and vice versa; a mixed path (e.g. + /// `/dbs/{name}/colls/{rid}`) signs and routes inconsistently and the gateway + /// rejects it with an opaque `401`. + /// + /// Item and sub-resource leaf ids are intentionally exempt: a document may be + /// addressed by name within a RID-addressed container (its name is + /// independent of the container's addressing mode), so only the database and + /// container parent chain — plus a database/container leaf addressed directly + /// by `id` — are checked.
+ fn addressing_conflict(&self) -> Option<String> { + if let (Some(db), Some(container)) = (self.database.as_ref(), self.container.as_ref()) { + if db.is_by_rid() != container.is_by_rid() { + return Some(format!( + "database is {} but container is {}", + if db.is_by_rid() { "RID" } else { "name" }, + if container.is_by_rid() { "RID" } else { "name" }, + )); + } + } + + // When the resource is itself a database or container addressed directly + // by the leaf `id`, that id must match the parent chain's addressing. + if matches!( + self.resource_type, + ResourceType::Database | ResourceType::DocumentCollection + ) { + if let (Some(id), Some(parent_is_rid)) = (self.id.as_ref(), self.parent_chain_is_rid()) + { + if id.rid().is_some() != parent_is_rid { + return Some(format!( + "leaf id is {} but parent chain is {}", + if id.rid().is_some() { "RID" } else { "name" }, + if parent_is_rid { "RID" } else { "name" }, + )); + } + } + } + + None + } + + /// Validates that this reference does not mix name and RID addressing. + /// + /// This is the release-mode counterpart to + /// [`debug_assert_addressing_consistent`](Self::debug_assert_addressing_consistent): + /// the debug assert turns a mixed reference into a loud panic during tests, + /// while this returns a deterministic [`CLIENT_MIXED_NAME_RID_ADDRESSING`] + /// error in every build before the request is signed and sent — converting an + /// opaque gateway `401` into a clear client-side failure. The driver calls + /// this once per operation so the guarantee holds for references built + /// through any construction path, not just the SDK's `ContainerClient`.
+ /// + /// [`CLIENT_MIXED_NAME_RID_ADDRESSING`]: crate::error::CosmosStatus::CLIENT_MIXED_NAME_RID_ADDRESSING + pub(crate) fn validate_addressing(&self) -> crate::error::Result<()> { + if let Some(conflict) = self.addressing_conflict() { + return Err(crate::error::CosmosError::builder() + .with_status(crate::error::CosmosStatus::CLIENT_MIXED_NAME_RID_ADDRESSING) + .with_message(format!( + "mixed name/RID addressing is not allowed ({conflict}); a RID-addressed \ + database can only contain RID-addressed containers and vice versa" + )) + .with_source(std::io::Error::other("mixed name/RID addressing")) + .build()); + } + Ok(()) + } + + /// Debug-only check that this reference does not mix name and RID addressing + /// across its database/container parent chain. + /// + /// See [`addressing_conflict`](Self::addressing_conflict) for the exact rule + /// and which leaf ids are exempt. This guard turns such a programming error + /// into a deterministic panic in debug/test builds; + /// [`validate_addressing`](Self::validate_addressing) is the release-mode + /// counterpart that returns a typed error. + #[cfg(debug_assertions)] + fn debug_assert_addressing_consistent(&self) { + if let Some(conflict) = self.addressing_conflict() { + debug_assert!(false, "mixed name/RID addressing: {conflict}"); + } + } + + /// Returns the lowercased RID to sign against when this reference is + /// RID-addressed, or `None` when it is name-addressed (in which case the + /// full, case-preserved resource link is used for signing). + /// + /// For RID-based requests the Cosmos master-key signature is computed over + /// the lowercased RID of the **signing resource** only — the leaf for point + /// operations and the parent for feed operations — not the full resource + /// link. This mirrors the `is_name_based = false` path in the service SDKs. + /// + /// `is_feed` selects the parent (feed) vs. leaf (point op) signing resource. 
+ fn rid_signing_override(&self, is_feed: bool) -> Option<String> { + if is_feed { + // Feed/parent-signed: parent is the account (no RID), the database, + // or the container, depending on the child resource type. + match self.resource_type { + ResourceType::DocumentCollection => self + .database + .as_ref() + .and_then(|db| db.rid()) + .map(str::to_lowercase), + ResourceType::Document + | ResourceType::StoredProcedure + | ResourceType::Trigger + | ResourceType::UserDefinedFunction + | ResourceType::PartitionKeyRange => self + .container + .as_ref() + .filter(|c| c.is_by_rid()) + .map(|c| c.rid().to_lowercase()), + // Database feed (list databases) signs the account: no RID. + _ => None, + } + } else { + // Point op: the leaf resource is the signing resource. The leaf RID + // is carried either by an explicit `id` (e.g. a container or item + // read addressed directly by RID) or, when no `id` is present, by the + // addressed container/database itself. + if let Some(rid) = self.id.as_ref().and_then(|id| id.rid()) { + return Some(rid.to_lowercase()); + } + match self.resource_type { + ResourceType::Database => self + .database + .as_ref() + .and_then(|db| db.rid()) + .map(str::to_lowercase), + ResourceType::DocumentCollection => self + .container + .as_ref() + .filter(|c| c.is_by_rid()) + .map(|c| c.rid().to_lowercase()), + _ => None, + } + } + } + + /// Returns `true` if any addressing segment of this reference is a RID, so + /// the request path must be sent raw (without percent-encoding). + /// + /// Under the no-mix addressing rule a RID database implies a RID container, + /// so checking the container and database covers database, container, and + /// item operations.
The leaf `id` is also checked so the raw-path decision + /// can never diverge from [`rid_signing_override`](Self::rid_signing_override), + /// which signs over the leaf RID when `id` carries one: a request signed as + /// RID-based must always be routed RID-based (raw), otherwise the gateway + /// rejects it with an opaque `401`. Offers are handled separately and are + /// not considered RID-addressed for path-encoding purposes. + fn is_rid_addressed(&self) -> bool { + if let Some(ref container) = self.container { + if container.is_by_rid() { + return true; + } + } + if let Some(ref db) = self.database { + if db.is_by_rid() { + return true; + } + } + if self.id.as_ref().and_then(|id| id.rid()).is_some() { + return true; + } + false + } + /// Computes the full resource link for the leaf resource. /// /// This combines the parent chain (account -> database -> container) with @@ -396,8 +606,9 @@ impl CosmosResourceReference { /// `Cow::Owned` for the rare cases where no container reference is available. fn container_link(&self) -> Cow<'_, str> { if let Some(ref container) = self.container { - // Hot path: borrow the pre-computed Arc — no allocation. - return Cow::Borrowed(container.name_based_path()); + // Hot path: borrow the pre-computed Arc path for the container's + // addressing mode (name-based when named, RID-based otherwise). + return Cow::Borrowed(container.base_path()); } // If we have a database but no container, try using the leaf id. if let Some(ref id) = self.id { @@ -418,6 +629,44 @@ impl CosmosResourceReference { } } +/// Returns `true` for bytes that are RFC 3986 *unreserved* characters and may +/// appear literally in a URL path segment without percent-encoding. +fn is_unreserved(b: u8) -> bool { + b.is_ascii_alphanumeric() || matches!(b, b'-' | b'.' | b'_' | b'~') +} + +/// Percent-encodes the reserved characters in each segment of a **name-based** +/// resource path so it can be used as a URL path, while leaving `/` separators +/// intact. 
+/// +/// Resource names may contain reserved characters (spaces, `+`, etc.) that must +/// be percent-encoded for the gateway to reconstruct the same resource link we +/// signed. RID-addressed paths are the opposite: they must be sent **raw** (see +/// [`ResourcePaths::is_rid_based`]), because percent-encoding the `=` padding of +/// a base64 RID makes the gateway treat the segment as a name and reject the +/// RID-based signature. Callers therefore apply this only to name-based paths. +/// +/// Resource ids (names) cannot contain `/`, and Cosmos RIDs use a URL-safe base64 +/// alphabet (no `/`), so splitting on `/` to preserve separators is always safe. +/// The returned value borrows the input when no character needs encoding. +pub(crate) fn encode_path_segments(path: &str) -> Cow<'_, str> { + if path.bytes().all(|b| b == b'/' || is_unreserved(b)) { + return Cow::Borrowed(path); + } + let mut out = String::with_capacity(path.len() + 8); + for &b in path.as_bytes() { + if b == b'/' || is_unreserved(b) { + out.push(b as char); + } else { + const HEX: &[u8; 16] = b"0123456789ABCDEF"; + out.push('%'); + out.push(HEX[(b >> 4) as usize] as char); + out.push(HEX[(b & 0x0f) as usize] as char); + } + } + Cow::Owned(out) +} + // ============================================================================= // From Implementations // ============================================================================= @@ -592,6 +841,17 @@ mod tests { ) } + /// A container addressed purely by RID (no name-based path available). 
+ fn test_container_by_rid() -> ContainerReference { + ContainerReference::new_by_rid( + test_account(), + "Lx1BAA==", + "testcontainer", + "Lx1BALxJyZ8=", + &test_container_props(), + ) + } + #[test] fn from_account_reference() { let account = test_account(); @@ -853,4 +1113,239 @@ mod tests { assert_eq!(paths.signing_link(), "abc123xyz"); assert!(paths.signing_override.is_some()); } + + // ===== RID-addressed signing + raw-path tests ===== + + #[test] + fn database_by_rid_signs_lowercased_rid() { + // A RID-addressed database read signs over the lowercased database RID + // only (not the full `/dbs/{rid}` link) and sends the path raw. + let db = DatabaseReference::from_rid(test_account(), "Lx1BAA=="); + let r: CosmosResourceReference = db.into(); + let paths = r.compute_paths(); + assert_eq!(paths.request_path(), "/dbs/Lx1BAA=="); + assert_eq!(paths.signing_link(), "lx1baa=="); + assert!(paths.is_rid_based()); + // link_for_signing returns the bare lowercased RID (no leading '/'). + assert_eq!(r.link_for_signing(), "lx1baa=="); + } + + #[test] + fn database_by_name_signs_full_link() { + // Name-addressed databases are unchanged: full, case-preserved link and + // a percent-encodable (non-raw) path. + let db = DatabaseReference::from_name(test_account(), "MyDb"); + let r: CosmosResourceReference = db.into(); + let paths = r.compute_paths(); + assert_eq!(paths.signing_link(), "dbs/MyDb"); + assert!(!paths.is_rid_based()); + } + + #[test] + fn container_by_rid_signs_lowercased_rid() { + let r: CosmosResourceReference = test_container_by_rid().into(); + let paths = r.compute_paths(); + assert_eq!(paths.request_path(), "/dbs/Lx1BAA==/colls/Lx1BALxJyZ8="); + assert_eq!(paths.signing_link(), "lx1balxjyz8="); + assert!(paths.is_rid_based()); + assert_eq!(r.link_for_signing(), "lx1balxjyz8="); + } + + #[test] + fn container_read_by_rid_via_id_signs_leaf_rid() { + // A cold-cache container read is built as: database by RID + leaf `id` + // by RID, with `container == None`. 
The signing resource is the leaf + // collection RID carried by `id`. + let db = DatabaseReference::from_rid(test_account(), "Lx1BAA=="); + let r = CosmosResourceReference::from(db) + .with_resource_type(ResourceType::DocumentCollection) + .with_rid("Lx1BALxJyZ8=".into()); + let paths = r.compute_paths(); + assert_eq!(paths.request_path(), "/dbs/Lx1BAA==/colls/Lx1BALxJyZ8="); + assert_eq!(paths.signing_link(), "lx1balxjyz8="); + assert!(paths.is_rid_based()); + } + + #[test] + fn leaf_rid_forces_raw_path_even_under_name_parent() { + // Release-mode safety net for the raw-path/signing invariant: whenever + // the signing override is a leaf RID (carried by `id`), the path must + // also be sent raw. If `is_rid_addressed()` only inspected the parent, + // this shape would be signed RID-based but routed name-encoded -> opaque + // 401. The helpers are exercised directly (rather than via compute_paths) + // because the debug-only consistency assert rejects this mixed shape; see + // `mixed_name_parent_rid_leaf_panics_in_debug`. + let db = DatabaseReference::from_name(test_account(), "testdb"); + let r = CosmosResourceReference::from(db) + .with_resource_type(ResourceType::DocumentCollection) + .with_rid("Lx1BALxJyZ8=".into()); + // Signed over the lowercased leaf RID... + assert_eq!( + r.rid_signing_override(false), + Some("lx1balxjyz8=".to_owned()) + ); + // ...so the path must be reported raw to match. + assert!(r.is_rid_addressed()); + } + + #[test] + #[cfg(debug_assertions)] + #[should_panic(expected = "mixed name/RID addressing")] + fn mixed_name_parent_rid_leaf_panics_in_debug() { + // A name-addressed database parent with a RID-addressed container leaf is + // an invalid, mixable shape; compute_paths must fail fast in debug builds. 
+ let db = DatabaseReference::from_name(test_account(), "testdb"); + let r = CosmosResourceReference::from(db) + .with_resource_type(ResourceType::DocumentCollection) + .with_rid("Lx1BALxJyZ8=".into()); + let _ = r.compute_paths(); + } + + #[test] + fn validate_addressing_rejects_mixed_name_parent_rid_leaf() { + // Release-mode guard: the same mixed shape that panics in debug must + // return a typed CLIENT_MIXED_NAME_RID_ADDRESSING error in every build, + // so the driver fails deterministically before signing instead of + // emitting an opaque 401. + let db = DatabaseReference::from_name(test_account(), "testdb"); + let r = CosmosResourceReference::from(db) + .with_resource_type(ResourceType::DocumentCollection) + .with_rid("Lx1BALxJyZ8=".into()); + let err = r + .validate_addressing() + .expect_err("mixed addressing must be rejected"); + assert_eq!( + err.status(), + crate::error::CosmosStatus::CLIENT_MIXED_NAME_RID_ADDRESSING + ); + } + + #[test] + fn validate_addressing_accepts_consistent_addressing() { + // A fully name-addressed and a fully RID-addressed reference are both + // consistent, so validation is a no-op (returns Ok) and the flow is + // unchanged. + let name_ref: CosmosResourceReference = test_container().into(); + name_ref + .validate_addressing() + .expect("name addressing is consistent"); + + let rid_ref: CosmosResourceReference = test_container_by_rid().into(); + rid_ref + .validate_addressing() + .expect("RID addressing is consistent"); + } + + #[test] + fn container_by_name_signs_full_link() { + let r: CosmosResourceReference = test_container().into(); + let paths = r.compute_paths(); + assert_eq!(paths.signing_link(), "dbs/testdb/colls/testcontainer"); + assert!(!paths.is_rid_based()); + } + + #[test] + fn container_feed_under_rid_db_signs_parent_db_rid() { + // Listing containers under a RID-addressed database signs the parent + // database RID (lowercased) and sends the path raw. 
+ let db = DatabaseReference::from_rid(test_account(), "Lx1BAA=="); + let r = CosmosResourceReference::from(db) + .with_resource_type(ResourceType::DocumentCollection) + .into_feed_reference(); + let paths = r.compute_paths(); + assert_eq!(paths.request_path(), "/dbs/Lx1BAA==/colls"); + assert_eq!(paths.signing_link(), "lx1baa=="); + assert!(paths.is_rid_based()); + } + + #[test] + fn item_feed_on_rid_container_signs_parent_container_rid() { + // Query/create items on a RID-addressed container signs the parent + // container RID (lowercased). Covers both compute_paths (feed) and + // compute_feed_paths (Create/Upsert). + let feed = CosmosResourceReference::from(test_container_by_rid()) + .with_resource_type(ResourceType::Document) + .into_feed_reference(); + let paths = feed.compute_paths(); + assert_eq!( + paths.request_path(), + "/dbs/Lx1BAA==/colls/Lx1BALxJyZ8=/docs" + ); + assert_eq!(paths.signing_link(), "lx1balxjyz8="); + assert!(paths.is_rid_based()); + + // Create/Upsert path: an ItemReference on a RID container. + let item = + ItemReference::from_name(&test_container_by_rid(), PartitionKey::from("pk1"), "doc1"); + let item_ref: CosmosResourceReference = item.into(); + let feed_paths = item_ref.compute_feed_paths(); + assert_eq!( + feed_paths.request_path(), + "/dbs/Lx1BAA==/colls/Lx1BALxJyZ8=/docs" + ); + assert_eq!(feed_paths.signing_link(), "lx1balxjyz8="); + assert!(feed_paths.is_rid_based()); + } + + #[test] + fn name_addressed_paths_are_not_rid_based() { + // Sanity: every name-addressed reference keeps rid_based == false so the + // URL path continues to be percent-encoded. 
+ let db = DatabaseReference::from_name(test_account(), "mydb"); + assert!(!CosmosResourceReference::from(db) + .compute_paths() + .is_rid_based()); + assert!(!CosmosResourceReference::from(test_container()) + .compute_paths() + .is_rid_based()); + let item = ItemReference::from_name(&test_container(), PartitionKey::from("pk1"), "doc1"); + assert!(!CosmosResourceReference::from(item) + .compute_paths() + .is_rid_based()); + } + + #[test] + fn offer_is_not_rid_based_for_path_encoding() { + // Offers sign the lowercased RID but keep percent-encoding (rid_based + // stays false) — their behavior is intentionally left unchanged. + let r = CosmosResourceReference::from(test_account()) + .with_resource_type(ResourceType::Offer) + .with_rid("ABC123XYZ".into()); + let paths = r.compute_paths(); + assert_eq!(paths.signing_link(), "abc123xyz"); + assert!(!paths.is_rid_based()); + } + + #[test] + fn encode_path_segments_borrows_when_safe() { + let p = "/dbs/mydb/colls/mycoll/docs/item1"; + assert!(matches!(encode_path_segments(p), Cow::Borrowed(_))); + assert_eq!(encode_path_segments(p), p); + } + + #[test] + fn encode_path_segments_encodes_reserved_padding() { + // The helper still percent-encodes base64 padding (`=`) when applied — + // it is only invoked for name-based paths now, but the encoding itself + // is unchanged. + let p = "/dbs/qjQBAA==/colls/qjQBAOWXnF4="; + assert_eq!( + encode_path_segments(p), + "/dbs/qjQBAA%3D%3D/colls/qjQBAOWXnF4%3D" + ); + } + + #[test] + fn encode_path_segments_preserves_separators_and_unreserved() { + // `/` separators stay literal; unreserved chars (`-` `_` `.` `~`) are kept. + let p = "/dbs/Adt-AA==/colls/a_b.c~d"; + assert_eq!(encode_path_segments(p), "/dbs/Adt-AA%3D%3D/colls/a_b.c~d"); + } + + #[test] + fn encode_path_segments_encodes_other_reserved() { + // Reserved characters that could appear in a name are encoded too. 
+ let p = "/dbs/a+b c"; + assert_eq!(encode_path_segments(p), "/dbs/a%2Bb%20c"); + } } diff --git a/sdk/cosmos/azure_data_cosmos_driver/src/models/mod.rs b/sdk/cosmos/azure_data_cosmos_driver/src/models/mod.rs index 4f6cea6b1d..67c1430ea6 100644 --- a/sdk/cosmos/azure_data_cosmos_driver/src/models/mod.rs +++ b/sdk/cosmos/azure_data_cosmos_driver/src/models/mod.rs @@ -52,6 +52,7 @@ pub use cosmos_headers::{ CosmosResponseHeaders, MaxItemCountHint, OfferAutoscaleSettings, }; pub use cosmos_operation::CosmosOperation; +pub(crate) use cosmos_resource_reference::encode_path_segments; pub use cosmos_resource_reference::CosmosResourceReference; pub(crate) use cosmos_resource_reference::ResourcePaths; pub use cosmos_response::CosmosResponse; diff --git a/sdk/cosmos/azure_data_cosmos_driver/src/models/resource_reference.rs b/sdk/cosmos/azure_data_cosmos_driver/src/models/resource_reference.rs index e92014ed73..da467ec558 100644 --- a/sdk/cosmos/azure_data_cosmos_driver/src/models/resource_reference.rs +++ b/sdk/cosmos/azure_data_cosmos_driver/src/models/resource_reference.rs @@ -101,10 +101,12 @@ impl DatabaseReference { /// A resolved reference to a Cosmos DB container. /// -/// Always carries both the name-based and RID-based identifiers for the container -/// and its parent database, along with immutable container properties (partition key -/// definition and unique key policy). This guarantees that both addressing modes -/// are available without additional I/O. +/// Always carries the RID-based identifiers for the container and its parent +/// database, along with immutable container properties (partition key definition +/// and unique key policy). When the container was resolved by name, the name-based +/// identifiers (parent database name and name-based path) are carried too; when it +/// was resolved purely by RID they are absent, since the parent database name is +/// not available in that mode. 
/// /// Instances are created via async factory methods that resolve the container /// metadata from the Cosmos DB service or cache. @@ -112,36 +114,55 @@ impl DatabaseReference { /// ## Equality and Hashing /// /// Two `ContainerReference` values are considered equal if they refer to the same -/// account, container RID, and container name. This detects both delete + recreate -/// (same name, different RID) and rename scenarios (same RID, different name). +/// account and container RID. The container name and addressing mode are +/// deliberately excluded so that a name-resolved reference and a RID-resolved +/// reference for the same physical container compare equal — they collapse to a +/// single cache or throughput-registry key. #[derive(Clone, Debug)] #[non_exhaustive] pub struct ContainerReference { /// Reference to the parent account. account: AccountReference, - /// The database user-provided name. - db_name: ResourceName, /// The database internal RID. db_rid: ResourceId, /// The container user-provided name. + /// + /// Always available, even for RID-addressed references, because the service + /// returns the container's `id` in the container read response. container_name: ResourceName, /// The container internal RID. container_rid: ResourceId, /// Partition key definition for this container. partition_key_definition: PartitionKeyDefinition, - /// Pre-computed name-based path: `/dbs/{db_name}/colls/{container_name}`. + /// Pre-computed RID-based path: `/dbs/{db_rid}/colls/{container_rid}`. /// /// Stored as `Arc` so cloning `ContainerReference` is cheap (atomic refcount). - name_based_path: Arc, - /// Pre-computed RID-based path: `/dbs/{db_rid}/colls/{container_rid}`. rid_based_path: Arc, + /// Name-based addressing data. `Some` when the container was resolved by name, + /// `None` when resolved purely by RID — the parent database name is unavailable + /// in that mode, so no name-based path can be constructed. 
+ name_addressing: Option, +} + +/// Name-based addressing data for a [`ContainerReference`]. +/// +/// Grouped so the database name and the pre-computed name path are either both +/// present (name mode) or both absent (RID mode) — never a mix. +#[derive(Clone, Debug)] +struct NameAddressing { + /// The database user-provided name. + db_name: ResourceName, + /// Pre-computed name-based path: `/dbs/{db_name}/colls/{container_name}`. + name_based_path: Arc, } impl PartialEq for ContainerReference { fn eq(&self, other: &Self) -> bool { - self.account == other.account - && self.container_rid == other.container_rid - && self.container_name == other.container_name + // An account + container RID uniquely identifies a physical container, + // independent of how it was addressed (name or RID). Keeping the name out + // of equality ensures name-resolved and RID-resolved references for the + // same container compare equal and collapse to one cache/registry key. + self.account == other.account && self.container_rid == other.container_rid } } @@ -151,7 +172,6 @@ impl Hash for ContainerReference { fn hash(&self, state: &mut H) { self.account.hash(state); self.container_rid.hash(state); - self.container_name.hash(state); } } @@ -183,13 +203,48 @@ impl ContainerReference { let rid_based_path: Arc = format!("/dbs/{}/colls/{}", db_rid, container_rid).into(); Self { account, - db_name, db_rid, container_name, container_rid, partition_key_definition: container_properties.partition_key.clone(), - name_based_path, rid_based_path, + name_addressing: Some(NameAddressing { + db_name, + name_based_path, + }), + } + } + + /// Creates a container reference addressed purely by RID. + /// + /// Used when a container is resolved from a RID, where the parent database + /// *name* is not available. Only the RID-based path is constructed; the + /// name-based accessors ([`database_name`](Self::database_name), + /// [`name_based_path`](Self::name_based_path)) return `None`. 
The container's + /// own name is still recorded because the service returns it in the container + /// read response. + /// + /// Not exposed publicly — use [`CosmosDriver::resolve_container_by_rid()`](crate::driver::CosmosDriver::resolve_container_by_rid) + /// to obtain a RID-addressed container reference. + pub(crate) fn new_by_rid( + account: AccountReference, + db_rid: impl Into, + container_name: impl Into, + container_rid: impl Into, + container_properties: &crate::models::ContainerProperties, + ) -> Self { + let db_rid: ResourceId = db_rid.into(); + let container_name: ResourceName = container_name.into(); + let container_rid: ResourceId = container_rid.into(); + let rid_based_path: Arc = format!("/dbs/{}/colls/{}", db_rid, container_rid).into(); + Self { + account, + db_rid, + container_name, + container_rid, + partition_key_definition: container_properties.partition_key.clone(), + rid_based_path, + name_addressing: None, } } @@ -208,9 +263,9 @@ impl ContainerReference { self.container_rid.as_str() } - /// Returns the database name. - pub fn database_name(&self) -> &str { - self.db_name.as_str() + /// Returns the database name, or `None` if this container was addressed by RID. + pub fn database_name(&self) -> Option<&str> { + self.name_addressing.as_ref().map(|n| n.db_name.as_str()) } /// Returns the database RID. @@ -223,15 +278,36 @@ impl ContainerReference { &self.partition_key_definition } - /// Returns the name-based relative path: `/dbs/{db_name}/colls/{container_name}` - pub fn name_based_path(&self) -> &str { - &self.name_based_path + /// Returns the name-based relative path `/dbs/{db_name}/colls/{container_name}`, + /// or `None` if this container was addressed by RID. 
+ pub fn name_based_path(&self) -> Option<&str> { + self.name_addressing + .as_ref() + .map(|n| n.name_based_path.as_ref()) } /// Returns the RID-based relative path: `/dbs/{db_rid}/colls/{container_rid}` pub fn rid_based_path(&self) -> &str { &self.rid_based_path } + + /// Returns the effective relative path for operations against this container: + /// the name-based path when addressed by name, otherwise the RID-based path. + /// + /// Item and sub-resource links are built on top of this base, so item `id`s + /// remain name-based while the container/database portion follows the + /// container's addressing mode. + pub fn base_path(&self) -> &str { + match &self.name_addressing { + Some(n) => &n.name_based_path, + None => &self.rid_based_path, + } + } + + /// Returns `true` if this container was addressed purely by RID. + pub fn is_by_rid(&self) -> bool { + self.name_addressing.is_none() + } } // ============================================================================= @@ -271,7 +347,7 @@ impl ItemReference { item_name: impl Into>, ) -> Self { let name = ResourceName::new(item_name); - let resource_link = format!("{}/docs/{}", container.name_based_path(), name); + let resource_link = format!("{}/docs/{}", container.base_path(), name); Self { container: container.clone(), partition_key, @@ -371,11 +447,7 @@ impl StoredProcedureReference { stored_procedure_name: impl Into>, ) -> Self { let stored_procedure_name = ResourceName::new(stored_procedure_name); - let resource_link = format!( - "{}/sprocs/{}", - container.name_based_path(), - stored_procedure_name - ); + let resource_link = format!("{}/sprocs/{}", container.base_path(), stored_procedure_name); Self { container: container.clone(), stored_procedure_identifier: ResourceIdentifier::by_name(stored_procedure_name), @@ -466,7 +538,7 @@ impl TriggerReference { trigger_name: impl Into>, ) -> Self { let trigger_name = ResourceName::new(trigger_name); - let resource_link = format!("{}/triggers/{}", 
container.name_based_path(), trigger_name); + let resource_link = format!("{}/triggers/{}", container.base_path(), trigger_name); Self { container: container.clone(), trigger_identifier: ResourceIdentifier::by_name(trigger_name), @@ -549,7 +621,7 @@ impl UdfReference { udf_name: impl Into>, ) -> Self { let udf_name = ResourceName::new(udf_name); - let resource_link = format!("{}/udfs/{}", container.name_based_path(), udf_name); + let resource_link = format!("{}/udfs/{}", container.base_path(), udf_name); Self { container: container.clone(), udf_identifier: ResourceIdentifier::by_name(udf_name), @@ -676,6 +748,28 @@ mod tests { ) } + fn make_container_reference_by_rid() -> ContainerReference { + let account = AccountReference::with_master_key( + Url::parse("https://example.documents.azure.com:443/").unwrap(), + "test-key", + ); + let partition_key: PartitionKeyDefinition = + serde_json::from_str(r#"{"paths":["/tenantId"]}"#).unwrap(); + let container_properties = ContainerProperties { + id: "my-container".into(), + partition_key, + system_properties: Default::default(), + }; + + ContainerReference::new_by_rid( + account, + "db-rid", + "my-container", + "container-rid", + &container_properties, + ) + } + #[test] fn container_partition_key_definition_is_available() { let container = make_container_reference(); @@ -684,4 +778,56 @@ mod tests { assert_eq!(partition_key_definition.paths().len(), 1); assert_eq!(partition_key_definition.paths()[0].as_ref(), "/tenantId"); } + + #[test] + fn named_container_exposes_name_addressing() { + let container = make_container_reference(); + + assert!(!container.is_by_rid()); + assert_eq!(container.name(), "my-container"); + assert_eq!(container.database_name(), Some("my-db")); + assert_eq!( + container.name_based_path(), + Some("/dbs/my-db/colls/my-container") + ); + assert_eq!( + container.rid_based_path(), + "/dbs/db-rid/colls/container-rid" + ); + // Base path follows the name-based path in name mode. 
+ assert_eq!(container.base_path(), "/dbs/my-db/colls/my-container"); + } + + #[test] + fn rid_container_has_no_name_addressing() { + let container = make_container_reference_by_rid(); + + assert!(container.is_by_rid()); + // The container's own name is still available from the read response. + assert_eq!(container.name(), "my-container"); + // But the parent database name and name-based path are not. + assert_eq!(container.database_name(), None); + assert_eq!(container.name_based_path(), None); + assert_eq!(container.database_rid(), "db-rid"); + // Base path falls back to the RID-based path in RID mode. + assert_eq!(container.base_path(), "/dbs/db-rid/colls/container-rid"); + } + + #[test] + fn name_and_rid_references_to_same_container_are_equal() { + // Equality keys on account + container RID only, so a name-resolved and a + // RID-resolved reference to the same physical container must be equal and + // hash identically (they share one cache/registry slot). + let by_name = make_container_reference(); + let by_rid = make_container_reference_by_rid(); + + assert_eq!(by_name, by_rid); + + let hash = |c: &ContainerReference| { + let mut hasher = std::collections::hash_map::DefaultHasher::new(); + c.hash(&mut hasher); + hasher.finish() + }; + assert_eq!(hash(&by_name), hash(&by_rid)); + } }