Skip to content

Commit 8f027b8

Browse files
analogrelay and Copilot authored
Cosmos: Microbenchmark and some optimizations (#4159)
I wrote up a quick microbenchmark of the driver's `execute_operation` code path, which excludes the I/O entirely by using a fixed Mock Transport (gated behind an internal feature flag). Using that, I was able to get a 40+% improvement in performance (again, these times **exclude** I/O, but should reflect CPU usage reductions). Before: ``` point_read time: [16.691 µs 16.734 µs 16.783 µs] ``` After ``` point_read time: [9.7474 µs 9.7864 µs 9.8319 µs] ``` Obviously, these are tiny numbers, but I suspect they add up under high concurrency. We won't know for sure until this is run in the benchmarks. They should be safe, so I'm game to merge and see, but we could also try running the tip of this branch in the perf infra (cc @tvaron3 ) to get real results. The optimizations I worked on were predominantly to reduce duplicate parsing and `HashMap::insert`/`HashMap::get` calls. Rather than calling `HashMap::get_optional_str(...)` for each header when materializing a `CosmosResponseHeaders`, we now iterate the map. Rather than re-parsing `url::Url`, the `AccountRegion` type now directly deserializes the gateway response into an `AccountEndpoint`, allowing us to reuse the same `url::Url` for each request. Rather than constantly `format!`-ing the `CosmosResourceReference`, we now produce a single `ResourcePaths` from it and allow getting slices from it for the various purposes. The benchmarks include instructions for running both profiled and unprofiled runs, and [criterion](https://criterion-rs.github.io/) does a great job of tracking results and showing you improvements over time. Having benchmark infra like this in place will be useful as we keep going. --------- Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent e753b8e commit 8f027b8

40 files changed

Lines changed: 1599 additions & 428 deletions

Cargo.lock

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ members = [
1313
"sdk/core/azure_core_test_macros",
1414
"sdk/core/azure_core_opentelemetry",
1515
"sdk/cosmos/azure_data_cosmos",
16+
"sdk/cosmos/azure_data_cosmos_benchmarks",
1617
"sdk/cosmos/azure_data_cosmos_driver",
1718
"sdk/cosmos/azure_data_cosmos_macros",
1819
"sdk/cosmos/azure_data_cosmos_perf",
@@ -175,6 +176,13 @@ uuid = { version = "1.20", features = ["v4"] }
175176
windows = { version = "0.62", default-features = false }
176177
zip = { version = "8.2", default-features = false, features = ["deflate"] }
177178

179+
# Profile for running benchmarks
180+
# Inherits release optimizations but keeps debug symbols so that tools
181+
# can produce annotated output.
182+
[profile.bench]
183+
inherits = "release"
184+
debug = 1
185+
178186
[workspace.lints.clippy]
179187
large_futures = "deny"
180188
uninlined_format_args = "allow"

sdk/core/typespec/src/http/headers.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,11 @@ impl Headers {
134134
Self::default()
135135
}
136136

137+
/// Creates a new headers collection with at least the specified capacity.
138+
pub fn with_capacity(n: usize) -> Self {
139+
Self(std::collections::HashMap::with_capacity(n))
140+
}
141+
137142
/// Gets the headers represented by `H`, or returns an error if the header is not found.
138143
pub fn get<H: FromHeaders>(&self) -> crate::Result<H> {
139144
match H::from_headers(self) {

sdk/cosmos/.cspell.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
],
55
"ignoreWords": [
66
"accountname",
7+
"ALPN",
78
"apacsoutheast",
89
"Appleby's",
910
"australiacentral",
@@ -42,6 +43,7 @@
4243
"failback",
4344
"failovers",
4445
"FILETIME",
46+
"flamegraph",
4547
"fract",
4648
"francecentral",
4749
"francesouth",
@@ -94,6 +96,7 @@
9496
"PPCB",
9597
"pushback",
9698
"qname",
99+
"RAII",
97100
"readfeed",
98101
"replicaset",
99102
"reqs",

sdk/cosmos/azure_data_cosmos/src/driver_bridge.rs

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -24,18 +24,20 @@ use crate::{
2424
/// Converts a driver [`DriverResponse`] into the SDK's typed [`CosmosResponse<T>`].
2525
///
2626
/// This reconstructs an `azure_core::Response<T>` from the driver's raw bytes,
27-
/// status code, and headers, then wraps it in the SDK's response type.
27+
/// status code, and headers, then wraps it in the SDK's response type using
28+
/// the pre-parsed headers from the driver to avoid a redundant parse.
2829
pub(crate) fn driver_response_to_cosmos_response<T>(
2930
driver_response: DriverResponse,
3031
) -> CosmosResponse<T> {
3132
let status_code: StatusCode = driver_response.status().status_code();
32-
let headers = driver_response_headers_to_headers(driver_response.headers());
33+
let cosmos_headers = driver_response.headers().clone();
34+
let headers = driver_response_headers_to_headers(&cosmos_headers);
3335
let body = driver_response.into_body();
3436

3537
let raw_response = RawResponse::from_bytes(status_code, headers, Bytes::from(body));
3638
let typed_response: Response<T> = raw_response.into();
3739

38-
CosmosResponse::from_response(typed_response)
40+
CosmosResponse::from_driver_response(typed_response, cosmos_headers)
3941
}
4042

4143
/// Converts driver [`CosmosResponseHeaders`] into raw [`Headers`] for the SDK response.

sdk/cosmos/azure_data_cosmos/src/fault_injection/http_client.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// Copyright (c) Microsoft Corporation. All rights reserved.
22
// Licensed under the MIT License.
33

4+
// cSpell:ignore evals
5+
46
use super::result::FaultInjectionResult;
57
use super::rule::FaultInjectionRule;
68
use super::FaultInjectionErrorType;

sdk/cosmos/azure_data_cosmos/src/models/cosmos_response.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -50,11 +50,14 @@ impl<T> CosmosResponse<T> {
5050
}
5151
}
5252

53-
/// Creates a `CosmosResponse` from a typed response without a request.
53+
/// Creates a `CosmosResponse` from a typed response and pre-parsed headers.
5454
///
55-
/// Used for driver-routed operations where no `CosmosRequest` is available.
56-
pub(crate) fn from_response(response: Response<T>) -> Self {
57-
let cosmos_headers = CosmosResponseHeaders::from_headers(response.headers());
55+
/// Used by the driver bridge to avoid re-parsing headers that were already
56+
/// parsed by the driver pipeline.
57+
pub(crate) fn from_driver_response(
58+
response: Response<T>,
59+
cosmos_headers: CosmosResponseHeaders,
60+
) -> Self {
5861
let diagnostics = CosmosDiagnostics::from_headers(&cosmos_headers);
5962
Self {
6063
response,
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
[package]
2+
name = "azure_data_cosmos_benchmarks"
3+
version = "0.1.0"
4+
description = "Benchmarks for the Azure Cosmos DB Rust driver"
5+
publish = false
6+
authors.workspace = true
7+
edition.workspace = true
8+
license.workspace = true
9+
repository.workspace = true
10+
rust-version.workspace = true
11+
12+
[[bench]]
13+
name = "point_read"
14+
harness = false
15+
16+
[dependencies]
17+
async-trait.workspace = true
18+
azure_core = { workspace = true }
19+
azure_data_cosmos_driver = { path = "../azure_data_cosmos_driver", features = [
20+
"__internal_mocking",
21+
] }
22+
tokio = { workspace = true, features = ["rt-multi-thread", "time"] }
23+
url.workspace = true
24+
25+
[dev-dependencies]
26+
criterion.workspace = true
27+
28+
[lints]
29+
workspace = true
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
# azure_data_cosmos_benchmarks
2+
3+
Criterion benchmarks for the Azure Cosmos DB Rust driver. By default, benchmarks
4+
replace the reqwest transport with an in-memory mock, so they measure driver
5+
overhead only — routing, signing, retry state, response parsing, and session
6+
token management — with no network I/O.
7+
8+
## Running the benchmarks
9+
10+
### Standard latency benchmark
11+
12+
```sh
13+
cargo bench -p azure_data_cosmos_benchmarks --bench point_read
14+
```
15+
16+
Results are written to `target/criterion/point_read/`.
17+
18+
### CPU flamegraph (pprof)
19+
20+
Pass `--profile-time <seconds>` to enable pprof sampling:
21+
22+
```sh
23+
cargo bench -p azure_data_cosmos_benchmarks --bench point_read -- --profile-time 30
24+
```
25+
26+
The flamegraph SVG is written to
27+
`target/criterion/point_read/profile/point_read.svg`.
28+
29+
`cargo bench` already uses the workspace `bench` profile, which keeps debug symbols for readable symbol names; to select that profile explicitly:
30+
31+
```sh
32+
cargo bench -p azure_data_cosmos_benchmarks --profile bench --bench point_read -- --profile-time 30
33+
```
34+
35+
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
// Copyright (c) Microsoft Corporation. All rights reserved.
2+
// Licensed under the MIT License.
3+
4+
//! Criterion benchmark for `CosmosDriver::execute_operation` — point read.
5+
//!
6+
//! By default the reqwest transport is replaced with an in-memory mock so that
7+
//! the benchmark measures driver overhead (routing, signing, retry state,
8+
//! response parsing, session token management) without any network I/O.
9+
//!
10+
//! Set `AZURE_BENCH_MODE=live` to run against a real Cosmos DB endpoint. See
11+
//! `azure_data_cosmos_benchmarks::setup_live` for the required environment
12+
//! variables.
13+
//!
14+
//! Cache priming (account metadata, container metadata) is performed in setup,
15+
//! outside the measured iteration loop.
16+
//!
17+
//! # CPU flamegraph profiling
18+
//!
19+
//! Run with `--profile-time` to generate a flamegraph SVG via pprof:
20+
//!
21+
//! ```text
22+
//! cargo bench -p azure_data_cosmos_benchmarks --bench point_read -- --profile-time 30
23+
//! ```
24+
//!
25+
//! Output: `target/criterion/point_read/profile/flamegraph.svg`
26+
27+
use azure_data_cosmos_benchmarks::{self as common, BenchConfig};
28+
29+
use std::time::Duration;
30+
31+
use azure_data_cosmos_driver::{models::CosmosOperation, options::OperationOptions};
32+
use criterion::{criterion_group, criterion_main, Criterion, Throughput};
33+
use tokio::runtime::Builder;
34+
35+
fn bench_point_read(c: &mut Criterion) {
36+
let rt = Builder::new_current_thread()
37+
.enable_all()
38+
.build()
39+
.expect("failed to create tokio runtime");
40+
41+
let mut group = c.benchmark_group("point_read");
42+
group.throughput(Throughput::Elements(1));
43+
44+
match common::load_bench_config() {
45+
BenchConfig::Mock => {
46+
let (driver, item_ref) = rt.block_on(common::setup());
47+
group.bench_function("mock", |b| {
48+
b.to_async(&rt).iter(|| async {
49+
driver
50+
.execute_operation(
51+
CosmosOperation::read_item(item_ref.clone()),
52+
OperationOptions::default(),
53+
)
54+
.await
55+
.expect("execute_operation failed")
56+
});
57+
});
58+
}
59+
BenchConfig::Live => {
60+
let (driver, item_ref) = rt.block_on(common::setup_live());
61+
group
62+
.sample_size(50)
63+
.measurement_time(Duration::from_secs(30));
64+
group.bench_function("live", |b| {
65+
b.to_async(&rt).iter(|| async {
66+
driver
67+
.execute_operation(
68+
CosmosOperation::read_item(item_ref.clone()),
69+
OperationOptions::default(),
70+
)
71+
.await
72+
.expect("execute_operation failed")
73+
});
74+
});
75+
}
76+
}
77+
78+
group.finish();
79+
}
80+
81+
criterion_group!(benches, bench_point_read);
82+
criterion_main!(benches);

0 commit comments

Comments
 (0)