From dd957b3af9402a938c2aa24b75b0fe5747720011 Mon Sep 17 00:00:00 2001 From: Kathie Huang Date: Mon, 13 Apr 2026 17:01:46 -0400 Subject: [PATCH 01/10] Create datadog-metrics-collector crate to collect instance value with tags --- Cargo.lock | 39 +++++++ crates/datadog-metrics-collector/Cargo.toml | 14 +++ .../datadog-metrics-collector/src/instance.rs | 109 ++++++++++++++++++ crates/datadog-metrics-collector/src/lib.rs | 11 ++ crates/datadog-metrics-collector/src/tags.rs | 55 +++++++++ 5 files changed, 228 insertions(+) create mode 100644 crates/datadog-metrics-collector/Cargo.toml create mode 100644 crates/datadog-metrics-collector/src/instance.rs create mode 100644 crates/datadog-metrics-collector/src/lib.rs create mode 100644 crates/datadog-metrics-collector/src/tags.rs diff --git a/Cargo.lock b/Cargo.lock index 8970e45..c1bc139 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -487,6 +487,15 @@ dependencies = [ "zstd", ] +[[package]] +name = "datadog-metrics-collector" +version = "0.1.0" +dependencies = [ + "dogstatsd", + "libdd-common 1.1.0", + "tracing", +] + [[package]] name = "datadog-opentelemetry" version = "0.3.0" @@ -540,6 +549,7 @@ version = "0.1.0" dependencies = [ "datadog-fips", "datadog-logs-agent", + "datadog-metrics-collector", "datadog-trace-agent", "dogstatsd", "libdd-trace-utils 3.0.0", @@ -1431,6 +1441,35 @@ version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" +[[package]] +name = "libdd-common" +version = "1.1.0" +source = "git+https://github.com/DataDog/libdatadog?rev=d52ee90209cb12a28bdda0114535c1a985a29d95#d52ee90209cb12a28bdda0114535c1a985a29d95" +dependencies = [ + "anyhow", + "cc", + "const_format", + "futures", + "futures-core", + "futures-util", + "hex", + "http", + "http-body", + "http-body-util", + "hyper", + "hyper-util", + "libc", + "nix", + "pin-project", + "regex", + "serde", + "static_assertions", + "thiserror 1.0.69", + 
"tokio", + "tower-service", + "windows-sys 0.52.0", +] + [[package]] name = "libdd-common" version = "2.0.1" diff --git a/crates/datadog-metrics-collector/Cargo.toml b/crates/datadog-metrics-collector/Cargo.toml new file mode 100644 index 0000000..0153e63 --- /dev/null +++ b/crates/datadog-metrics-collector/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "datadog-metrics-collector" +version = "0.1.0" +edition.workspace = true +license.workspace = true +description = "Collector to read, compute, and submit enhanced metrics in Serverless environments" + +[dependencies] +dogstatsd = { path = "../dogstatsd", default-features = true } +tracing = { version = "0.1", default-features = false } +libdd-common = { git = "https://github.com/DataDog/libdatadog", rev = "d52ee90209cb12a28bdda0114535c1a985a29d95", default-features = false } + +[features] +windows-enhanced-metrics = [] diff --git a/crates/datadog-metrics-collector/src/instance.rs b/crates/datadog-metrics-collector/src/instance.rs new file mode 100644 index 0000000..848e862 --- /dev/null +++ b/crates/datadog-metrics-collector/src/instance.rs @@ -0,0 +1,109 @@ +// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Instance identity metric collector for Azure Functions. +//! +//! Submits `azure.functions.enhanced.instance` with value 1.0 on each +//! collection tick, tagged with the instance identifier. The env var +//! checked depends on the Azure plan type: +//! +//! - Elastic Premium / Premium: `WEBSITE_INSTANCE_ID` +//! - Flex Consumption / Consumption: `WEBSITE_POD_NAME` or `CONTAINER_NAME` + +use dogstatsd::aggregator::AggregatorHandle; +use dogstatsd::metric::{Metric, MetricValue, SortedTags}; +use std::env; +use tracing::{debug, error, info}; + +const INSTANCE_METRIC: &str = "azure.functions.enhanced.instance"; + +/// Resolves the instance ID from explicit values (used by tests). 
+fn resolve_instance_id_from( + website_instance_id: Option<&str>, + website_pod_name: Option<&str>, + container_name: Option<&str>, +) -> Option { + website_instance_id + .or(website_pod_name) + .or(container_name) + .map(String::from) +} + +/// Resolves the instance ID from environment variables. +/// +/// Checks in order: +/// 1. `WEBSITE_INSTANCE_ID` (Elastic Premium / Premium plans) +/// 2. `WEBSITE_POD_NAME` (Flex Consumption plans) +/// 3. `CONTAINER_NAME` (Consumption plans) +fn resolve_instance_id() -> Option { + resolve_instance_id_from( + env::var("WEBSITE_INSTANCE_ID").ok().as_deref(), + env::var("WEBSITE_POD_NAME").ok().as_deref(), + env::var("CONTAINER_NAME").ok().as_deref(), + ) +} + +pub struct InstanceMetricsCollector { + aggregator: AggregatorHandle, + tags: Option, + instance_id: Option, +} + +impl InstanceMetricsCollector { + pub fn new(aggregator: AggregatorHandle, tags: Option) -> Self { + let instance_id = resolve_instance_id(); + if let Some(ref id) = instance_id { + info!("Instance ID resolved: {}", id); + } else { + debug!("No instance ID found, instance metric will not be submitted"); + } + Self { + aggregator, + tags, + instance_id, + } + } + + pub fn collect_and_submit(&self) { + let Some(ref instance_id) = self.instance_id else { + debug!("No instance ID available, skipping instance metric"); + return; + }; + + // Build tags: start with shared tags, add instance + let instance_tag = format!("instance:{}", instance_id); + let tags = match &self.tags { + Some(existing) => { + let mut combined = existing.clone(); + if let Ok(id_tag) = SortedTags::parse(&instance_tag) { + combined.extend(&id_tag); + } + Some(combined) + } + None => SortedTags::parse(&instance_tag).ok(), + }; + + let metric = Metric::new(INSTANCE_METRIC.into(), MetricValue::gauge(1.0), tags, None); + + if let Err(e) = self.aggregator.insert_batch(vec![metric]) { + error!("Failed to insert instance metric: {}", e); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + 
#[test] + fn test_resolve_instance_id_falls_back_to_pod_name() { + let id = resolve_instance_id_from(None, Some("pod-xyz"), Some("container-123")); + assert_eq!(id, Some("pod-xyz".to_string())); + } + + #[test] + fn test_resolve_instance_id_falls_back_to_container_name() { + let id = resolve_instance_id_from(None, None, Some("container-123")); + assert_eq!(id, Some("container-123".to_string())); + } +} diff --git a/crates/datadog-metrics-collector/src/lib.rs b/crates/datadog-metrics-collector/src/lib.rs new file mode 100644 index 0000000..5f22d37 --- /dev/null +++ b/crates/datadog-metrics-collector/src/lib.rs @@ -0,0 +1,11 @@ +// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +#![cfg_attr(not(test), deny(clippy::panic))] +#![cfg_attr(not(test), deny(clippy::unwrap_used))] +#![cfg_attr(not(test), deny(clippy::expect_used))] +#![cfg_attr(not(test), deny(clippy::todo))] +#![cfg_attr(not(test), deny(clippy::unimplemented))] + +pub mod instance; +pub mod tags; diff --git a/crates/datadog-metrics-collector/src/tags.rs b/crates/datadog-metrics-collector/src/tags.rs new file mode 100644 index 0000000..c6db691 --- /dev/null +++ b/crates/datadog-metrics-collector/src/tags.rs @@ -0,0 +1,55 @@ +// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Shared tag builder for enhanced metrics. +//! +//! Tags are attached to all enhanced metrics submitted by the metrics collector. + +use dogstatsd::metric::SortedTags; +use libdd_common::azure_app_services; +use std::env; + +/// Builds the common tags for all enhanced metrics. +/// +/// Sources: +/// - Azure metadata (resource_group, subscription_id, name) from libdd_common +/// - Environment variables (region, plan_tier, service, env, version, serverless_compat_version) +/// +/// The DogStatsD origin tag (e.g. `origin:azurefunction`) is added by the metrics aggregator, +/// not here. 
+pub fn build_enhanced_metrics_tags() -> Option { + let mut tag_parts = Vec::new(); + + if let Some(aas_metadata) = &*azure_app_services::AAS_METADATA_FUNCTION { + let aas_tags = [ + ("resource_group", aas_metadata.get_resource_group()), + ("subscription_id", aas_metadata.get_subscription_id()), + ("name", aas_metadata.get_site_name()), + ]; + for (name, value) in aas_tags { + if value != "unknown" { + tag_parts.push(format!("{}:{}", name, value)); + } + } + } + + for (tag_name, env_var) in [ + ("region", "REGION_NAME"), + ("plan_tier", "WEBSITE_SKU"), + ("service", "DD_SERVICE"), + ("env", "DD_ENV"), + ("version", "DD_VERSION"), + ("serverless_compat_version", "DD_SERVERLESS_COMPAT_VERSION"), + ] { + if let Ok(val) = env::var(env_var) + && !val.is_empty() + { + tag_parts.push(format!("{}:{}", tag_name, val)); + } + } + + if tag_parts.is_empty() { + return None; + } + SortedTags::parse(&tag_parts.join(",")).ok() +} From f5ef0b7dd60b7f6e866f61af245c5712dacf2257 Mon Sep 17 00:00:00 2001 From: Kathie Huang Date: Mon, 13 Apr 2026 17:22:14 -0400 Subject: [PATCH 02/10] Categorize metrics with azure.functions prefix as enhanced metrics --- crates/dogstatsd/src/origin.rs | 46 +++++++++++++++++++++++++++------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/crates/dogstatsd/src/origin.rs b/crates/dogstatsd/src/origin.rs index fc025b9..98e18c2 100644 --- a/crates/dogstatsd/src/origin.rs +++ b/crates/dogstatsd/src/origin.rs @@ -18,6 +18,7 @@ const AZURE_FUNCTIONS_TAG_VALUE: &str = "azurefunction"; const DATADOG_PREFIX: &str = "datadog."; const AWS_LAMBDA_PREFIX: &str = "aws.lambda"; const GOOGLE_CLOUD_RUN_PREFIX: &str = "gcp.run"; +const AZURE_FUNCTIONS_PREFIX: &str = "azure.functions"; const JVM_PREFIX: &str = "jvm."; const RUNTIME_PREFIX: &str = "runtime."; @@ -83,15 +84,17 @@ impl Metric { .join("."); // Determine the service based on metric prefix first - let service = if metric_name.starts_with(JVM_PREFIX) - || metric_name.starts_with(RUNTIME_PREFIX) - { - 
OriginService::ServerlessRuntime - } else if metric_prefix == AWS_LAMBDA_PREFIX || metric_prefix == GOOGLE_CLOUD_RUN_PREFIX { - OriginService::ServerlessEnhanced - } else { - OriginService::ServerlessCustom - }; + let service = + if metric_name.starts_with(JVM_PREFIX) || metric_name.starts_with(RUNTIME_PREFIX) { + OriginService::ServerlessRuntime + } else if metric_prefix == AWS_LAMBDA_PREFIX + || metric_prefix == GOOGLE_CLOUD_RUN_PREFIX + || metric_prefix == AZURE_FUNCTIONS_PREFIX + { + OriginService::ServerlessEnhanced + } else { + OriginService::ServerlessCustom + }; // Then determine the category based on tags let category = if has_tag_value(&tags, AWS_LAMBDA_TAG_KEY, "") { @@ -351,6 +354,31 @@ mod tests { assert_eq!(origin, None); } + #[test] + fn test_find_metric_origin_azure_functions_enhanced() { + let tags = SortedTags::parse("origin:azurefunction").unwrap(); + let metric = Metric { + id: 0, + name: "azure.functions.enhanced.instance".into(), + value: MetricValue::Gauge(1.0), + tags: Some(tags.clone()), + timestamp: 0, + }; + let origin = metric.find_origin(tags).unwrap(); + assert_eq!( + origin.origin_product as u32, + OriginProduct::Serverless as u32 + ); + assert_eq!( + origin.origin_category as u32, + OriginCategory::AzureFunctionsMetrics as u32 + ); + assert_eq!( + origin.origin_service as u32, + OriginService::ServerlessEnhanced as u32 + ); + } + #[test] fn test_find_metric_origin_unknown() { let tags = SortedTags::parse("unknown:tag").unwrap(); From 044e316dbeb5c26705b45c51a71b09aa2ab3e0b0 Mon Sep 17 00:00:00 2001 From: Kathie Huang Date: Mon, 13 Apr 2026 17:23:19 -0400 Subject: [PATCH 03/10] Use metrics collector in main loop and refactor start_dogstatsd --- crates/datadog-serverless-compat/src/main.rs | 207 ++++++++++++------- 1 file changed, 131 insertions(+), 76 deletions(-) diff --git a/crates/datadog-serverless-compat/src/main.rs b/crates/datadog-serverless-compat/src/main.rs index 8c20c41..398c1d4 100644 --- 
a/crates/datadog-serverless-compat/src/main.rs +++ b/crates/datadog-serverless-compat/src/main.rs @@ -40,10 +40,12 @@ use dogstatsd::{ util::parse_metric_namespace, }; +use datadog_metrics_collector::instance::InstanceMetricsCollector; use dogstatsd::metric::{EMPTY_TAGS, SortedTags}; use tokio_util::sync::CancellationToken; const DOGSTATSD_FLUSH_INTERVAL: u64 = 10; +const ENHANCED_METRICS_COLLECTION_INTERVAL_SECS: u64 = 10; const DOGSTATSD_TIMEOUT_DURATION: Duration = Duration::from_secs(5); const DEFAULT_DOGSTATSD_PORT: u16 = 8125; const DEFAULT_LOG_INTAKE_PORT: u16 = 10517; @@ -119,6 +121,18 @@ pub async fn main() { .ok() .and_then(|v| v.parse::().ok()) .unwrap_or(DEFAULT_LOG_INTAKE_PORT); + + // Only enable enhanced metrics for Linux Azure Functions + #[cfg(not(feature = "windows-enhanced-metrics"))] + let dd_enhanced_metrics = env_type == EnvironmentType::AzureFunction + && env::var("DD_ENHANCED_METRICS_ENABLED") + .map(|val| val.to_lowercase() != "false") + .unwrap_or(true); + + // Enhanced metrics are not yet supported in Windows environments + #[cfg(feature = "windows-enhanced-metrics")] + let dd_enhanced_metrics = false; + debug!("Starting serverless trace mini agent"); let env_filter = format!("h2=off,hyper=off,rustls=off,{}", log_level); @@ -182,30 +196,59 @@ pub async fn main() { } }); - let (metrics_flusher, _aggregator_handle) = if dd_use_dogstatsd { - debug!("Starting dogstatsd"); - let (_, metrics_flusher, aggregator_handle) = start_dogstatsd( - dd_dogstatsd_port, + let needs_aggregator = dd_use_dogstatsd || dd_enhanced_metrics; + + // The aggregator is shared between dogstatsd and enhanced metrics. + // It is started independently so that either can be enabled without the other. 
+ let (metrics_flusher, aggregator_handle) = if needs_aggregator { + debug!("Creating metrics flusher and aggregator"); + + let (flusher, handle) = start_aggregator( dd_api_key.clone(), dd_site, https_proxy.clone(), dogstatsd_tags, - dd_statsd_metric_namespace, - #[cfg(all(windows, feature = "windows-pipes"))] - dd_dogstatsd_windows_pipe_name.clone(), ) .await; - if let Some(ref windows_pipe_name) = dd_dogstatsd_windows_pipe_name { - info!("dogstatsd-pipe: starting to listen on pipe {windows_pipe_name}"); + + if dd_use_dogstatsd { + debug!("Starting dogstatsd"); + let _ = start_dogstatsd_listener( + dd_dogstatsd_port, + handle.clone(), + dd_statsd_metric_namespace, + #[cfg(all(windows, feature = "windows-pipes"))] + dd_dogstatsd_windows_pipe_name.clone(), + ) + .await; + if let Some(ref windows_pipe_name) = dd_dogstatsd_windows_pipe_name { + info!("dogstatsd-pipe: starting to listen on pipe {windows_pipe_name}"); + } else { + info!("dogstatsd-udp: starting to listen on port {dd_dogstatsd_port}"); + } } else { - info!("dogstatsd-udp: starting to listen on port {dd_dogstatsd_port}"); + info!("dogstatsd disabled"); } - (metrics_flusher, Some(aggregator_handle)) + (flusher, Some(handle)) } else { - info!("dogstatsd disabled"); + info!("dogstatsd and enhanced metrics disabled"); (None, None) }; + let instance_collector = if dd_enhanced_metrics && metrics_flusher.is_some() { + aggregator_handle.as_ref().map(|handle| { + let tags = datadog_metrics_collector::tags::build_enhanced_metrics_tags(); + InstanceMetricsCollector::new(handle.clone(), tags) + }) + } else { + if !dd_enhanced_metrics { + info!("Enhanced metrics disabled"); + } else { + info!("Enhanced metrics enabled but metrics flusher not found"); + } + None + }; + let (log_flusher, _log_aggregator_handle): (Option, Option) = if dd_logs_enabled { debug!("Starting log agent"); @@ -225,48 +268,54 @@ pub async fn main() { }; let mut flush_interval = interval(Duration::from_secs(DOGSTATSD_FLUSH_INTERVAL)); + let mut 
enhanced_metrics_collection_interval = interval(Duration::from_secs( + ENHANCED_METRICS_COLLECTION_INTERVAL_SECS, + )); flush_interval.tick().await; // discard first tick, which is instantaneous + enhanced_metrics_collection_interval.tick().await; // Builders for log batches that failed transiently in the previous flush // cycle. They are redriven on the next cycle before new batches are sent. let mut pending_log_retries: Vec = Vec::new(); loop { - flush_interval.tick().await; - - if let Some(metrics_flusher) = metrics_flusher.as_ref() { - debug!("Flushing dogstatsd metrics"); - metrics_flusher.flush().await; - } + tokio::select! { + _ = flush_interval.tick() => { + if let Some(metrics_flusher) = metrics_flusher.clone() { + debug!("Flushing dogstatsd metrics"); + tokio::spawn(async move { + metrics_flusher.flush().await; + }); + } - if let Some(log_flusher) = log_flusher.as_ref() { - debug!("Flushing log agent"); - let retry_in = std::mem::take(&mut pending_log_retries); - let failed = log_flusher.flush(retry_in).await; - if !failed.is_empty() { - // TODO: surface flush failures into health/metrics telemetry so - // operators have a durable signal beyond log lines when logs are - // being dropped (e.g. increment a statsd counter or set a gauge). 
- warn!( - "log agent flush failed for {} batch(es); will retry next cycle", - failed.len() - ); - pending_log_retries = failed; + if let Some(log_flusher) = log_flusher.as_ref() { + debug!("Flushing log agent"); + let retry_in = std::mem::take(&mut pending_log_retries); + let failed = log_flusher.flush(retry_in).await; + if !failed.is_empty() { + warn!( + "log agent flush failed for {} batch(es); will retry next cycle", + failed.len() + ); + pending_log_retries = failed; + } + } + } + _ = enhanced_metrics_collection_interval.tick() => { + if let Some(ref collector) = instance_collector { + collector.collect_and_submit(); + } } } } } -async fn start_dogstatsd( - port: u16, +async fn start_aggregator( dd_api_key: Option, dd_site: String, https_proxy: Option, dogstatsd_tags: &str, - metric_namespace: Option, - #[cfg(all(windows, feature = "windows-pipes"))] windows_pipe_name: Option, -) -> (CancellationToken, Option, AggregatorHandle) { - // 1. Create the aggregator service +) -> (Option, AggregatorHandle) { #[allow(clippy::expect_used)] let (service, handle) = AggregatorService::new( SortedTags::parse(dogstatsd_tags).unwrap_or(EMPTY_TAGS), @@ -274,53 +323,17 @@ async fn start_dogstatsd( ) .expect("Failed to create aggregator service"); - // 2. Start the aggregator service in the background tokio::spawn(service.run()); - #[cfg(all(windows, feature = "windows-pipes"))] - let dogstatsd_config = DogStatsDConfig { - host: AGENT_HOST.to_string(), - port, - metric_namespace, - windows_pipe_name, - so_rcvbuf: None, - buffer_size: None, - queue_size: None, - }; - - #[cfg(not(all(windows, feature = "windows-pipes")))] - let dogstatsd_config = DogStatsDConfig { - host: AGENT_HOST.to_string(), - port, - metric_namespace, - so_rcvbuf: None, - buffer_size: None, - queue_size: None, - }; - let dogstatsd_cancel_token = tokio_util::sync::CancellationToken::new(); - - // 3. 
Use handle in DogStatsD (cheap to clone) - let dogstatsd_client = DogStatsD::new( - &dogstatsd_config, - handle.clone(), - dogstatsd_cancel_token.clone(), - ) - .await; - - tokio::spawn(async move { - dogstatsd_client.spin().await; - }); - let metrics_flusher = match dd_api_key { Some(dd_api_key) => { let client = match build_metrics_client(https_proxy, DOGSTATSD_TIMEOUT_DURATION) { Ok(client) => client, Err(e) => { error!("Failed to build HTTP client: {e}, won't flush metrics"); - return (dogstatsd_cancel_token, None, handle); + return (None, handle); } }; - let metrics_intake_url_prefix = match Site::new(dd_site) .map_err(|e| e.to_string()) .and_then(|site| { @@ -329,7 +342,7 @@ async fn start_dogstatsd( Ok(prefix) => prefix, Err(e) => { error!("Failed to create metrics intake URL: {e}, won't flush metrics"); - return (dogstatsd_cancel_token, None, handle); + return (None, handle); } }; @@ -349,7 +362,49 @@ async fn start_dogstatsd( } }; - (dogstatsd_cancel_token, metrics_flusher, handle) + (metrics_flusher, handle) +} + +async fn start_dogstatsd_listener( + port: u16, + handle: AggregatorHandle, + metric_namespace: Option, + #[cfg(all(windows, feature = "windows-pipes"))] windows_pipe_name: Option, +) -> CancellationToken { + #[cfg(all(windows, feature = "windows-pipes"))] + let dogstatsd_config = DogStatsDConfig { + host: AGENT_HOST.to_string(), + port, + metric_namespace, + windows_pipe_name, + so_rcvbuf: None, + buffer_size: None, + queue_size: None, + }; + + #[cfg(not(all(windows, feature = "windows-pipes")))] + let dogstatsd_config = DogStatsDConfig { + host: AGENT_HOST.to_string(), + port, + metric_namespace, + so_rcvbuf: None, + buffer_size: None, + queue_size: None, + }; + let dogstatsd_cancel_token = tokio_util::sync::CancellationToken::new(); + + let dogstatsd_client = DogStatsD::new( + &dogstatsd_config, + handle.clone(), + dogstatsd_cancel_token.clone(), + ) + .await; + + tokio::spawn(async move { + dogstatsd_client.spin().await; + }); + + 
dogstatsd_cancel_token } fn build_metrics_client( From a57a342c1ffa83ff62cc59284164a6a097c6eeb2 Mon Sep 17 00:00:00 2001 From: Kathie Huang Date: Mon, 13 Apr 2026 17:23:55 -0400 Subject: [PATCH 04/10] Add windows-enhanced-metrics feature to CI --- .github/workflows/build-datadog-serverless-compat.yml | 4 ++-- .github/workflows/cargo.yml | 2 +- crates/datadog-serverless-compat/Cargo.toml | 2 ++ 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/build-datadog-serverless-compat.yml b/.github/workflows/build-datadog-serverless-compat.yml index 8c88500..070938f 100644 --- a/.github/workflows/build-datadog-serverless-compat.yml +++ b/.github/workflows/build-datadog-serverless-compat.yml @@ -56,7 +56,7 @@ jobs: retention-days: 3 - if: ${{ inputs.runner == 'windows-2022' }} shell: bash - run: cargo build --release -p datadog-serverless-compat --features windows-pipes + run: cargo build --release -p datadog-serverless-compat --features windows-pipes,windows-enhanced-metrics - if: ${{ inputs.runner == 'windows-2022' }} uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # 4.6.2 with: @@ -69,7 +69,7 @@ jobs: rustup target add i686-pc-windows-msvc cargo build --release -p datadog-serverless-compat \ --target i686-pc-windows-msvc \ - --features windows-pipes + --features windows-pipes,windows-enhanced-metrics - if: ${{ inputs.runner == 'windows-2022' }} uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # 4.6.2 with: diff --git a/.github/workflows/cargo.yml b/.github/workflows/cargo.yml index fc640c5..7863afd 100644 --- a/.github/workflows/cargo.yml +++ b/.github/workflows/cargo.yml @@ -95,7 +95,7 @@ jobs: - shell: bash run: | if [[ "${{ inputs.runner }}" == "windows-2022" ]]; then - cargo nextest run --workspace --features datadog-serverless-compat/windows-pipes + cargo nextest run --workspace --features datadog-serverless-compat/windows-pipes,datadog-serverless-compat/windows-enhanced-metrics else cargo 
nextest run --workspace fi diff --git a/crates/datadog-serverless-compat/Cargo.toml b/crates/datadog-serverless-compat/Cargo.toml index b84bb15..e6a114c 100644 --- a/crates/datadog-serverless-compat/Cargo.toml +++ b/crates/datadog-serverless-compat/Cargo.toml @@ -8,9 +8,11 @@ description = "Binary to run trace-agent and dogstatsd servers in Serverless env [features] default = [] windows-pipes = ["datadog-trace-agent/windows-pipes", "dogstatsd/windows-pipes"] +windows-enhanced-metrics = ["datadog-metrics-collector/windows-enhanced-metrics"] [dependencies] datadog-logs-agent = { path = "../datadog-logs-agent" } +datadog-metrics-collector = { path = "../datadog-metrics-collector" } datadog-trace-agent = { path = "../datadog-trace-agent" } libdd-trace-utils = { git = "https://github.com/DataDog/libdatadog", rev = "8c88979985154d6d97c0fc2ca9039682981eacad" } datadog-fips = { path = "../datadog-fips", default-features = false } From fc24a42bf7cad172f327604fb2d5e797d05fae86 Mon Sep 17 00:00:00 2001 From: Kathie Huang Date: Tue, 14 Apr 2026 16:24:57 -0400 Subject: [PATCH 05/10] Update collection interval, change info log to debug, and update libdatadog rev --- Cargo.lock | 31 +------------------ crates/datadog-metrics-collector/Cargo.toml | 2 +- .../datadog-metrics-collector/src/instance.rs | 4 +-- crates/datadog-serverless-compat/src/main.rs | 2 +- 4 files changed, 5 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c1bc139..1703d95 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -492,7 +492,7 @@ name = "datadog-metrics-collector" version = "0.1.0" dependencies = [ "dogstatsd", - "libdd-common 1.1.0", + "libdd-common 3.0.1", "tracing", ] @@ -1441,35 +1441,6 @@ version = "0.2.183" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5b646652bf6661599e1da8901b3b9522896f01e736bad5f723fe7a3a27f899d" -[[package]] -name = "libdd-common" -version = "1.1.0" -source = 
"git+https://github.com/DataDog/libdatadog?rev=d52ee90209cb12a28bdda0114535c1a985a29d95#d52ee90209cb12a28bdda0114535c1a985a29d95" -dependencies = [ - "anyhow", - "cc", - "const_format", - "futures", - "futures-core", - "futures-util", - "hex", - "http", - "http-body", - "http-body-util", - "hyper", - "hyper-util", - "libc", - "nix", - "pin-project", - "regex", - "serde", - "static_assertions", - "thiserror 1.0.69", - "tokio", - "tower-service", - "windows-sys 0.52.0", -] - [[package]] name = "libdd-common" version = "2.0.1" diff --git a/crates/datadog-metrics-collector/Cargo.toml b/crates/datadog-metrics-collector/Cargo.toml index 0153e63..09bac29 100644 --- a/crates/datadog-metrics-collector/Cargo.toml +++ b/crates/datadog-metrics-collector/Cargo.toml @@ -8,7 +8,7 @@ description = "Collector to read, compute, and submit enhanced metrics in Server [dependencies] dogstatsd = { path = "../dogstatsd", default-features = true } tracing = { version = "0.1", default-features = false } -libdd-common = { git = "https://github.com/DataDog/libdatadog", rev = "d52ee90209cb12a28bdda0114535c1a985a29d95", default-features = false } +libdd-common = { git = "https://github.com/DataDog/libdatadog", rev = "8c88979985154d6d97c0fc2ca9039682981eacad", default-features = false } [features] windows-enhanced-metrics = [] diff --git a/crates/datadog-metrics-collector/src/instance.rs b/crates/datadog-metrics-collector/src/instance.rs index 848e862..42aa5ce 100644 --- a/crates/datadog-metrics-collector/src/instance.rs +++ b/crates/datadog-metrics-collector/src/instance.rs @@ -13,7 +13,7 @@ use dogstatsd::aggregator::AggregatorHandle; use dogstatsd::metric::{Metric, MetricValue, SortedTags}; use std::env; -use tracing::{debug, error, info}; +use tracing::{debug, error}; const INSTANCE_METRIC: &str = "azure.functions.enhanced.instance"; @@ -53,7 +53,7 @@ impl InstanceMetricsCollector { pub fn new(aggregator: AggregatorHandle, tags: Option) -> Self { let instance_id = resolve_instance_id(); if 
let Some(ref id) = instance_id { - info!("Instance ID resolved: {}", id); + debug!("Instance ID resolved: {}", id); } else { debug!("No instance ID found, instance metric will not be submitted"); } diff --git a/crates/datadog-serverless-compat/src/main.rs b/crates/datadog-serverless-compat/src/main.rs index 398c1d4..6a01583 100644 --- a/crates/datadog-serverless-compat/src/main.rs +++ b/crates/datadog-serverless-compat/src/main.rs @@ -45,7 +45,7 @@ use dogstatsd::metric::{EMPTY_TAGS, SortedTags}; use tokio_util::sync::CancellationToken; const DOGSTATSD_FLUSH_INTERVAL: u64 = 10; -const ENHANCED_METRICS_COLLECTION_INTERVAL_SECS: u64 = 10; +const ENHANCED_METRICS_COLLECTION_INTERVAL_SECS: u64 = 1; const DOGSTATSD_TIMEOUT_DURATION: Duration = Duration::from_secs(5); const DEFAULT_DOGSTATSD_PORT: u16 = 8125; const DEFAULT_LOG_INTAKE_PORT: u16 = 10517; From c0896174c4b2d939ba3b8f028ff7f0c91485e79a Mon Sep 17 00:00:00 2001 From: Kathie Huang Date: Tue, 14 Apr 2026 17:00:40 -0400 Subject: [PATCH 06/10] Change instance metric collection interval to 3, update comments --- .../datadog-metrics-collector/src/instance.rs | 14 ++--- crates/datadog-serverless-compat/src/main.rs | 17 +++--- crates/dogstatsd/src/origin.rs | 52 +++++++++---------- 3 files changed, 39 insertions(+), 44 deletions(-) diff --git a/crates/datadog-metrics-collector/src/instance.rs b/crates/datadog-metrics-collector/src/instance.rs index 42aa5ce..84fd701 100644 --- a/crates/datadog-metrics-collector/src/instance.rs +++ b/crates/datadog-metrics-collector/src/instance.rs @@ -4,11 +4,7 @@ //! Instance identity metric collector for Azure Functions. //! //! Submits `azure.functions.enhanced.instance` with value 1.0 on each -//! collection tick, tagged with the instance identifier. The env var -//! checked depends on the Azure plan type: -//! -//! - Elastic Premium / Premium: `WEBSITE_INSTANCE_ID` -//! - Flex Consumption / Consumption: `WEBSITE_POD_NAME` or `CONTAINER_NAME` +//! 
collection tick, tagged with the instance identifier. use dogstatsd::aggregator::AggregatorHandle; use dogstatsd::metric::{Metric, MetricValue, SortedTags}; @@ -33,8 +29,8 @@ fn resolve_instance_id_from( /// /// Checks in order: /// 1. `WEBSITE_INSTANCE_ID` (Elastic Premium / Premium plans) -/// 2. `WEBSITE_POD_NAME` (Flex Consumption plans) -/// 3. `CONTAINER_NAME` (Consumption plans) +/// 2. `WEBSITE_POD_NAME` (Flex Consumption / Consumption plans) +/// 3. `CONTAINER_NAME` (Flex Consumption / Consumption plans) fn resolve_instance_id() -> Option { resolve_instance_id_from( env::var("WEBSITE_INSTANCE_ID").ok().as_deref(), @@ -52,9 +48,7 @@ pub struct InstanceMetricsCollector { impl InstanceMetricsCollector { pub fn new(aggregator: AggregatorHandle, tags: Option) -> Self { let instance_id = resolve_instance_id(); - if let Some(ref id) = instance_id { - debug!("Instance ID resolved: {}", id); - } else { + if instance_id.is_none() { debug!("No instance ID found, instance metric will not be submitted"); } Self { diff --git a/crates/datadog-serverless-compat/src/main.rs b/crates/datadog-serverless-compat/src/main.rs index 6a01583..941ec4c 100644 --- a/crates/datadog-serverless-compat/src/main.rs +++ b/crates/datadog-serverless-compat/src/main.rs @@ -45,7 +45,7 @@ use dogstatsd::metric::{EMPTY_TAGS, SortedTags}; use tokio_util::sync::CancellationToken; const DOGSTATSD_FLUSH_INTERVAL: u64 = 10; -const ENHANCED_METRICS_COLLECTION_INTERVAL_SECS: u64 = 1; +const INSTANCE_METRICS_COLLECTION_INTERVAL_SECS: u64 = 3; const DOGSTATSD_TIMEOUT_DURATION: Duration = Duration::from_secs(5); const DEFAULT_DOGSTATSD_PORT: u16 = 8125; const DEFAULT_LOG_INTAKE_PORT: u16 = 10517; @@ -122,17 +122,11 @@ pub async fn main() { .and_then(|v| v.parse::().ok()) .unwrap_or(DEFAULT_LOG_INTAKE_PORT); - // Only enable enhanced metrics for Linux Azure Functions - #[cfg(not(feature = "windows-enhanced-metrics"))] let dd_enhanced_metrics = env_type == EnvironmentType::AzureFunction && 
env::var("DD_ENHANCED_METRICS_ENABLED") .map(|val| val.to_lowercase() != "false") .unwrap_or(true); - // Enhanced metrics are not yet supported in Windows environments - #[cfg(feature = "windows-enhanced-metrics")] - let dd_enhanced_metrics = false; - debug!("Starting serverless trace mini agent"); let env_filter = format!("h2=off,hyper=off,rustls=off,{}", log_level); @@ -200,6 +194,7 @@ pub async fn main() { // The aggregator is shared between dogstatsd and enhanced metrics. // It is started independently so that either can be enabled without the other. + // Only dogstatsd needs the dogstatsd listener let (metrics_flusher, aggregator_handle) = if needs_aggregator { debug!("Creating metrics flusher and aggregator"); @@ -269,7 +264,7 @@ pub async fn main() { let mut flush_interval = interval(Duration::from_secs(DOGSTATSD_FLUSH_INTERVAL)); let mut enhanced_metrics_collection_interval = interval(Duration::from_secs( - ENHANCED_METRICS_COLLECTION_INTERVAL_SECS, + INSTANCE_METRICS_COLLECTION_INTERVAL_SECS, )); flush_interval.tick().await; // discard first tick, which is instantaneous enhanced_metrics_collection_interval.tick().await; @@ -293,6 +288,9 @@ pub async fn main() { let retry_in = std::mem::take(&mut pending_log_retries); let failed = log_flusher.flush(retry_in).await; if !failed.is_empty() { + // TODO: surface flush failures into health/metrics telemetry so + // operators have a durable signal beyond log lines when logs are + // being dropped (e.g. increment a statsd counter or set a gauge). 
warn!( "log agent flush failed for {} batch(es); will retry next cycle", failed.len() @@ -316,6 +314,7 @@ async fn start_aggregator( https_proxy: Option, dogstatsd_tags: &str, ) -> (Option, AggregatorHandle) { + // Create the aggregator service #[allow(clippy::expect_used)] let (service, handle) = AggregatorService::new( SortedTags::parse(dogstatsd_tags).unwrap_or(EMPTY_TAGS), @@ -323,6 +322,7 @@ async fn start_aggregator( ) .expect("Failed to create aggregator service"); + // Start the aggregator service in the background tokio::spawn(service.run()); let metrics_flusher = match dd_api_key { @@ -393,6 +393,7 @@ async fn start_dogstatsd_listener( }; let dogstatsd_cancel_token = tokio_util::sync::CancellationToken::new(); + // Use handle in DogStatsD (cheap to clone) let dogstatsd_client = DogStatsD::new( &dogstatsd_config, handle.clone(), diff --git a/crates/dogstatsd/src/origin.rs b/crates/dogstatsd/src/origin.rs index 98e18c2..61705fd 100644 --- a/crates/dogstatsd/src/origin.rs +++ b/crates/dogstatsd/src/origin.rs @@ -300,7 +300,32 @@ mod tests { } #[test] - fn test_find_metric_origin_azure_functions() { + fn test_find_metric_origin_azure_functions_enhanced() { + let tags = SortedTags::parse("origin:azurefunction").unwrap(); + let metric = Metric { + id: 0, + name: "azure.functions.enhanced.instance".into(), + value: MetricValue::Gauge(1.0), + tags: Some(tags.clone()), + timestamp: 0, + }; + let origin = metric.find_origin(tags).unwrap(); + assert_eq!( + origin.origin_product as u32, + OriginProduct::Serverless as u32 + ); + assert_eq!( + origin.origin_category as u32, + OriginCategory::AzureFunctionsMetrics as u32 + ); + assert_eq!( + origin.origin_service as u32, + OriginService::ServerlessEnhanced as u32 + ); + } + + #[test] + fn test_find_metric_origin_azure_functions_custom() { let tags = SortedTags::parse("origin:azurefunction").unwrap(); let metric = Metric { id: 0, @@ -354,31 +379,6 @@ mod tests { assert_eq!(origin, None); } - #[test] - fn 
test_find_metric_origin_azure_functions_enhanced() { - let tags = SortedTags::parse("origin:azurefunction").unwrap(); - let metric = Metric { - id: 0, - name: "azure.functions.enhanced.instance".into(), - value: MetricValue::Gauge(1.0), - tags: Some(tags.clone()), - timestamp: 0, - }; - let origin = metric.find_origin(tags).unwrap(); - assert_eq!( - origin.origin_product as u32, - OriginProduct::Serverless as u32 - ); - assert_eq!( - origin.origin_category as u32, - OriginCategory::AzureFunctionsMetrics as u32 - ); - assert_eq!( - origin.origin_service as u32, - OriginService::ServerlessEnhanced as u32 - ); - } - #[test] fn test_find_metric_origin_unknown() { let tags = SortedTags::parse("unknown:tag").unwrap(); From 3fd7bc4766761963556e8b74b462cf617b5351bd Mon Sep 17 00:00:00 2001 From: Kathie Huang Date: Wed, 15 Apr 2026 10:27:07 -0400 Subject: [PATCH 07/10] Remove windows feature for now --- .github/workflows/build-datadog-serverless-compat.yml | 4 ++-- .github/workflows/cargo.yml | 2 +- crates/datadog-metrics-collector/Cargo.toml | 3 --- crates/datadog-serverless-compat/Cargo.toml | 1 - crates/datadog-serverless-compat/src/main.rs | 2 ++ 5 files changed, 5 insertions(+), 7 deletions(-) diff --git a/.github/workflows/build-datadog-serverless-compat.yml b/.github/workflows/build-datadog-serverless-compat.yml index 070938f..8c88500 100644 --- a/.github/workflows/build-datadog-serverless-compat.yml +++ b/.github/workflows/build-datadog-serverless-compat.yml @@ -56,7 +56,7 @@ jobs: retention-days: 3 - if: ${{ inputs.runner == 'windows-2022' }} shell: bash - run: cargo build --release -p datadog-serverless-compat --features windows-pipes,windows-enhanced-metrics + run: cargo build --release -p datadog-serverless-compat --features windows-pipes - if: ${{ inputs.runner == 'windows-2022' }} uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # 4.6.2 with: @@ -69,7 +69,7 @@ jobs: rustup target add i686-pc-windows-msvc cargo build --release -p 
datadog-serverless-compat \ --target i686-pc-windows-msvc \ - --features windows-pipes,windows-enhanced-metrics + --features windows-pipes - if: ${{ inputs.runner == 'windows-2022' }} uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # 4.6.2 with: diff --git a/.github/workflows/cargo.yml b/.github/workflows/cargo.yml index 7863afd..fc640c5 100644 --- a/.github/workflows/cargo.yml +++ b/.github/workflows/cargo.yml @@ -95,7 +95,7 @@ jobs: - shell: bash run: | if [[ "${{ inputs.runner }}" == "windows-2022" ]]; then - cargo nextest run --workspace --features datadog-serverless-compat/windows-pipes,datadog-serverless-compat/windows-enhanced-metrics + cargo nextest run --workspace --features datadog-serverless-compat/windows-pipes else cargo nextest run --workspace fi diff --git a/crates/datadog-metrics-collector/Cargo.toml b/crates/datadog-metrics-collector/Cargo.toml index 09bac29..7e6d0b9 100644 --- a/crates/datadog-metrics-collector/Cargo.toml +++ b/crates/datadog-metrics-collector/Cargo.toml @@ -9,6 +9,3 @@ description = "Collector to read, compute, and submit enhanced metrics in Server dogstatsd = { path = "../dogstatsd", default-features = true } tracing = { version = "0.1", default-features = false } libdd-common = { git = "https://github.com/DataDog/libdatadog", rev = "8c88979985154d6d97c0fc2ca9039682981eacad", default-features = false } - -[features] -windows-enhanced-metrics = [] diff --git a/crates/datadog-serverless-compat/Cargo.toml b/crates/datadog-serverless-compat/Cargo.toml index e6a114c..215d401 100644 --- a/crates/datadog-serverless-compat/Cargo.toml +++ b/crates/datadog-serverless-compat/Cargo.toml @@ -8,7 +8,6 @@ description = "Binary to run trace-agent and dogstatsd servers in Serverless env [features] default = [] windows-pipes = ["datadog-trace-agent/windows-pipes", "dogstatsd/windows-pipes"] -windows-enhanced-metrics = ["datadog-metrics-collector/windows-enhanced-metrics"] [dependencies] datadog-logs-agent = { path = 
"../datadog-logs-agent" } diff --git a/crates/datadog-serverless-compat/src/main.rs b/crates/datadog-serverless-compat/src/main.rs index 941ec4c..b8645b5 100644 --- a/crates/datadog-serverless-compat/src/main.rs +++ b/crates/datadog-serverless-compat/src/main.rs @@ -308,6 +308,8 @@ pub async fn main() { } } +/// Starts the metrics aggregator service and creates a flusher to send +/// aggregated metrics to the Datadog intake. async fn start_aggregator( dd_api_key: Option, dd_site: String, From ec977b21d1097bdb14451198b781dba1343e0255 Mon Sep 17 00:00:00 2001 From: Kathie Huang Date: Wed, 15 Apr 2026 12:36:45 -0400 Subject: [PATCH 08/10] Add precondition for enhanced metrics collector in tokio select loop --- crates/datadog-serverless-compat/src/main.rs | 6 +- .../2026-04-13-instance-enhanced-metric.md | 791 ++++++++++++++++++ 2 files changed, 794 insertions(+), 3 deletions(-) create mode 100644 docs/superpowers/plans/2026-04-13-instance-enhanced-metric.md diff --git a/crates/datadog-serverless-compat/src/main.rs b/crates/datadog-serverless-compat/src/main.rs index b8645b5..0ce4ef2 100644 --- a/crates/datadog-serverless-compat/src/main.rs +++ b/crates/datadog-serverless-compat/src/main.rs @@ -263,11 +263,11 @@ pub async fn main() { }; let mut flush_interval = interval(Duration::from_secs(DOGSTATSD_FLUSH_INTERVAL)); - let mut enhanced_metrics_collection_interval = interval(Duration::from_secs( + let mut instance_metrics_collection_interval = interval(Duration::from_secs( INSTANCE_METRICS_COLLECTION_INTERVAL_SECS, )); flush_interval.tick().await; // discard first tick, which is instantaneous - enhanced_metrics_collection_interval.tick().await; + instance_metrics_collection_interval.tick().await; // Builders for log batches that failed transiently in the previous flush // cycle. They are redriven on the next cycle before new batches are sent. 
@@ -299,7 +299,7 @@ pub async fn main() { } } } - _ = enhanced_metrics_collection_interval.tick() => { + _ = instance_metrics_collection_interval.tick(), if instance_collector.is_some() => { if let Some(ref collector) = instance_collector { collector.collect_and_submit(); } diff --git a/docs/superpowers/plans/2026-04-13-instance-enhanced-metric.md b/docs/superpowers/plans/2026-04-13-instance-enhanced-metric.md new file mode 100644 index 0000000..f348264 --- /dev/null +++ b/docs/superpowers/plans/2026-04-13-instance-enhanced-metric.md @@ -0,0 +1,791 @@ +# Instance Enhanced Metric Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Add an `azure.functions.enhanced.instance` metric that reports the Azure Functions instance identity, enabling per-instance observability. + +**Architecture:** Create the `datadog-metrics-collector` crate (ported from the CPU metrics branch scaffolding, minus CPU-specific code). The crate exposes an `InstanceMetricsCollector` that reads instance identity from env vars (`WEBSITE_INSTANCE_ID`, `WEBSITE_POD_NAME`, `CONTAINER_NAME`) and submits a **gauge** metric with value `1.0` on each collection tick, with the instance ID as an `instance_id` tag. This follows the datadog-agent pattern (PR 47421) where usage/instance metrics are gauges (not distributions) because the instance tag already provides a unique identifier, avoiding aggregation issues. The collector is wired into `datadog-serverless-compat`'s main loop via a `tokio::select!` arm, sharing the existing DogStatsD aggregator. Origin classification in `dogstatsd/src/origin.rs` needs the `azure.functions` prefix added to route instance metrics as `ServerlessEnhanced`. 
+ +**Tech Stack:** Rust, tokio, dogstatsd crate (local), libdd-common (libdatadog) + +--- + +### Task 1: Create `datadog-metrics-collector` crate with shared tag builder + +**Files:** +- Create: `crates/datadog-metrics-collector/Cargo.toml` +- Create: `crates/datadog-metrics-collector/src/lib.rs` + +This task creates the crate shell. No metric logic yet. The workspace `Cargo.toml` uses `crates/*` glob so no workspace edit is needed. + +- [ ] **Step 1: Create `Cargo.toml`** + +```toml +[package] +name = "datadog-metrics-collector" +version = "0.1.0" +edition.workspace = true +license.workspace = true +description = "Collector to read, compute, and submit enhanced metrics in Serverless environments" + +[dependencies] +dogstatsd = { path = "../dogstatsd", default-features = true } +tracing = { version = "0.1", default-features = false } +libdd-common = { git = "https://github.com/DataDog/libdatadog", rev = "d52ee90209cb12a28bdda0114535c1a985a29d95", default-features = false } + +[features] +windows-enhanced-metrics = [] +``` + +Note: `num_cpus` is intentionally omitted — it's only needed for CPU metrics, not instance metrics. + +- [ ] **Step 2: Create `lib.rs`** + +```rust +// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +#![cfg_attr(not(test), deny(clippy::panic))] +#![cfg_attr(not(test), deny(clippy::unwrap_used))] +#![cfg_attr(not(test), deny(clippy::expect_used))] +#![cfg_attr(not(test), deny(clippy::todo))] +#![cfg_attr(not(test), deny(clippy::unimplemented))] + +pub mod instance; +pub mod tags; +``` + +- [ ] **Step 3: Verify the crate compiles** + +Run: `cargo check -p datadog-metrics-collector` +Expected: success (will fail until Task 2 and 3 create the modules) + +Note: This step will be verified after Tasks 2 and 3 are done. 
+ +- [ ] **Step 4: Commit** + +```bash +git add crates/datadog-metrics-collector/Cargo.toml crates/datadog-metrics-collector/src/lib.rs +git commit -m "feat(metrics-collector): create datadog-metrics-collector crate shell" +``` + +--- + +### Task 2: Extract shared tag builder into `tags.rs` + +**Files:** +- Create: `crates/datadog-metrics-collector/src/tags.rs` + +The tag builder is lifted from `cpu.rs` on the CPU branch. It's shared infrastructure for all enhanced metrics (instance, CPU, memory, etc.), so it lives in its own module. + +- [ ] **Step 1: Create `tags.rs`** + +```rust +// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Shared tag builder for enhanced metrics. +//! +//! Tags are attached to all enhanced metrics submitted by the metrics collector. + +use dogstatsd::metric::SortedTags; +use libdd_common::azure_app_services; +use std::env; + +/// Builds the common tags for all enhanced metrics. +/// +/// Sources: +/// - Azure metadata (resource_group, subscription_id, name) from libdd_common +/// - Environment variables (region, plan_tier, service, env, version, serverless_compat_version) +/// +/// The DogStatsD origin tag (e.g. `origin:azurefunction`) is added by the aggregator, +/// not here. 
+pub fn build_enhanced_metrics_tags() -> Option { + let mut tag_parts = Vec::new(); + + if let Some(aas_metadata) = &*azure_app_services::AAS_METADATA_FUNCTION { + let aas_tags = [ + ("resource_group", aas_metadata.get_resource_group()), + ("subscription_id", aas_metadata.get_subscription_id()), + ("name", aas_metadata.get_site_name()), + ]; + for (name, value) in aas_tags { + if value != "unknown" { + tag_parts.push(format!("{}:{}", name, value)); + } + } + } + + for (tag_name, env_var) in [ + ("region", "REGION_NAME"), + ("plan_tier", "WEBSITE_SKU"), + ("service", "DD_SERVICE"), + ("env", "DD_ENV"), + ("version", "DD_VERSION"), + ("serverless_compat_version", "DD_SERVERLESS_COMPAT_VERSION"), + ] { + if let Ok(val) = env::var(env_var) + && !val.is_empty() + { + tag_parts.push(format!("{}:{}", tag_name, val)); + } + } + + if tag_parts.is_empty() { + return None; + } + SortedTags::parse(&tag_parts.join(",")).ok() +} +``` + +- [ ] **Step 2: Commit** + +```bash +git add crates/datadog-metrics-collector/src/tags.rs +git commit -m "feat(metrics-collector): add shared tag builder for enhanced metrics" +``` + +--- + +### Task 3: Implement `InstanceMetricsCollector` + +**Files:** +- Create: `crates/datadog-metrics-collector/src/instance.rs` + +The instance metric is simple: read the instance ID from env vars, submit `azure.functions.enhanced.instance` as a **gauge** with value `1.0` and an `instance_id` tag. Following the datadog-agent pattern (PR 47421), usage/instance metrics use gauges because the instance tag provides a unique identifier — no aggregation issues like CPU metrics have. No delta computation, no OS-specific reader. + +- [ ] **Step 1: Write failing test for `resolve_instance_id`** + +Create `crates/datadog-metrics-collector/src/instance.rs` with the test first: + +```rust +// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ +// SPDX-License-Identifier: Apache-2.0 + +//! Instance identity metric collector for Azure Functions. +//! +//! 
Submits `azure.functions.enhanced.instance` with value 1.0 on each +//! collection tick, tagged with the instance identifier. The env var +//! checked depends on the Azure plan type: +//! +//! - Elastic Premium / Premium: `WEBSITE_INSTANCE_ID` +//! - Flex Consumption / Consumption: `WEBSITE_POD_NAME` or `CONTAINER_NAME` + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_resolve_instance_id_returns_none_when_no_env_vars() { + // Ensure none of the vars are set (they shouldn't be in test env) + let id = resolve_instance_id_from(None, None, None); + assert!(id.is_none()); + } + + #[test] + fn test_resolve_instance_id_prefers_website_instance_id() { + let id = resolve_instance_id_from( + Some("instance-abc"), + Some("pod-xyz"), + Some("container-123"), + ); + assert_eq!(id, Some("instance-abc".to_string())); + } + + #[test] + fn test_resolve_instance_id_falls_back_to_pod_name() { + let id = resolve_instance_id_from(None, Some("pod-xyz"), Some("container-123")); + assert_eq!(id, Some("pod-xyz".to_string())); + } + + #[test] + fn test_resolve_instance_id_falls_back_to_container_name() { + let id = resolve_instance_id_from(None, None, Some("container-123")); + assert_eq!(id, Some("container-123".to_string())); + } +} +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cargo test -p datadog-metrics-collector -- test_resolve_instance_id 2>&1` +Expected: FAIL — `resolve_instance_id_from` not found + +- [ ] **Step 3: Implement `resolve_instance_id_from` and `resolve_instance_id`** + +Add above the `#[cfg(test)]` block: + +```rust +use dogstatsd::aggregator::AggregatorHandle; +use dogstatsd::metric::{Metric, MetricValue, SortedTags}; +use std::env; +use tracing::{debug, error, info}; + +const INSTANCE_METRIC: &str = "azure.functions.enhanced.instance"; + +/// Resolves the instance ID from explicit values (used by tests). 
+fn resolve_instance_id_from( + website_instance_id: Option<&str>, + website_pod_name: Option<&str>, + container_name: Option<&str>, +) -> Option { + website_instance_id + .or(website_pod_name) + .or(container_name) + .map(String::from) +} + +/// Resolves the instance ID from environment variables. +/// +/// Checks in order: +/// 1. `WEBSITE_INSTANCE_ID` (Elastic Premium / Premium plans) +/// 2. `WEBSITE_POD_NAME` (Flex Consumption plans) +/// 3. `CONTAINER_NAME` (Consumption plans) +fn resolve_instance_id() -> Option { + resolve_instance_id_from( + env::var("WEBSITE_INSTANCE_ID").ok().as_deref(), + env::var("WEBSITE_POD_NAME").ok().as_deref(), + env::var("CONTAINER_NAME").ok().as_deref(), + ) +} + +pub struct InstanceMetricsCollector { + aggregator: AggregatorHandle, + tags: Option, + instance_id: Option, +} + +impl InstanceMetricsCollector { + pub fn new(aggregator: AggregatorHandle, tags: Option) -> Self { + let instance_id = resolve_instance_id(); + if let Some(ref id) = instance_id { + info!("Instance ID resolved: {}", id); + } else { + debug!("No instance ID found, instance metric will not be submitted"); + } + Self { + aggregator, + tags, + instance_id, + } + } + + pub fn collect_and_submit(&self) { + let Some(ref instance_id) = self.instance_id else { + debug!("No instance ID available, skipping instance metric"); + return; + }; + + // Build tags: start with shared tags, add instance_id + let instance_tag = format!("instance_id:{}", instance_id); + let tag_string = match &self.tags { + Some(existing) => format!("{},{}", existing, instance_tag), + None => instance_tag, + }; + let tags = SortedTags::parse(&tag_string).ok(); + + let now = std::time::UNIX_EPOCH + .elapsed() + .map(|d| d.as_secs()) + .unwrap_or(0) + .try_into() + .unwrap_or(0); + + let metric = Metric::new( + INSTANCE_METRIC.into(), + MetricValue::gauge(1.0), + tags, + Some(now), + ); + + if let Err(e) = self.aggregator.insert_batch(vec![metric]) { + error!("Failed to insert instance metric: 
{}", e); + } + } +} +``` + +- [ ] **Step 4: Run tests to verify they pass** + +Run: `cargo test -p datadog-metrics-collector -- test_resolve_instance_id` +Expected: all 4 tests PASS + +- [ ] **Step 5: Verify crate compiles** + +Run: `cargo check -p datadog-metrics-collector` +Expected: success + +- [ ] **Step 6: Commit** + +```bash +git add crates/datadog-metrics-collector/src/instance.rs +git commit -m "feat(metrics-collector): add instance identity metric collector" +``` + +--- + +### Task 4: Add `azure.functions` prefix to origin classification + +**Files:** +- Modify: `crates/dogstatsd/src/origin.rs` + +The current `main` branch doesn't include `azure.functions` in the enhanced-service prefix check. The CPU branch added it. We need it so `azure.functions.enhanced.instance` gets classified as `ServerlessEnhanced`. + +- [ ] **Step 1: Write failing test for the new origin classification** + +Add this test to the `mod tests` block at the bottom of `crates/dogstatsd/src/origin.rs`: + +```rust + #[test] + fn test_find_metric_origin_azure_functions_enhanced() { + let tags = SortedTags::parse("origin:azurefunction").unwrap(); + let metric = Metric { + id: 0, + name: "azure.functions.enhanced.instance".into(), + value: MetricValue::Gauge(1.0), + tags: Some(tags.clone()), + timestamp: 0, + }; + let origin = metric.find_origin(tags).unwrap(); + assert_eq!( + origin.origin_product as u32, + OriginProduct::Serverless as u32 + ); + assert_eq!( + origin.origin_category as u32, + OriginCategory::AzureFunctionsMetrics as u32 + ); + assert_eq!( + origin.origin_service as u32, + OriginService::ServerlessEnhanced as u32 + ); + } +``` + +- [ ] **Step 2: Run test to verify it fails** + +Run: `cargo test -p dogstatsd -- test_find_metric_origin_azure_functions_enhanced` +Expected: FAIL — `azure.functions` prefix not matched for `ServerlessEnhanced`, falls through to `ServerlessCustom` + +- [ ] **Step 3: Add `azure.functions` prefix constant and update matching** + +In 
`crates/dogstatsd/src/origin.rs`, add the constant: + +```rust +const AZURE_FUNCTIONS_PREFIX: &str = "azure.functions"; +``` + +And update the service matching in `find_origin` to include it: + +```rust + } else if metric_prefix == AWS_LAMBDA_PREFIX + || metric_prefix == GOOGLE_CLOUD_RUN_PREFIX + || metric_prefix == AZURE_FUNCTIONS_PREFIX + { +``` + +- [ ] **Step 4: Run test to verify it passes** + +Run: `cargo test -p dogstatsd -- test_find_metric_origin_azure_functions_enhanced` +Expected: PASS + +- [ ] **Step 5: Run all dogstatsd tests to check for regressions** + +Run: `cargo test -p dogstatsd` +Expected: all tests pass + +- [ ] **Step 6: Commit** + +```bash +git add crates/dogstatsd/src/origin.rs +git commit -m "feat(dogstatsd): classify azure.functions prefix as ServerlessEnhanced origin" +``` + +--- + +### Task 5: Wire `InstanceMetricsCollector` into `main.rs` + +**Files:** +- Modify: `crates/datadog-serverless-compat/Cargo.toml` +- Modify: `crates/datadog-serverless-compat/src/main.rs` + +This is the integration task. The main changes to `main.rs`: +1. Add `DD_ENHANCED_METRICS_ENABLED` env var check (Azure Functions only, default true, disabled on Windows) +2. Refactor aggregator creation to be shared between dogstatsd and enhanced metrics (same pattern as CPU branch) +3. Add a `tokio::select!` loop with separate flush and collection intervals +4. 
Create and run the `InstanceMetricsCollector` + +- [ ] **Step 1: Add `datadog-metrics-collector` dependency to `Cargo.toml`** + +Add to `[dependencies]` in `crates/datadog-serverless-compat/Cargo.toml`: + +```toml +datadog-metrics-collector = { path = "../datadog-metrics-collector" } +``` + +Add to `[features]`: + +```toml +windows-enhanced-metrics = ["datadog-metrics-collector/windows-enhanced-metrics"] +``` + +- [ ] **Step 2: Update `main.rs` — add import and collection interval constant** + +Add import near the top with other use statements: + +```rust +use datadog_metrics_collector::instance::InstanceMetricsCollector; +``` + +Add constant: + +```rust +const ENHANCED_METRICS_COLLECTION_INTERVAL_SECS: u64 = 10; +``` + +- [ ] **Step 3: Update `main.rs` — add `dd_enhanced_metrics` env var check** + +After the `dd_logs_enabled` / `dd_logs_port` env var reads (around line 121), add: + +```rust + // Only enable enhanced metrics for Linux Azure Functions + #[cfg(not(feature = "windows-enhanced-metrics"))] + let dd_enhanced_metrics = env_type == EnvironmentType::AzureFunction + && env::var("DD_ENHANCED_METRICS_ENABLED") + .map(|val| val.to_lowercase() != "false") + .unwrap_or(true); + + // Enhanced metrics are not yet supported in Windows environments + #[cfg(feature = "windows-enhanced-metrics")] + let dd_enhanced_metrics = false; +``` + +- [ ] **Step 4: Update `main.rs` — refactor aggregator to be shared** + +Replace the dogstatsd startup block (lines 185-207) and the code down through the flush loop with the shared-aggregator pattern from the CPU branch. The key structural change is: + +1. Create aggregator when `dd_use_dogstatsd || dd_enhanced_metrics` +2. Start DogStatsD listener separately (only if `dd_use_dogstatsd`) +3. Create `InstanceMetricsCollector` when enhanced metrics enabled +4. 
Use `tokio::select!` with both flush and collection intervals + +Replace lines 185-207 (the dogstatsd startup block) with: + +```rust + let needs_aggregator = dd_use_dogstatsd || dd_enhanced_metrics; + + // The aggregator is shared between dogstatsd and enhanced metrics. + // It is started independently so that either can be enabled without the other. + let (metrics_flusher, aggregator_handle) = if needs_aggregator { + debug!("Creating metrics flusher and aggregator"); + + let (flusher, handle) = + start_aggregator(dd_api_key.clone(), dd_site, https_proxy.clone(), dogstatsd_tags).await; + + if dd_use_dogstatsd { + debug!("Starting dogstatsd"); + let _ = start_dogstatsd_listener( + dd_dogstatsd_port, + handle.clone(), + dd_statsd_metric_namespace, + #[cfg(all(windows, feature = "windows-pipes"))] + dd_dogstatsd_windows_pipe_name.clone(), + ) + .await; + if let Some(ref windows_pipe_name) = dd_dogstatsd_windows_pipe_name { + info!("dogstatsd-pipe: starting to listen on pipe {windows_pipe_name}"); + } else { + info!("dogstatsd-udp: starting to listen on port {dd_dogstatsd_port}"); + } + } else { + info!("dogstatsd disabled"); + } + (flusher, Some(handle)) + } else { + info!("dogstatsd and enhanced metrics disabled"); + (None, None) + }; +``` + +- [ ] **Step 5: Update `main.rs` — create instance collector** + +After the aggregator block, add: + +```rust + let instance_collector = if dd_enhanced_metrics && metrics_flusher.is_some() { + aggregator_handle.as_ref().map(|handle| { + let tags = datadog_metrics_collector::tags::build_enhanced_metrics_tags(); + InstanceMetricsCollector::new(handle.clone(), tags) + }) + } else { + if !dd_enhanced_metrics { + info!("Enhanced metrics disabled"); + } else { + info!("Enhanced metrics enabled but metrics flusher not found"); + } + None + }; +``` + +- [ ] **Step 6: Update `main.rs` — replace flush loop with `tokio::select!`** + +Replace the existing flush loop (from `let mut flush_interval` through end of `loop`) with: + +```rust + 
let mut flush_interval = interval(Duration::from_secs(DOGSTATSD_FLUSH_INTERVAL)); + let mut enhanced_metrics_collection_interval = + interval(Duration::from_secs(ENHANCED_METRICS_COLLECTION_INTERVAL_SECS)); + flush_interval.tick().await; // discard first tick, which is instantaneous + enhanced_metrics_collection_interval.tick().await; + + // Builders for log batches that failed transiently in the previous flush + // cycle. They are redriven on the next cycle before new batches are sent. + let mut pending_log_retries: Vec = Vec::new(); + + loop { + tokio::select! { + _ = flush_interval.tick() => { + if let Some(metrics_flusher) = metrics_flusher.clone() { + debug!("Flushing dogstatsd metrics"); + tokio::spawn(async move { + metrics_flusher.flush().await; + }); + } + + if let Some(log_flusher) = log_flusher.as_ref() { + debug!("Flushing log agent"); + let retry_in = std::mem::take(&mut pending_log_retries); + let failed = log_flusher.flush(retry_in).await; + if !failed.is_empty() { + warn!( + "log agent flush failed for {} batch(es); will retry next cycle", + failed.len() + ); + pending_log_retries = failed; + } + } + } + _ = enhanced_metrics_collection_interval.tick() => { + if let Some(ref collector) = instance_collector { + collector.collect_and_submit(); + } + } + } + } +``` + +- [ ] **Step 7: Update `main.rs` — refactor `start_dogstatsd` into `start_aggregator` + `start_dogstatsd_listener`** + +Replace the existing `start_dogstatsd` function with two functions: + +```rust +async fn start_aggregator( + dd_api_key: Option, + dd_site: String, + https_proxy: Option, + dogstatsd_tags: &str, +) -> (Option, AggregatorHandle) { + #[allow(clippy::expect_used)] + let (service, handle) = AggregatorService::new( + SortedTags::parse(dogstatsd_tags).unwrap_or(EMPTY_TAGS), + CONTEXTS, + ) + .expect("Failed to create aggregator service"); + + tokio::spawn(service.run()); + + let metrics_flusher = match dd_api_key { + Some(dd_api_key) => { + let client = match 
build_metrics_client(https_proxy, DOGSTATSD_TIMEOUT_DURATION) { + Ok(client) => client, + Err(e) => { + error!("Failed to build HTTP client: {e}, won't flush metrics"); + return (None, handle); + } + }; + let metrics_intake_url_prefix = match Site::new(dd_site) + .map_err(|e| e.to_string()) + .and_then(|site| { + MetricsIntakeUrlPrefix::new(Some(site), None).map_err(|e| e.to_string()) + }) { + Ok(prefix) => prefix, + Err(e) => { + error!("Failed to create metrics intake URL: {e}, won't flush metrics"); + return (None, handle); + } + }; + + let metrics_flusher = Flusher::new(FlusherConfig { + api_key_factory: Arc::new(ApiKeyFactory::new(&dd_api_key)), + aggregator_handle: handle.clone(), + metrics_intake_url_prefix, + client, + retry_strategy: RetryStrategy::LinearBackoff(3, 1), + compression_level: CompressionLevel::try_from(6).unwrap_or_default(), + }); + Some(metrics_flusher) + } + None => { + error!("DD_API_KEY not set, won't flush metrics"); + None + } + }; + + (metrics_flusher, handle) +} + +async fn start_dogstatsd_listener( + port: u16, + handle: AggregatorHandle, + metric_namespace: Option, + #[cfg(all(windows, feature = "windows-pipes"))] windows_pipe_name: Option, +) -> CancellationToken { + #[cfg(all(windows, feature = "windows-pipes"))] + let dogstatsd_config = DogStatsDConfig { + host: AGENT_HOST.to_string(), + port, + metric_namespace, + windows_pipe_name, + so_rcvbuf: None, + buffer_size: None, + queue_size: None, + }; + + #[cfg(not(all(windows, feature = "windows-pipes")))] + let dogstatsd_config = DogStatsDConfig { + host: AGENT_HOST.to_string(), + port, + metric_namespace, + so_rcvbuf: None, + buffer_size: None, + queue_size: None, + }; + let dogstatsd_cancel_token = tokio_util::sync::CancellationToken::new(); + + let dogstatsd_client = DogStatsD::new( + &dogstatsd_config, + handle.clone(), + dogstatsd_cancel_token.clone(), + ) + .await; + + tokio::spawn(async move { + dogstatsd_client.spin().await; + }); + + dogstatsd_cancel_token +} +``` + +- [ 
] **Step 8: Remove unused import `warn` if no longer needed, clean up** + +Check if `warn` is still used (it is, for log flush failures). Remove the `_aggregator_handle` underscore-prefixed variable since it's now used. + +- [ ] **Step 9: Verify everything compiles** + +Run: `cargo check -p datadog-serverless-compat` +Expected: success + +- [ ] **Step 10: Run all workspace tests** + +Run: `cargo test --workspace` +Expected: all tests pass + +- [ ] **Step 11: Commit** + +```bash +git add crates/datadog-serverless-compat/Cargo.toml crates/datadog-serverless-compat/src/main.rs +git commit -m "feat(serverless-compat): wire instance metrics collector into main loop" +``` + +--- + +### Task 6: Update CI workflows for `windows-enhanced-metrics` feature + +**Files:** +- Modify: `.github/workflows/build-datadog-serverless-compat.yml` +- Modify: `.github/workflows/cargo.yml` + +Windows builds need the `windows-enhanced-metrics` feature flag so that the feature-gated code compiles correctly. + +- [ ] **Step 1: Update `build-datadog-serverless-compat.yml`** + +Change the Windows build command from: + +```yaml +run: cargo build --release -p datadog-serverless-compat --features windows-pipes +``` + +to: + +```yaml +run: cargo build --release -p datadog-serverless-compat --features windows-pipes,windows-enhanced-metrics +``` + +- [ ] **Step 2: Update `cargo.yml`** + +Change the Windows test command from: + +```yaml +cargo nextest run --workspace --features datadog-serverless-compat/windows-pipes +``` + +to: + +```yaml +cargo nextest run --workspace --features datadog-serverless-compat/windows-pipes,datadog-serverless-compat/windows-enhanced-metrics +``` + +- [ ] **Step 3: Commit** + +```bash +git add .github/workflows/build-datadog-serverless-compat.yml .github/workflows/cargo.yml +git commit -m "ci: add windows-enhanced-metrics feature flag to CI builds" +``` + +--- + +### Task 7: Update `Cargo.lock` and `LICENSE-3rdparty.csv` + +**Files:** +- Modify: `Cargo.lock` 
(auto-generated) +- Modify: `LICENSE-3rdparty.csv` (if any new third-party deps) + +- [ ] **Step 1: Generate lock file** + +Run: `cargo generate-lockfile` or just `cargo check` to update `Cargo.lock` + +- [ ] **Step 2: Check if LICENSE-3rdparty.csv needs updating** + +Since we're only adding local crate dependencies (dogstatsd, libdd-common which are already in the dependency tree), this likely needs no changes. Verify by checking if the CI has a license check step and whether it passes. + +- [ ] **Step 3: Commit if changed** + +```bash +git add Cargo.lock +git commit -m "chore: update Cargo.lock for datadog-metrics-collector" +``` + +--- + +### Task 8: Final verification + +- [ ] **Step 1: Run full workspace check** + +Run: `cargo check --workspace` +Expected: success + +- [ ] **Step 2: Run full workspace tests** + +Run: `cargo test --workspace` +Expected: all tests pass + +- [ ] **Step 3: Run clippy** + +Run: `cargo clippy --workspace -- -D warnings` +Expected: no warnings + +- [ ] **Step 4: Verify Windows feature flag compiles** + +Run: `cargo check -p datadog-serverless-compat --features windows-enhanced-metrics,windows-pipes` +Expected: success (will use stub code paths) From 345b0001b690dbbd94d5b374c33b56950cee2fc7 Mon Sep 17 00:00:00 2001 From: Kathie Huang Date: Wed, 15 Apr 2026 13:21:04 -0400 Subject: [PATCH 09/10] Precompute tags in new() rather than building them in collect_and_submit(), change missing instance log to warn --- .../datadog-metrics-collector/src/instance.rs | 45 +- crates/datadog-serverless-compat/src/main.rs | 2 +- .../2026-04-13-instance-enhanced-metric.md | 791 ------------------ 3 files changed, 22 insertions(+), 816 deletions(-) delete mode 100644 docs/superpowers/plans/2026-04-13-instance-enhanced-metric.md diff --git a/crates/datadog-metrics-collector/src/instance.rs b/crates/datadog-metrics-collector/src/instance.rs index 84fd701..18f3a48 100644 --- a/crates/datadog-metrics-collector/src/instance.rs +++ 
b/crates/datadog-metrics-collector/src/instance.rs @@ -9,7 +9,7 @@ use dogstatsd::aggregator::AggregatorHandle; use dogstatsd::metric::{Metric, MetricValue, SortedTags}; use std::env; -use tracing::{debug, error}; +use tracing::{error, warn}; const INSTANCE_METRIC: &str = "azure.functions.enhanced.instance"; @@ -42,42 +42,39 @@ fn resolve_instance_id() -> Option { pub struct InstanceMetricsCollector { aggregator: AggregatorHandle, tags: Option, - instance_id: Option, } impl InstanceMetricsCollector { - pub fn new(aggregator: AggregatorHandle, tags: Option) -> Self { + /// Creates a new collector, returning `None` if no instance ID is found. + pub fn new(aggregator: AggregatorHandle, tags: Option) -> Option { let instance_id = resolve_instance_id(); - if instance_id.is_none() { - debug!("No instance ID found, instance metric will not be submitted"); - } - Self { - aggregator, - tags, - instance_id, - } - } - - pub fn collect_and_submit(&self) { - let Some(ref instance_id) = self.instance_id else { - debug!("No instance ID available, skipping instance metric"); - return; + let Some(instance_id) = instance_id else { + warn!("No instance ID found, instance metric will not be submitted"); + return None; }; - // Build tags: start with shared tags, add instance + // Precompute tags: enhanced metrics tags + instance tag let instance_tag = format!("instance:{}", instance_id); - let tags = match &self.tags { - Some(existing) => { - let mut combined = existing.clone(); + let tags = match tags { + Some(mut existing) => { if let Ok(id_tag) = SortedTags::parse(&instance_tag) { - combined.extend(&id_tag); + existing.extend(&id_tag); } - Some(combined) + Some(existing) } None => SortedTags::parse(&instance_tag).ok(), }; - let metric = Metric::new(INSTANCE_METRIC.into(), MetricValue::gauge(1.0), tags, None); + Some(Self { aggregator, tags }) + } + + pub fn collect_and_submit(&self) { + let metric = Metric::new( + INSTANCE_METRIC.into(), + MetricValue::gauge(1.0), + 
self.tags.clone(), + None, + ); if let Err(e) = self.aggregator.insert_batch(vec![metric]) { error!("Failed to insert instance metric: {}", e); diff --git a/crates/datadog-serverless-compat/src/main.rs b/crates/datadog-serverless-compat/src/main.rs index 0ce4ef2..c3f00ab 100644 --- a/crates/datadog-serverless-compat/src/main.rs +++ b/crates/datadog-serverless-compat/src/main.rs @@ -231,7 +231,7 @@ pub async fn main() { }; let instance_collector = if dd_enhanced_metrics && metrics_flusher.is_some() { - aggregator_handle.as_ref().map(|handle| { + aggregator_handle.as_ref().and_then(|handle| { let tags = datadog_metrics_collector::tags::build_enhanced_metrics_tags(); InstanceMetricsCollector::new(handle.clone(), tags) }) diff --git a/docs/superpowers/plans/2026-04-13-instance-enhanced-metric.md b/docs/superpowers/plans/2026-04-13-instance-enhanced-metric.md deleted file mode 100644 index f348264..0000000 --- a/docs/superpowers/plans/2026-04-13-instance-enhanced-metric.md +++ /dev/null @@ -1,791 +0,0 @@ -# Instance Enhanced Metric Implementation Plan - -> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. - -**Goal:** Add an `azure.functions.enhanced.instance` metric that reports the Azure Functions instance identity, enabling per-instance observability. - -**Architecture:** Create the `datadog-metrics-collector` crate (ported from the CPU metrics branch scaffolding, minus CPU-specific code). The crate exposes an `InstanceMetricsCollector` that reads instance identity from env vars (`WEBSITE_INSTANCE_ID`, `WEBSITE_POD_NAME`, `CONTAINER_NAME`) and submits a **gauge** metric with value `1.0` on each collection tick, with the instance ID as an `instance_id` tag. 
This follows the datadog-agent pattern (PR 47421) where usage/instance metrics are gauges (not distributions) because the instance tag already provides a unique identifier, avoiding aggregation issues. The collector is wired into `datadog-serverless-compat`'s main loop via a `tokio::select!` arm, sharing the existing DogStatsD aggregator. Origin classification in `dogstatsd/src/origin.rs` needs the `azure.functions` prefix added to route instance metrics as `ServerlessEnhanced`. - -**Tech Stack:** Rust, tokio, dogstatsd crate (local), libdd-common (libdatadog) - ---- - -### Task 1: Create `datadog-metrics-collector` crate with shared tag builder - -**Files:** -- Create: `crates/datadog-metrics-collector/Cargo.toml` -- Create: `crates/datadog-metrics-collector/src/lib.rs` - -This task creates the crate shell. No metric logic yet. The workspace `Cargo.toml` uses `crates/*` glob so no workspace edit is needed. - -- [ ] **Step 1: Create `Cargo.toml`** - -```toml -[package] -name = "datadog-metrics-collector" -version = "0.1.0" -edition.workspace = true -license.workspace = true -description = "Collector to read, compute, and submit enhanced metrics in Serverless environments" - -[dependencies] -dogstatsd = { path = "../dogstatsd", default-features = true } -tracing = { version = "0.1", default-features = false } -libdd-common = { git = "https://github.com/DataDog/libdatadog", rev = "d52ee90209cb12a28bdda0114535c1a985a29d95", default-features = false } - -[features] -windows-enhanced-metrics = [] -``` - -Note: `num_cpus` is intentionally omitted — it's only needed for CPU metrics, not instance metrics. - -- [ ] **Step 2: Create `lib.rs`** - -```rust -// Copyright 2023-Present Datadog, Inc. 
https://www.datadoghq.com/ -// SPDX-License-Identifier: Apache-2.0 - -#![cfg_attr(not(test), deny(clippy::panic))] -#![cfg_attr(not(test), deny(clippy::unwrap_used))] -#![cfg_attr(not(test), deny(clippy::expect_used))] -#![cfg_attr(not(test), deny(clippy::todo))] -#![cfg_attr(not(test), deny(clippy::unimplemented))] - -pub mod instance; -pub mod tags; -``` - -- [ ] **Step 3: Verify the crate compiles** - -Run: `cargo check -p datadog-metrics-collector` -Expected: success (will fail until Task 2 and 3 create the modules) - -Note: This step will be verified after Tasks 2 and 3 are done. - -- [ ] **Step 4: Commit** - -```bash -git add crates/datadog-metrics-collector/Cargo.toml crates/datadog-metrics-collector/src/lib.rs -git commit -m "feat(metrics-collector): create datadog-metrics-collector crate shell" -``` - ---- - -### Task 2: Extract shared tag builder into `tags.rs` - -**Files:** -- Create: `crates/datadog-metrics-collector/src/tags.rs` - -The tag builder is lifted from `cpu.rs` on the CPU branch. It's shared infrastructure for all enhanced metrics (instance, CPU, memory, etc.), so it lives in its own module. - -- [ ] **Step 1: Create `tags.rs`** - -```rust -// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ -// SPDX-License-Identifier: Apache-2.0 - -//! Shared tag builder for enhanced metrics. -//! -//! Tags are attached to all enhanced metrics submitted by the metrics collector. - -use dogstatsd::metric::SortedTags; -use libdd_common::azure_app_services; -use std::env; - -/// Builds the common tags for all enhanced metrics. -/// -/// Sources: -/// - Azure metadata (resource_group, subscription_id, name) from libdd_common -/// - Environment variables (region, plan_tier, service, env, version, serverless_compat_version) -/// -/// The DogStatsD origin tag (e.g. `origin:azurefunction`) is added by the aggregator, -/// not here. 
-pub fn build_enhanced_metrics_tags() -> Option { - let mut tag_parts = Vec::new(); - - if let Some(aas_metadata) = &*azure_app_services::AAS_METADATA_FUNCTION { - let aas_tags = [ - ("resource_group", aas_metadata.get_resource_group()), - ("subscription_id", aas_metadata.get_subscription_id()), - ("name", aas_metadata.get_site_name()), - ]; - for (name, value) in aas_tags { - if value != "unknown" { - tag_parts.push(format!("{}:{}", name, value)); - } - } - } - - for (tag_name, env_var) in [ - ("region", "REGION_NAME"), - ("plan_tier", "WEBSITE_SKU"), - ("service", "DD_SERVICE"), - ("env", "DD_ENV"), - ("version", "DD_VERSION"), - ("serverless_compat_version", "DD_SERVERLESS_COMPAT_VERSION"), - ] { - if let Ok(val) = env::var(env_var) - && !val.is_empty() - { - tag_parts.push(format!("{}:{}", tag_name, val)); - } - } - - if tag_parts.is_empty() { - return None; - } - SortedTags::parse(&tag_parts.join(",")).ok() -} -``` - -- [ ] **Step 2: Commit** - -```bash -git add crates/datadog-metrics-collector/src/tags.rs -git commit -m "feat(metrics-collector): add shared tag builder for enhanced metrics" -``` - ---- - -### Task 3: Implement `InstanceMetricsCollector` - -**Files:** -- Create: `crates/datadog-metrics-collector/src/instance.rs` - -The instance metric is simple: read the instance ID from env vars, submit `azure.functions.enhanced.instance` as a **gauge** with value `1.0` and an `instance_id` tag. Following the datadog-agent pattern (PR 47421), usage/instance metrics use gauges because the instance tag provides a unique identifier — no aggregation issues like CPU metrics have. No delta computation, no OS-specific reader. - -- [ ] **Step 1: Write failing test for `resolve_instance_id`** - -Create `crates/datadog-metrics-collector/src/instance.rs` with the test first: - -```rust -// Copyright 2023-Present Datadog, Inc. https://www.datadoghq.com/ -// SPDX-License-Identifier: Apache-2.0 - -//! Instance identity metric collector for Azure Functions. -//! -//! 
Submits `azure.functions.enhanced.instance` with value 1.0 on each -//! collection tick, tagged with the instance identifier. The env var -//! checked depends on the Azure plan type: -//! -//! - Elastic Premium / Premium: `WEBSITE_INSTANCE_ID` -//! - Flex Consumption / Consumption: `WEBSITE_POD_NAME` or `CONTAINER_NAME` - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_resolve_instance_id_returns_none_when_no_env_vars() { - // Ensure none of the vars are set (they shouldn't be in test env) - let id = resolve_instance_id_from(None, None, None); - assert!(id.is_none()); - } - - #[test] - fn test_resolve_instance_id_prefers_website_instance_id() { - let id = resolve_instance_id_from( - Some("instance-abc"), - Some("pod-xyz"), - Some("container-123"), - ); - assert_eq!(id, Some("instance-abc".to_string())); - } - - #[test] - fn test_resolve_instance_id_falls_back_to_pod_name() { - let id = resolve_instance_id_from(None, Some("pod-xyz"), Some("container-123")); - assert_eq!(id, Some("pod-xyz".to_string())); - } - - #[test] - fn test_resolve_instance_id_falls_back_to_container_name() { - let id = resolve_instance_id_from(None, None, Some("container-123")); - assert_eq!(id, Some("container-123".to_string())); - } -} -``` - -- [ ] **Step 2: Run test to verify it fails** - -Run: `cargo test -p datadog-metrics-collector -- test_resolve_instance_id 2>&1` -Expected: FAIL — `resolve_instance_id_from` not found - -- [ ] **Step 3: Implement `resolve_instance_id_from` and `resolve_instance_id`** - -Add above the `#[cfg(test)]` block: - -```rust -use dogstatsd::aggregator::AggregatorHandle; -use dogstatsd::metric::{Metric, MetricValue, SortedTags}; -use std::env; -use tracing::{debug, error, info}; - -const INSTANCE_METRIC: &str = "azure.functions.enhanced.instance"; - -/// Resolves the instance ID from explicit values (used by tests). 
-fn resolve_instance_id_from( - website_instance_id: Option<&str>, - website_pod_name: Option<&str>, - container_name: Option<&str>, -) -> Option { - website_instance_id - .or(website_pod_name) - .or(container_name) - .map(String::from) -} - -/// Resolves the instance ID from environment variables. -/// -/// Checks in order: -/// 1. `WEBSITE_INSTANCE_ID` (Elastic Premium / Premium plans) -/// 2. `WEBSITE_POD_NAME` (Flex Consumption plans) -/// 3. `CONTAINER_NAME` (Consumption plans) -fn resolve_instance_id() -> Option { - resolve_instance_id_from( - env::var("WEBSITE_INSTANCE_ID").ok().as_deref(), - env::var("WEBSITE_POD_NAME").ok().as_deref(), - env::var("CONTAINER_NAME").ok().as_deref(), - ) -} - -pub struct InstanceMetricsCollector { - aggregator: AggregatorHandle, - tags: Option, - instance_id: Option, -} - -impl InstanceMetricsCollector { - pub fn new(aggregator: AggregatorHandle, tags: Option) -> Self { - let instance_id = resolve_instance_id(); - if let Some(ref id) = instance_id { - info!("Instance ID resolved: {}", id); - } else { - debug!("No instance ID found, instance metric will not be submitted"); - } - Self { - aggregator, - tags, - instance_id, - } - } - - pub fn collect_and_submit(&self) { - let Some(ref instance_id) = self.instance_id else { - debug!("No instance ID available, skipping instance metric"); - return; - }; - - // Build tags: start with shared tags, add instance_id - let instance_tag = format!("instance_id:{}", instance_id); - let tag_string = match &self.tags { - Some(existing) => format!("{},{}", existing, instance_tag), - None => instance_tag, - }; - let tags = SortedTags::parse(&tag_string).ok(); - - let now = std::time::UNIX_EPOCH - .elapsed() - .map(|d| d.as_secs()) - .unwrap_or(0) - .try_into() - .unwrap_or(0); - - let metric = Metric::new( - INSTANCE_METRIC.into(), - MetricValue::gauge(1.0), - tags, - Some(now), - ); - - if let Err(e) = self.aggregator.insert_batch(vec![metric]) { - error!("Failed to insert instance metric: 
{}", e); - } - } -} -``` - -- [ ] **Step 4: Run tests to verify they pass** - -Run: `cargo test -p datadog-metrics-collector -- test_resolve_instance_id` -Expected: all 4 tests PASS - -- [ ] **Step 5: Verify crate compiles** - -Run: `cargo check -p datadog-metrics-collector` -Expected: success - -- [ ] **Step 6: Commit** - -```bash -git add crates/datadog-metrics-collector/src/instance.rs -git commit -m "feat(metrics-collector): add instance identity metric collector" -``` - ---- - -### Task 4: Add `azure.functions` prefix to origin classification - -**Files:** -- Modify: `crates/dogstatsd/src/origin.rs` - -The current `main` branch doesn't include `azure.functions` in the enhanced-service prefix check. The CPU branch added it. We need it so `azure.functions.enhanced.instance` gets classified as `ServerlessEnhanced`. - -- [ ] **Step 1: Write failing test for the new origin classification** - -Add this test to the `mod tests` block at the bottom of `crates/dogstatsd/src/origin.rs`: - -```rust - #[test] - fn test_find_metric_origin_azure_functions_enhanced() { - let tags = SortedTags::parse("origin:azurefunction").unwrap(); - let metric = Metric { - id: 0, - name: "azure.functions.enhanced.instance".into(), - value: MetricValue::Gauge(1.0), - tags: Some(tags.clone()), - timestamp: 0, - }; - let origin = metric.find_origin(tags).unwrap(); - assert_eq!( - origin.origin_product as u32, - OriginProduct::Serverless as u32 - ); - assert_eq!( - origin.origin_category as u32, - OriginCategory::AzureFunctionsMetrics as u32 - ); - assert_eq!( - origin.origin_service as u32, - OriginService::ServerlessEnhanced as u32 - ); - } -``` - -- [ ] **Step 2: Run test to verify it fails** - -Run: `cargo test -p dogstatsd -- test_find_metric_origin_azure_functions_enhanced` -Expected: FAIL — `azure.functions` prefix not matched for `ServerlessEnhanced`, falls through to `ServerlessCustom` - -- [ ] **Step 3: Add `azure.functions` prefix constant and update matching** - -In 
`crates/dogstatsd/src/origin.rs`, add the constant: - -```rust -const AZURE_FUNCTIONS_PREFIX: &str = "azure.functions"; -``` - -And update the service matching in `find_origin` to include it: - -```rust - } else if metric_prefix == AWS_LAMBDA_PREFIX - || metric_prefix == GOOGLE_CLOUD_RUN_PREFIX - || metric_prefix == AZURE_FUNCTIONS_PREFIX - { -``` - -- [ ] **Step 4: Run test to verify it passes** - -Run: `cargo test -p dogstatsd -- test_find_metric_origin_azure_functions_enhanced` -Expected: PASS - -- [ ] **Step 5: Run all dogstatsd tests to check for regressions** - -Run: `cargo test -p dogstatsd` -Expected: all tests pass - -- [ ] **Step 6: Commit** - -```bash -git add crates/dogstatsd/src/origin.rs -git commit -m "feat(dogstatsd): classify azure.functions prefix as ServerlessEnhanced origin" -``` - ---- - -### Task 5: Wire `InstanceMetricsCollector` into `main.rs` - -**Files:** -- Modify: `crates/datadog-serverless-compat/Cargo.toml` -- Modify: `crates/datadog-serverless-compat/src/main.rs` - -This is the integration task. The main changes to `main.rs`: -1. Add `DD_ENHANCED_METRICS_ENABLED` env var check (Azure Functions only, default true, disabled on Windows) -2. Refactor aggregator creation to be shared between dogstatsd and enhanced metrics (same pattern as CPU branch) -3. Add a `tokio::select!` loop with separate flush and collection intervals -4. 
Create and run the `InstanceMetricsCollector` - -- [ ] **Step 1: Add `datadog-metrics-collector` dependency to `Cargo.toml`** - -Add to `[dependencies]` in `crates/datadog-serverless-compat/Cargo.toml`: - -```toml -datadog-metrics-collector = { path = "../datadog-metrics-collector" } -``` - -Add to `[features]`: - -```toml -windows-enhanced-metrics = ["datadog-metrics-collector/windows-enhanced-metrics"] -``` - -- [ ] **Step 2: Update `main.rs` — add import and collection interval constant** - -Add import near the top with other use statements: - -```rust -use datadog_metrics_collector::instance::InstanceMetricsCollector; -``` - -Add constant: - -```rust -const ENHANCED_METRICS_COLLECTION_INTERVAL_SECS: u64 = 10; -``` - -- [ ] **Step 3: Update `main.rs` — add `dd_enhanced_metrics` env var check** - -After the `dd_logs_enabled` / `dd_logs_port` env var reads (around line 121), add: - -```rust - // Only enable enhanced metrics for Linux Azure Functions - #[cfg(not(feature = "windows-enhanced-metrics"))] - let dd_enhanced_metrics = env_type == EnvironmentType::AzureFunction - && env::var("DD_ENHANCED_METRICS_ENABLED") - .map(|val| val.to_lowercase() != "false") - .unwrap_or(true); - - // Enhanced metrics are not yet supported in Windows environments - #[cfg(feature = "windows-enhanced-metrics")] - let dd_enhanced_metrics = false; -``` - -- [ ] **Step 4: Update `main.rs` — refactor aggregator to be shared** - -Replace the dogstatsd startup block (lines 185-207) and the code down through the flush loop with the shared-aggregator pattern from the CPU branch. The key structural change is: - -1. Create aggregator when `dd_use_dogstatsd || dd_enhanced_metrics` -2. Start DogStatsD listener separately (only if `dd_use_dogstatsd`) -3. Create `InstanceMetricsCollector` when enhanced metrics enabled -4. 
Use `tokio::select!` with both flush and collection intervals - -Replace lines 185-207 (the dogstatsd startup block) with: - -```rust - let needs_aggregator = dd_use_dogstatsd || dd_enhanced_metrics; - - // The aggregator is shared between dogstatsd and enhanced metrics. - // It is started independently so that either can be enabled without the other. - let (metrics_flusher, aggregator_handle) = if needs_aggregator { - debug!("Creating metrics flusher and aggregator"); - - let (flusher, handle) = - start_aggregator(dd_api_key.clone(), dd_site, https_proxy.clone(), dogstatsd_tags).await; - - if dd_use_dogstatsd { - debug!("Starting dogstatsd"); - let _ = start_dogstatsd_listener( - dd_dogstatsd_port, - handle.clone(), - dd_statsd_metric_namespace, - #[cfg(all(windows, feature = "windows-pipes"))] - dd_dogstatsd_windows_pipe_name.clone(), - ) - .await; - if let Some(ref windows_pipe_name) = dd_dogstatsd_windows_pipe_name { - info!("dogstatsd-pipe: starting to listen on pipe {windows_pipe_name}"); - } else { - info!("dogstatsd-udp: starting to listen on port {dd_dogstatsd_port}"); - } - } else { - info!("dogstatsd disabled"); - } - (flusher, Some(handle)) - } else { - info!("dogstatsd and enhanced metrics disabled"); - (None, None) - }; -``` - -- [ ] **Step 5: Update `main.rs` — create instance collector** - -After the aggregator block, add: - -```rust - let instance_collector = if dd_enhanced_metrics && metrics_flusher.is_some() { - aggregator_handle.as_ref().map(|handle| { - let tags = datadog_metrics_collector::tags::build_enhanced_metrics_tags(); - InstanceMetricsCollector::new(handle.clone(), tags) - }) - } else { - if !dd_enhanced_metrics { - info!("Enhanced metrics disabled"); - } else { - info!("Enhanced metrics enabled but metrics flusher not found"); - } - None - }; -``` - -- [ ] **Step 6: Update `main.rs` — replace flush loop with `tokio::select!`** - -Replace the existing flush loop (from `let mut flush_interval` through end of `loop`) with: - -```rust - 
let mut flush_interval = interval(Duration::from_secs(DOGSTATSD_FLUSH_INTERVAL)); - let mut enhanced_metrics_collection_interval = - interval(Duration::from_secs(ENHANCED_METRICS_COLLECTION_INTERVAL_SECS)); - flush_interval.tick().await; // discard first tick, which is instantaneous - enhanced_metrics_collection_interval.tick().await; - - // Builders for log batches that failed transiently in the previous flush - // cycle. They are redriven on the next cycle before new batches are sent. - let mut pending_log_retries: Vec<_> = Vec::new(); - - loop { - tokio::select! { - _ = flush_interval.tick() => { - if let Some(metrics_flusher) = metrics_flusher.clone() { - debug!("Flushing dogstatsd metrics"); - tokio::spawn(async move { - metrics_flusher.flush().await; - }); - } - - if let Some(log_flusher) = log_flusher.as_ref() { - debug!("Flushing log agent"); - let retry_in = std::mem::take(&mut pending_log_retries); - let failed = log_flusher.flush(retry_in).await; - if !failed.is_empty() { - warn!( - "log agent flush failed for {} batch(es); will retry next cycle", - failed.len() - ); - pending_log_retries = failed; - } - } - } - _ = enhanced_metrics_collection_interval.tick() => { - if let Some(ref collector) = instance_collector { - collector.collect_and_submit(); - } - } - } - } -} -``` - -- [ ] **Step 7: Update `main.rs` — refactor `start_dogstatsd` into `start_aggregator` + `start_dogstatsd_listener`** - -Replace the existing `start_dogstatsd` function with two functions: - -```rust -async fn start_aggregator( - dd_api_key: Option<String>, - dd_site: String, - https_proxy: Option<String>, - dogstatsd_tags: &str, -) -> (Option<Flusher>, AggregatorHandle) { - #[allow(clippy::expect_used)] - let (service, handle) = AggregatorService::new( - SortedTags::parse(dogstatsd_tags).unwrap_or(EMPTY_TAGS), - CONTEXTS, - ) - .expect("Failed to create aggregator service"); - - tokio::spawn(service.run()); - - let metrics_flusher = match dd_api_key { - Some(dd_api_key) => { - let client = match 
build_metrics_client(https_proxy, DOGSTATSD_TIMEOUT_DURATION) { - Ok(client) => client, - Err(e) => { - error!("Failed to build HTTP client: {e}, won't flush metrics"); - return (None, handle); - } - }; - let metrics_intake_url_prefix = match Site::new(dd_site) - .map_err(|e| e.to_string()) - .and_then(|site| { - MetricsIntakeUrlPrefix::new(Some(site), None).map_err(|e| e.to_string()) - }) { - Ok(prefix) => prefix, - Err(e) => { - error!("Failed to create metrics intake URL: {e}, won't flush metrics"); - return (None, handle); - } - }; - - let metrics_flusher = Flusher::new(FlusherConfig { - api_key_factory: Arc::new(ApiKeyFactory::new(&dd_api_key)), - aggregator_handle: handle.clone(), - metrics_intake_url_prefix, - client, - retry_strategy: RetryStrategy::LinearBackoff(3, 1), - compression_level: CompressionLevel::try_from(6).unwrap_or_default(), - }); - Some(metrics_flusher) - } - None => { - error!("DD_API_KEY not set, won't flush metrics"); - None - } - }; - - (metrics_flusher, handle) -} - -async fn start_dogstatsd_listener( - port: u16, - handle: AggregatorHandle, - metric_namespace: Option<String>, - #[cfg(all(windows, feature = "windows-pipes"))] windows_pipe_name: Option<String>, -) -> CancellationToken { - #[cfg(all(windows, feature = "windows-pipes"))] - let dogstatsd_config = DogStatsDConfig { - host: AGENT_HOST.to_string(), - port, - metric_namespace, - windows_pipe_name, - so_rcvbuf: None, - buffer_size: None, - queue_size: None, - }; - - #[cfg(not(all(windows, feature = "windows-pipes")))] - let dogstatsd_config = DogStatsDConfig { - host: AGENT_HOST.to_string(), - port, - metric_namespace, - so_rcvbuf: None, - buffer_size: None, - queue_size: None, - }; - let dogstatsd_cancel_token = tokio_util::sync::CancellationToken::new(); - - let dogstatsd_client = DogStatsD::new( - &dogstatsd_config, - handle.clone(), - dogstatsd_cancel_token.clone(), - ) - .await; - - tokio::spawn(async move { - dogstatsd_client.spin().await; - }); - - dogstatsd_cancel_token -} -``` - -- [ 
] **Step 8: Remove unused import `warn` if no longer needed, clean up** - -Check if `warn` is still used (it is, for log flush failures). Remove the `_aggregator_handle` underscore-prefixed variable since it's now used. - -- [ ] **Step 9: Verify everything compiles** - -Run: `cargo check -p datadog-serverless-compat` -Expected: success - -- [ ] **Step 10: Run all workspace tests** - -Run: `cargo test --workspace` -Expected: all tests pass - -- [ ] **Step 11: Commit** - -```bash -git add crates/datadog-serverless-compat/Cargo.toml crates/datadog-serverless-compat/src/main.rs -git commit -m "feat(serverless-compat): wire instance metrics collector into main loop" -``` - ---- - -### Task 6: Update CI workflows for `windows-enhanced-metrics` feature - -**Files:** -- Modify: `.github/workflows/build-datadog-serverless-compat.yml` -- Modify: `.github/workflows/cargo.yml` - -Windows builds need the `windows-enhanced-metrics` feature flag so that the feature-gated code compiles correctly. - -- [ ] **Step 1: Update `build-datadog-serverless-compat.yml`** - -Change the Windows build command from: - -```yaml -run: cargo build --release -p datadog-serverless-compat --features windows-pipes -``` - -to: - -```yaml -run: cargo build --release -p datadog-serverless-compat --features windows-pipes,windows-enhanced-metrics -``` - -- [ ] **Step 2: Update `cargo.yml`** - -Change the Windows test command from: - -```yaml -cargo nextest run --workspace --features datadog-serverless-compat/windows-pipes -``` - -to: - -```yaml -cargo nextest run --workspace --features datadog-serverless-compat/windows-pipes,datadog-serverless-compat/windows-enhanced-metrics -``` - -- [ ] **Step 3: Commit** - -```bash -git add .github/workflows/build-datadog-serverless-compat.yml .github/workflows/cargo.yml -git commit -m "ci: add windows-enhanced-metrics feature flag to CI builds" -``` - ---- - -### Task 7: Update `Cargo.lock` and `LICENSE-3rdparty.csv` - -**Files:** -- Modify: `Cargo.lock` 
(auto-generated) -- Modify: `LICENSE-3rdparty.csv` (if any new third-party deps) - -- [ ] **Step 1: Generate lock file** - -Run: `cargo generate-lockfile` or just `cargo check` to update `Cargo.lock` - -- [ ] **Step 2: Check if LICENSE-3rdparty.csv needs updating** - -Since we're only adding local crate dependencies (dogstatsd, libdd-common which are already in the dependency tree), this likely needs no changes. Verify by checking if the CI has a license check step and whether it passes. - -- [ ] **Step 3: Commit if changed** - -```bash -git add Cargo.lock -git commit -m "chore: update Cargo.lock for datadog-metrics-collector" -``` - ---- - -### Task 8: Final verification - -- [ ] **Step 1: Run full workspace check** - -Run: `cargo check --workspace` -Expected: success - -- [ ] **Step 2: Run full workspace tests** - -Run: `cargo test --workspace` -Expected: all tests pass - -- [ ] **Step 3: Run clippy** - -Run: `cargo clippy --workspace -- -D warnings` -Expected: no warnings - -- [ ] **Step 4: Verify Windows feature flag compiles** - -Run: `cargo check -p datadog-serverless-compat --features windows-enhanced-metrics,windows-pipes` -Expected: success (will use stub code paths) From 53ea5b3c5e69bff4a38299212b2d1470c6fa7f8f Mon Sep 17 00:00:00 2001 From: Kathie Huang Date: Thu, 16 Apr 2026 16:23:16 -0400 Subject: [PATCH 10/10] Update rustls-webpki --- Cargo.lock | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 1703d95..2023ad4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2206,7 +2206,7 @@ version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "itertools 0.14.0", "log", "multimap", @@ -2686,9 +2686,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.10" +version = "0.103.12" source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "df33b2b81ac578cabaf06b89b0631153a3f416b0a886e8a7a1707fb51abbd1ef" +checksum = "8279bb85272c9f10811ae6a6c547ff594d6a7f3c6c6b02ee9726d1d0dcfcdd06" dependencies = [ "aws-lc-rs", "ring",