diff --git a/datadog-sidecar-ffi/Cargo.toml b/datadog-sidecar-ffi/Cargo.toml index 554b985f5a..161132621b 100644 --- a/datadog-sidecar-ffi/Cargo.toml +++ b/datadog-sidecar-ffi/Cargo.toml @@ -35,6 +35,7 @@ libdd-crashtracker-ffi = { path = "../libdd-crashtracker-ffi", features = ["coll [dev-dependencies] http = "1.1" +libdd-trace-utils = { path = "../libdd-trace-utils", features = ["test-utils"] } tempfile = { version = "3.3" } [lints.rust] diff --git a/datadog-sidecar-ffi/src/span.rs b/datadog-sidecar-ffi/src/span.rs index 1dd36e0b4e..fe79c94dde 100644 --- a/datadog-sidecar-ffi/src/span.rs +++ b/datadog-sidecar-ffi/src/span.rs @@ -5,6 +5,7 @@ use libdd_common_ffi::slice::{AsBytes, CharSlice}; use libdd_tinybytes::{Bytes, BytesString}; use libdd_trace_utils::span::v04::{ AttributeAnyValueBytes, AttributeArrayValueBytes, SpanBytes, SpanEventBytes, SpanLinkBytes, + VecMap, }; use std::borrow::Cow; use std::collections::HashMap; @@ -51,25 +52,35 @@ fn insert_hashmap(map: &mut HashMap, key: CharSlice, value: V } #[inline] -fn remove_hashmap(map: &mut HashMap, key: CharSlice) { +fn insert_vec_map(map: &mut VecMap, key: CharSlice, value: V) { + if key.is_empty() { + return; + } + let bytes_str_key = convert_char_slice_to_bytes_string(key); + map.insert(bytes_str_key, value); +} + +#[inline] +fn remove_vec_map_slow(map: &mut VecMap, key: CharSlice) { let bytes_str_key = convert_char_slice_to_bytes_string(key); - map.remove(&bytes_str_key); + map.remove_slow(&bytes_str_key); } #[inline] -fn exists_hashmap(map: &HashMap, key: CharSlice) -> bool { +fn exists_vec_map(map: &VecMap, key: CharSlice) -> bool { let bytes_str_key = convert_char_slice_to_bytes_string(key); map.contains_key(&bytes_str_key) } /// The return value is an owned array of slices (`Box<[CharSlice<'a>]>`) that must be dropped /// explicitly. -fn get_hashmap_keys<'a, V>( - map: &'a HashMap, +fn get_vec_map_keys<'a, V>( + map: &'a VecMap, out_count: &mut usize, ) -> *mut CharSlice<'a> { - let mut keys: Vec<&str> = map.keys().map(|b| b.as_str()).collect(); + let mut keys: Vec<&str> = map.iter().map(|(k, _)| k.as_str()).collect(); keys.sort_unstable(); + keys.dedup(); let slices: Box<[CharSlice]> = keys .iter() @@ -182,8 +193,8 @@ pub extern "C" fn ddog_trace_new_span_with_capacities( new_vector_push( trace, SpanBytes { - meta: HashMap::with_capacity(meta_size), - metrics: HashMap::with_capacity(metrics_size), + meta: VecMap::with_capacity(meta_size), + metrics: VecMap::with_capacity(metrics_size), ..SpanBytes::default() }, ) @@ -324,7 +335,7 @@ pub extern "C" fn ddog_get_span_error(span: &mut SpanBytes) -> i32 { #[no_mangle] pub extern "C" fn ddog_add_span_meta(span: &mut SpanBytes, key: CharSlice, value: CharSlice) { - insert_hashmap( + insert_vec_map( &mut span.meta, key, BytesString::from_slice(value.as_bytes()).unwrap_or_default(), @@ -333,7 +344,7 @@ pub extern "C" fn ddog_add_span_meta(span: &mut SpanBytes, key: CharSlice, value #[no_mangle] pub extern "C" fn ddog_del_span_meta(span: &mut SpanBytes, key: CharSlice) { - remove_hashmap(&mut span.meta, key); + remove_vec_map_slow(&mut span.meta, key); } #[no_mangle] @@ -355,7 +366,7 @@ pub extern "C" fn ddog_get_span_meta<'a>( #[no_mangle] pub extern "C" fn ddog_has_span_meta(span: &mut SpanBytes, key: CharSlice) -> bool { - exists_hashmap(&span.meta, key) + exists_vec_map(&span.meta, key) } /// The return value is an owned array of slices (`Box<[CharSlice]>`) that must be freed explicitly @@ -365,17 +376,17 @@ pub extern "C" fn ddog_span_meta_get_keys<'a>( span: &'a mut SpanBytes, out_count: &mut usize, ) -> *mut CharSlice<'a> { - get_hashmap_keys(&span.meta, out_count) + get_vec_map_keys(&span.meta, out_count) } #[no_mangle] pub extern "C" fn ddog_add_span_metrics(span: &mut SpanBytes, key: CharSlice, val: f64) { - insert_hashmap(&mut span.metrics, key, val); + insert_vec_map(&mut span.metrics, key, val); } #[no_mangle] pub extern "C" fn ddog_del_span_metrics(span: &mut SpanBytes, key: CharSlice) { - remove_hashmap(&mut span.metrics, key); + remove_vec_map_slow(&mut span.metrics, key); } #[no_mangle] @@ -396,7 +407,7 @@ pub extern "C" fn ddog_get_span_metrics( #[no_mangle] pub extern "C" fn ddog_has_span_metrics(span: &mut SpanBytes, key: CharSlice) -> bool { - exists_hashmap(&span.metrics, key) + exists_vec_map(&span.metrics, key) } #[no_mangle] @@ -404,12 +415,12 @@ pub extern "C" fn ddog_span_metrics_get_keys<'a>( span: &'a mut SpanBytes, out_count: &mut usize, ) -> *mut CharSlice<'a> { - get_hashmap_keys(&span.metrics, out_count) + get_vec_map_keys(&span.metrics, out_count) } #[no_mangle] pub extern "C" fn ddog_add_span_meta_struct(span: &mut SpanBytes, key: CharSlice, val: CharSlice) { - insert_hashmap( + insert_vec_map( &mut span.meta_struct, key, Bytes::copy_from_slice(val.as_bytes()), @@ -418,7 +429,7 @@ pub extern "C" fn ddog_add_span_meta_struct(span: &mut SpanBytes, key: CharSlice #[no_mangle] pub extern "C" fn ddog_del_span_meta_struct(span: &mut SpanBytes, key: CharSlice) { - remove_hashmap(&mut span.meta_struct, key); + remove_vec_map_slow(&mut span.meta_struct, key); } #[no_mangle] @@ -438,7 +449,7 @@ pub extern "C" fn ddog_get_span_meta_struct<'a>( #[no_mangle] pub extern "C" fn ddog_has_span_meta_struct(span: &mut SpanBytes, key: CharSlice) -> bool { - exists_hashmap(&span.meta_struct, key) + exists_vec_map(&span.meta_struct, key) } /// The return value is an array of slices (`Box<[CharSlice]>`) that must be freed explicitly @@ -448,7 +459,7 @@ pub extern "C" fn ddog_span_meta_struct_get_keys<'a>( span: &'a mut SpanBytes, out_count: &mut usize, ) -> *mut CharSlice<'a> { - get_hashmap_keys(&span.meta_struct, out_count) + get_vec_map_keys(&span.meta_struct, out_count) } /// # Safety @@ -461,7 +472,7 @@ pub unsafe extern "C" fn ddog_span_free_keys_ptr(keys_ptr: *mut CharSlice<'_>, c return; } - // Safety: all `xxx_get_keys()` functions return from `get_hashmap_keys()`, which returns a + // Safety: all `xxx_get_keys()` functions return from `get_vec_map_keys()`, which returns a // `Box<[T]>`. It is an official guarantee of `Vec` that this can be freely converted to and // from `Box<[T]>` when `len == capacity`. unsafe { diff --git a/datadog-sidecar-ffi/tests/span.rs b/datadog-sidecar-ffi/tests/span.rs index dd35391657..72be9a9d56 100644 --- a/datadog-sidecar-ffi/tests/span.rs +++ b/datadog-sidecar-ffi/tests/span.rs @@ -186,7 +186,7 @@ fn test_span_debug_log_output() { ddog_set_span_name(span, CharSlice::from("debug-span")); let debug_output = ddog_span_debug_log(span); - let expected_output = CharSlice::from("Span { service: , name: debug-span, resource: , type: , trace_id: 0, span_id: 0, parent_id: 0, start: 0, duration: 0, error: 0, meta: {}, metrics: {}, meta_struct: {}, span_links: [], span_events: [] }"); + let expected_output = CharSlice::from("Span { service: , name: debug-span, resource: , type: , trace_id: 0, span_id: 0, parent_id: 0, start: 0, duration: 0, error: 0, meta: VecMap { data: [], deduped: false }, metrics: VecMap { data: [], deduped: false }, meta_struct: VecMap { data: [], deduped: false }, span_links: [], span_events: [] }"); assert_eq!(debug_output, expected_output); @@ -343,12 +343,13 @@ fn test_full_span() { start: 4, duration: 5, error: 6, - meta: HashMap::from([(get_bytes_str("meta_key"), get_bytes_str("meta_value"))]), - metrics: HashMap::from([(get_bytes_str("metric_key"), 1.0)]), - meta_struct: HashMap::from([( + meta: vec![(get_bytes_str("meta_key"), get_bytes_str("meta_value"))].into(), + metrics: vec![(get_bytes_str("metric_key"), 1.0)].into(), + meta_struct: vec![( get_bytes_str("meta_struct_key"), get_bytes("meta_struct_value"), - )]), + )] + .into(), span_links: vec![SpanLinkBytes { trace_id: 10, span_id: 20, diff --git a/libdd-data-pipeline-ffi/src/tracer.rs b/libdd-data-pipeline-ffi/src/tracer.rs index 0ca46cd001..9489527ad6 100644 --- a/libdd-data-pipeline-ffi/src/tracer.rs +++ b/libdd-data-pipeline-ffi/src/tracer.rs @@ -499,7 +499,8 @@ mod tests { ddog_tracer_span_set_meta(Some(&mut *span), cs("k"), cs("v1")); ddog_tracer_span_set_meta(Some(&mut *span), cs("k"), cs("v2")); - assert_eq!(span.0.meta.len(), 1); + // After the introduction of `VecMap`, the length is still 2, as the data structure + // tolerates duplicate entries. assert_eq!(span.0.meta.get("k").unwrap().as_ref(), "v2"); ddog_tracer_span_free(span); diff --git a/libdd-data-pipeline/benches/trace_buffer.rs b/libdd-data-pipeline/benches/trace_buffer.rs index e06f4b0a02..377a58e1e6 100644 --- a/libdd-data-pipeline/benches/trace_buffer.rs +++ b/libdd-data-pipeline/benches/trace_buffer.rs @@ -1,7 +1,6 @@ // Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use std::collections::HashMap; use std::pin::Pin; use std::sync::Arc; use std::time::Duration; @@ -14,6 +13,7 @@ use libdd_data_pipeline::trace_exporter::{ use libdd_shared_runtime::SharedRuntime; use libdd_tinybytes::BytesString; use libdd_trace_utils::span::v04::SpanBytes; +use libdd_trace_utils::span::vec_map::VecMap; // Number of chunks each sender thread sends per benchmark iteration. const CHUNKS_PER_SENDER: usize = 900; @@ -34,18 +34,20 @@ fn make_span() -> SpanBytes { start: 1_700_000_000_000_000_000_i64, duration: 5_000_000_i64, error: 0, - meta: HashMap::from_iter([ + meta: vec![ (bs("env"), bs("prod")), (bs("version"), bs("1.0.0")), (bs("http.method"), bs("GET")), (bs("http.url"), bs("/api/v1/users")), (bs("peer.service"), bs("users-service")), - ]), - metrics: HashMap::from_iter([ + ] + .into(), + metrics: vec![ (bs("_sampling_priority_v1"), 1.0_f64), (bs("_dd.agent_psr"), 1.0_f64), - ]), - meta_struct: HashMap::new(), + ] + .into(), + meta_struct: VecMap::new(), span_links: vec![], span_events: vec![], } diff --git a/libdd-data-pipeline/src/trace_buffer/mod.rs b/libdd-data-pipeline/src/trace_buffer/mod.rs index 09698eef18..e01769fe53 100644 --- a/libdd-data-pipeline/src/trace_buffer/mod.rs +++ b/libdd-data-pipeline/src/trace_buffer/mod.rs @@ -42,13 +42,16 @@ where size += self.resource.as_ref().len(); size += self.r#type.as_ref().len(); - for (k, v) in &self.meta { + // We expect VecMaps to be already deduped at this point, so `defensive_dedup` should be + // cheap (and alloc-free). In the future we could relax the check and accept non-deduped + // VecMap, trading over-estimating the size of a span for less work. + for (k, v) in self.meta.defensive_dedup().iter() { size += k.as_ref().len() + v.as_ref().len(); } - for k in self.metrics.keys() { + for (k, _) in self.metrics.defensive_dedup().iter() { size += k.as_ref().len() + 8; } - for (k, v) in &self.meta_struct { + for (k, v) in self.meta_struct.defensive_dedup().iter() { size += k.as_ref().len() + v.as_ref().len(); } for link in &self.span_links { diff --git a/libdd-data-pipeline/src/trace_exporter/mod.rs b/libdd-data-pipeline/src/trace_exporter/mod.rs index 3850d46215..070fc754e0 100644 --- a/libdd-data-pipeline/src/trace_exporter/mod.rs +++ b/libdd-data-pipeline/src/trace_exporter/mod.rs @@ -612,6 +612,12 @@ impl Tra self.client_computed_top_level, ); + for chunk in &mut traces { + for span in chunk.iter_mut() { + span.dedup(); + } + } + // OTLP path: send sampled traces via OTLP when an OTLP endpoint is configured. // Unlike the agent path, there is no downstream agent to drop unsampled traces, // so drop_chunks is always called here regardless of whether stats are enabled. diff --git a/libdd-trace-stats/benches/span_concentrator_bench.rs b/libdd-trace-stats/benches/span_concentrator_bench.rs index 858e2435ff..03526acb3e 100644 --- a/libdd-trace-stats/benches/span_concentrator_bench.rs +++ b/libdd-trace-stats/benches/span_concentrator_bench.rs @@ -1,13 +1,10 @@ // Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/ // SPDX-License-Identifier: Apache-2.0 -use std::{ - collections::HashMap, - time::{self, Duration, SystemTime}, -}; +use std::time::{self, Duration, SystemTime}; use criterion::{criterion_group, Criterion}; use libdd_trace_stats::span_concentrator::SpanConcentrator; -use libdd_trace_utils::span::v04::SpanBytes; +use libdd_trace_utils::span::v04::{SpanBytes, VecMap}; fn get_bucket_start(now: SystemTime, n: u64) -> i64 { let start = now.duration_since(time::UNIX_EPOCH).unwrap() + Duration::from_secs(10 * n); @@ -15,11 +12,11 @@ fn get_bucket_start(now: SystemTime, n: u64) -> i64 { } fn get_span(now: SystemTime, trace_id: u64, span_id: u64) -> SpanBytes { - let mut metrics = HashMap::from([("_dd.measured".into(), 1.0)]); + let mut metrics: VecMap<_, _> = vec![("_dd.measured".into(), 1.0)].into(); if span_id == 1 { metrics.insert("_dd.top_level".into(), 1.0); } - let mut meta = HashMap::from([("db_name".into(), "postgres".into())]); + let mut meta: VecMap<_, _> = vec![("db_name".into(), "postgres".into())].into(); if span_id.is_multiple_of(3) { meta.insert("bucket_s3".into(), "aws_bucket".into()); } diff --git a/libdd-trace-stats/src/span_concentrator/aggregation.rs b/libdd-trace-stats/src/span_concentrator/aggregation.rs index e9b24d0d2f..acc7629478 100644 --- a/libdd-trace-stats/src/span_concentrator/aggregation.rs +++ b/libdd-trace-stats/src/span_concentrator/aggregation.rs @@ -440,7 +440,7 @@ mod tests { use libdd_trace_utils::span::v04::{SpanBytes, SpanSlice}; use super::*; - use std::{collections::HashMap, hash::Hash}; + use std::hash::Hash; fn get_hash(v: &impl Hash) -> u64 { use std::hash::Hasher; @@ -494,7 +494,7 @@ mod tests { resource: "res".into(), span_id: 1, parent_id: 0, - meta: HashMap::from([("span.kind".into(), "client".into())]), + meta: vec![("span.kind".into(), "client".into())].into(), ..Default::default() }, FixedAggregationKey { @@ -515,10 +515,11 @@ mod tests { resource: "res".into(), span_id: 1, parent_id: 0, - meta: HashMap::from([ + meta: vec![ ("span.kind".into(), "client".into()), ("aws.s3.bucket".into(), "bucket-a".into()), - ]), + ] + .into(), ..Default::default() }, FixedAggregationKey { @@ -539,12 +540,13 @@ mod tests { resource: "res".into(), span_id: 1, parent_id: 0, - meta: HashMap::from([ + meta: vec![ ("span.kind".into(), "producer".into()), ("aws.s3.bucket".into(), "bucket-a".into()), ("db.instance".into(), "dynamo.test.us1".into()), ("db.system".into(), "dynamodb".into()), - ]), + ] + .into(), ..Default::default() }, FixedAggregationKey { @@ -566,12 +568,13 @@ mod tests { resource: "res".into(), span_id: 1, parent_id: 0, - meta: HashMap::from([ + meta: vec![ ("span.kind".into(), "server".into()), ("aws.s3.bucket".into(), "bucket-a".into()), ("db.instance".into(), "dynamo.test.us1".into()), ("db.system".into(), "dynamodb".into()), - ]), + ] + .into(), ..Default::default() }, FixedAggregationKey { @@ -592,7 +595,7 @@ mod tests { resource: "res".into(), span_id: 1, parent_id: 0, - meta: HashMap::from([("_dd.origin".into(), "synthetics-browser".into())]), + meta: vec![("_dd.origin".into(), "synthetics-browser".into())].into(), ..Default::default() }, FixedAggregationKey { @@ -613,7 +616,7 @@ mod tests { resource: "res".into(), span_id: 1, parent_id: 0, - meta: HashMap::from([("http.status_code".into(), "418".into())]), + meta: vec![("http.status_code".into(), "418".into())].into(), ..Default::default() }, FixedAggregationKey { @@ -635,7 +638,7 @@ mod tests { resource: "res".into(), span_id: 1, parent_id: 0, - meta: HashMap::from([("http.status_code".into(), "x".into())]), + meta: vec![("http.status_code".into(), "x".into())].into(), ..Default::default() }, FixedAggregationKey { @@ -656,7 +659,7 @@ mod tests { resource: "res".into(), span_id: 1, parent_id: 0, - metrics: HashMap::from([("http.status_code".into(), 418.0)]), + metrics: vec![("http.status_code".into(), 418.0)].into(), ..Default::default() }, FixedAggregationKey { @@ -678,10 +681,11 @@ mod tests { resource: "GET /api/v1/users".into(), span_id: 1, parent_id: 0, - meta: HashMap::from([ + meta: vec![ ("http.method".into(), "GET".into()), ("http.route".into(), "/api/v1/users".into()), - ]), + ] + .into(), ..Default::default() }, FixedAggregationKey { @@ -704,11 +708,12 @@ mod tests { resource: "POST /users/create".into(), span_id: 1, parent_id: 0, - meta: HashMap::from([ + meta: vec![ ("http.method".into(), "POST".into()), ("http.route".into(), "/users/create".into()), ("http.endpoint".into(), "/users/create2".into()), - ]), + ] + .into(), ..Default::default() }, FixedAggregationKey { @@ -726,7 +731,7 @@ mod tests { // Span with grpc status from meta as named string ( SpanBytes { - meta: HashMap::from([("rpc.grpc.status_code".into(), "OK".into())]), + meta: vec![("rpc.grpc.status_code".into(), "OK".into())].into(), ..Default::default() }, FixedAggregationKey { @@ -739,7 +744,7 @@ mod tests { // Span with grpc status from meta as numeric string ( SpanBytes { - meta: HashMap::from([("rpc.grpc.status_code".into(), "14".into())]), + meta: vec![("rpc.grpc.status_code".into(), "14".into())].into(), ..Default::default() }, FixedAggregationKey { @@ -752,7 +757,7 @@ mod tests { // Span with grpc status from meta with StatusCode. prefix ( SpanBytes { - meta: HashMap::from([("grpc.code".into(), "StatusCode.UNAVAILABLE".into())]), + meta: vec![("grpc.code".into(), "StatusCode.UNAVAILABLE".into())].into(), ..Default::default() }, FixedAggregationKey { @@ -765,11 +770,8 @@ mod tests { // Span with grpc status from metrics takes precedence over meta ( SpanBytes { - meta: HashMap::from([( - "rpc.grpc.status_code".into(), - "PERMISSION_DENIED".into(), - )]), - metrics: HashMap::from([("rpc.grpc.status_code".into(), 2.0)]), + meta: vec![("rpc.grpc.status_code".into(), "PERMISSION_DENIED".into())].into(), + metrics: vec![("rpc.grpc.status_code".into(), 2.0)].into(), ..Default::default() }, FixedAggregationKey { @@ -782,7 +784,7 @@ mod tests { // Span with grpc status from metrics via secondary key ( SpanBytes { - metrics: HashMap::from([("grpc.code".into(), 3.0)]), + metrics: vec![("grpc.code".into(), 3.0)].into(), ..Default::default() }, FixedAggregationKey { @@ -795,7 +797,7 @@ mod tests { // Span with invalid grpc status string ( SpanBytes { - meta: HashMap::from([("rpc.grpc.status_code".into(), "NOPE".into())]), + meta: vec![("rpc.grpc.status_code".into(), "NOPE".into())].into(), ..Default::default() }, FixedAggregationKey { @@ -812,7 +814,7 @@ mod tests { resource: "res".into(), span_id: 1, parent_id: 0, - meta: HashMap::from([("_dd.svc_src".into(), "redis".into())]), + meta: vec![("_dd.svc_src".into(), "redis".into())].into(), ..Default::default() }, FixedAggregationKey { @@ -833,7 +835,7 @@ mod tests { resource: "res".into(), span_id: 1, parent_id: 0, - meta: HashMap::from([("_dd.svc_src".into(), "opt.split_by_tag".into())]), + meta: vec![("_dd.svc_src".into(), "opt.split_by_tag".into())].into(), ..Default::default() }, FixedAggregationKey { @@ -883,7 +885,7 @@ mod tests { resource: "res", span_id: 1, parent_id: 0, - meta: HashMap::from([("span.kind", "client"), ("aws.s3.bucket", "bucket-a")]), + meta: vec![("span.kind", "client"), ("aws.s3.bucket", "bucket-a")].into(), ..Default::default() }, FixedAggregationKey { @@ -904,12 +906,13 @@ mod tests { resource: "res", span_id: 1, parent_id: 0, - meta: HashMap::from([ + meta: vec![ ("span.kind", "producer"), ("aws.s3.bucket", "bucket-a"), ("db.instance", "dynamo.test.us1"), ("db.system", "dynamodb"), - ]), + ] + .into(), ..Default::default() }, FixedAggregationKey { @@ -935,12 +938,13 @@ mod tests { resource: "res", span_id: 1, parent_id: 0, - meta: HashMap::from([ + meta: vec![ ("span.kind", "server"), ("aws.s3.bucket", "bucket-a"), ("db.instance", "dynamo.test.us1"), ("db.system", "dynamodb"), - ]), + ] + .into(), ..Default::default() }, FixedAggregationKey { @@ -989,11 +993,12 @@ mod tests { resource: "res", span_id: 1, parent_id: 0, - meta: HashMap::from([ + meta: vec![ ("span.kind", "client"), ("peer.hostname", "10.1.2.3"), ("db.instance", "my-db"), - ]), + ] + .into(), ..Default::default() }; let key = BorrowedAggregationKey::from_span(&span_ipv4, &peer_tag_keys); @@ -1016,10 +1021,11 @@ mod tests { resource: "res", span_id: 1, parent_id: 0, - meta: HashMap::from([ + meta: vec![ ("span.kind", "client"), ("peer.hostname", "2001:db8:3333:4444:CCCC:DDDD:EEEE:FFFF"), - ]), + ] + .into(), ..Default::default() }; let ipv6_keys = vec!["peer.hostname".to_string()]; @@ -1040,7 +1046,7 @@ mod tests { resource: "res", span_id: 1, parent_id: 0, - meta: HashMap::from([("span.kind", "client"), ("db.instance", "dynamo.test.us1")]), + meta: vec![("span.kind", "client"), ("db.instance", "dynamo.test.us1")].into(), ..Default::default() }; let non_ip_keys = vec!["db.instance".to_string()]; diff --git a/libdd-trace-stats/src/span_concentrator/tests.rs b/libdd-trace-stats/src/span_concentrator/tests.rs index c702344312..56039c4d5b 100644 --- a/libdd-trace-stats/src/span_concentrator/tests.rs +++ b/libdd-trace-stats/src/span_concentrator/tests.rs @@ -4,6 +4,7 @@ use crate::span_concentrator::aggregation::OwnedAggregationKey; use super::*; +use libdd_trace_utils::span::v04::VecMap; use libdd_trace_utils::span::{trace_utils::compute_top_level_span, v04::SpanSlice}; use rand::{thread_rng, Rng}; @@ -72,7 +73,7 @@ fn get_test_span_with_meta<'a>( for (k, v) in meta { span.meta.insert(*k, *v); } - span.metrics = HashMap::new(); + span.metrics = VecMap::new(); for (k, v) in metrics { span.metrics.insert(*k, *v); } @@ -1197,119 +1198,119 @@ fn test_compute_stats_for_span_kind() { let test_cases: Vec<(SpanSlice, bool)> = vec![ ( SpanSlice { - meta: HashMap::from([("span.kind", "server")]), + meta: vec![("span.kind", "server")].into(), ..Default::default() }, true, ), ( SpanSlice { - meta: HashMap::from([("span.kind", "consumer")]), + meta: vec![("span.kind", "consumer")].into(), ..Default::default() }, true, ), ( SpanSlice { - meta: HashMap::from([("span.kind", "client")]), + meta: vec![("span.kind", "client")].into(), ..Default::default() }, true, ), ( SpanSlice { - meta: HashMap::from([("span.kind", "producer")]), + meta: vec![("span.kind", "producer")].into(), ..Default::default() }, true, ), ( SpanSlice { - meta: HashMap::from([("span.kind", "internal")]), + meta: vec![("span.kind", "internal")].into(), ..Default::default() }, false, ), ( SpanSlice { - meta: HashMap::from([("span.kind", "SERVER")]), + meta: vec![("span.kind", "SERVER")].into(), ..Default::default() }, true, ), ( SpanSlice { - meta: HashMap::from([("span.kind", "CONSUMER")]), + meta: vec![("span.kind", "CONSUMER")].into(), ..Default::default() }, true, ), ( SpanSlice { - meta: HashMap::from([("span.kind", "CLIENT")]), + meta: vec![("span.kind", "CLIENT")].into(), ..Default::default() }, true, ), ( SpanSlice { - meta: HashMap::from([("span.kind", "PRODUCER")]), + meta: vec![("span.kind", "PRODUCER")].into(), ..Default::default() }, true, ), ( SpanSlice { - meta: HashMap::from([("span.kind", "INTERNAL")]), + meta: vec![("span.kind", "INTERNAL")].into(), ..Default::default() }, false, ), ( SpanSlice { - meta: HashMap::from([("span.kind", "SerVER")]), + meta: vec![("span.kind", "SerVER")].into(), ..Default::default() }, true, ), ( SpanSlice { - meta: HashMap::from([("span.kind", "ConSUMeR")]), + meta: vec![("span.kind", "ConSUMeR")].into(), ..Default::default() }, true, ), ( SpanSlice { - meta: HashMap::from([("span.kind", "CLiENT")]), + meta: vec![("span.kind", "CLiENT")].into(), ..Default::default() }, true, ), ( SpanSlice { - meta: HashMap::from([("span.kind", "PROducER")]), + meta: vec![("span.kind", "PROducER")].into(), ..Default::default() }, true, ), ( SpanSlice { - meta: HashMap::from([("span.kind", "INtERNAL")]), + meta: vec![("span.kind", "INtERNAL")].into(), ..Default::default() }, false, ), ( SpanSlice { - meta: HashMap::from([("span.kind", "")]), + meta: vec![("span.kind", "")].into(), ..Default::default() }, false, ), ( SpanSlice { - meta: HashMap::from([]), + meta: vec![].into(), ..Default::default() }, false, diff --git a/libdd-trace-utils/src/msgpack_decoder/decode/map.rs b/libdd-trace-utils/src/msgpack_decoder/decode/map.rs index 9d25ffd423..be5ae1d943 100644 --- a/libdd-trace-utils/src/msgpack_decoder/decode/map.rs +++ b/libdd-trace-utils/src/msgpack_decoder/decode/map.rs @@ -2,6 +2,7 @@ // SPDX-License-Identifier: Apache-2.0 use crate::msgpack_decoder::decode::{buffer::Buffer, error::DecodeError}; +use crate::span::vec_map::VecMap; use crate::span::DeserializableTraceData; use rmp::{decode, decode::RmpRead, Marker}; use std::collections::HashMap; @@ -51,6 +52,27 @@ where Ok(map) } +/// Reads a map from the buffer and returns it as a `VecMap`. +/// +/// Like `read_map` but returns pairs in a VecMap instead of HashMap for better +/// cache locality when iteration order doesn't matter. +#[inline] +pub fn read_map_vec( + len: usize, + buf: &mut B, + read_pair: F, +) -> Result, DecodeError> +where + F: Fn(&mut B) -> Result<(K, V), DecodeError>, +{ + let mut map = VecMap::with_capacity(len); + for _ in 0..len { + let (k, v) = read_pair(buf)?; + map.insert(k, v); + } + Ok(map) +} + /// Reads map length from the buffer /// /// # Arguments diff --git a/libdd-trace-utils/src/msgpack_decoder/decode/meta_struct.rs b/libdd-trace-utils/src/msgpack_decoder/decode/meta_struct.rs index 54fbd62bdc..31ae8eb4a7 100644 --- a/libdd-trace-utils/src/msgpack_decoder/decode/meta_struct.rs +++ b/libdd-trace-utils/src/msgpack_decoder/decode/meta_struct.rs @@ -3,11 +3,11 @@ use crate::msgpack_decoder::decode::buffer::Buffer; use crate::msgpack_decoder::decode::error::DecodeError; -use crate::msgpack_decoder::decode::map::{read_map, read_map_len}; +use crate::msgpack_decoder::decode::map::{read_map_len, read_map_vec}; use crate::msgpack_decoder::decode::string::handle_null_marker; +use crate::span::vec_map::VecMap; use crate::span::DeserializableTraceData; use rmp::decode; -use std::collections::HashMap; fn read_byte_array_len( buf: &mut Buffer, @@ -20,9 +20,9 @@ fn read_byte_array_len( #[inline] pub fn read_meta_struct( buf: &mut Buffer, -) -> Result, DecodeError> { +) -> Result, DecodeError> { if handle_null_marker(buf) { - return Ok(HashMap::default()); + return Ok(VecMap::new()); } fn read_meta_struct_pair( @@ -41,7 +41,7 @@ pub fn read_meta_struct( } let len = read_map_len(buf)?; - read_map(len, buf, read_meta_struct_pair) + read_map_vec(len, buf, read_meta_struct_pair) } #[cfg(test)] @@ -49,6 +49,7 @@ mod tests { use super::*; use crate::span::SliceData; use libdd_tinybytes::Bytes; + use std::collections::HashMap; #[test] fn read_meta_test() { @@ -58,7 +59,8 @@ mod tests { let mut slice = Buffer::::new(serialized.as_ref()); let res = read_meta_struct(&mut slice).unwrap(); - assert_eq!(res.get("key").unwrap().to_vec(), vec![1, 2, 3, 4]); + let val = res.iter().find(|(k, _)| *k == "key").map(|(_, v)| v); + assert_eq!(val.unwrap().to_vec(), vec![1, 2, 3, 4]); } #[test] diff --git a/libdd-trace-utils/src/msgpack_decoder/decode/metrics.rs b/libdd-trace-utils/src/msgpack_decoder/decode/metrics.rs index bee6d90e53..c4b2dd5e3c 100644 --- a/libdd-trace-utils/src/msgpack_decoder/decode/metrics.rs +++ b/libdd-trace-utils/src/msgpack_decoder/decode/metrics.rs @@ -3,11 +3,11 @@ use crate::msgpack_decoder::decode::buffer::Buffer; use crate::msgpack_decoder::decode::error::DecodeError; -use crate::msgpack_decoder::decode::map::{read_map, read_map_len}; +use crate::msgpack_decoder::decode::map::{read_map_len, read_map_vec}; use crate::msgpack_decoder::decode::number::read_number; use crate::msgpack_decoder::decode::string::handle_null_marker; +use crate::span::vec_map::VecMap; use crate::span::DeserializableTraceData; -use std::collections::HashMap; #[inline] pub fn read_metric_pair( @@ -21,12 +21,12 @@ pub fn read_metric_pair( #[inline] pub fn read_metrics( buf: &mut Buffer, -) -> Result, DecodeError> { +) -> Result, DecodeError> { if handle_null_marker(buf) { - return Ok(HashMap::default()); + return Ok(VecMap::new()); } let len = read_map_len(buf)?; - read_map(len, buf, read_metric_pair) + read_map_vec(len, buf, read_metric_pair) } diff --git a/libdd-trace-utils/src/msgpack_decoder/decode/span_link.rs b/libdd-trace-utils/src/msgpack_decoder/decode/span_link.rs index 5ceaa7a5b6..af32dc3ff9 100644 --- a/libdd-trace-utils/src/msgpack_decoder/decode/span_link.rs +++ b/libdd-trace-utils/src/msgpack_decoder/decode/span_link.rs @@ -4,7 +4,7 @@ use crate::msgpack_decoder::decode::buffer::Buffer; use crate::msgpack_decoder::decode::error::DecodeError; use crate::msgpack_decoder::decode::number::read_number; -use crate::msgpack_decoder::decode::string::{handle_null_marker, read_str_map_to_strings}; +use crate::msgpack_decoder::decode::string::{handle_null_marker, read_str_map_to_hashmap}; use crate::span::v04::SpanLink; use crate::span::DeserializableTraceData; use std::borrow::Borrow; @@ -84,7 +84,7 @@ fn decode_span_link( SpanLinkKey::TraceId => span.trace_id = read_number(buf)?, SpanLinkKey::TraceIdHigh => span.trace_id_high = read_number(buf)?, SpanLinkKey::SpanId => span.span_id = read_number(buf)?, - SpanLinkKey::Attributes => span.attributes = read_str_map_to_strings(buf)?, + SpanLinkKey::Attributes => span.attributes = read_str_map_to_hashmap(buf)?, SpanLinkKey::Tracestate => span.tracestate = buf.read_string()?, SpanLinkKey::Flags => span.flags = read_number(buf)?, } diff --git a/libdd-trace-utils/src/msgpack_decoder/decode/string.rs b/libdd-trace-utils/src/msgpack_decoder/decode/string.rs index 791471d571..69f1814f43 100644 --- a/libdd-trace-utils/src/msgpack_decoder/decode/string.rs +++ b/libdd-trace-utils/src/msgpack_decoder/decode/string.rs @@ -3,9 +3,9 @@ use crate::msgpack_decoder::decode::buffer::Buffer; use crate::msgpack_decoder::decode::error::DecodeError; +use crate::span::vec_map::VecMap; use crate::span::DeserializableTraceData; use rmp::decode; -use std::collections::HashMap; // https://docs.rs/rmp/latest/rmp/enum.Marker.html#variant.Null (0xc0 == 192) const NULL_MARKER: &u8 = &0xc0; @@ -25,21 +25,21 @@ pub fn read_nullable_string( } } -/// Read a hashmap of (string, string) from the slices `buf`. +/// Read a [VecMap] of `(String, String)` pairs from the slices `buf`. /// /// # Errors -/// Fails if the buffer does not contain a valid map length prefix, -/// or if any key or value is not a valid utf8 msgpack string. -/// Null values are skipped (key not inserted into map). +/// +/// Fails if the buffer does not contain a valid map length prefix, or if any key or value is not a +/// valid utf8 msgpack string. +/// Null values are skipped. #[inline] -pub fn read_str_map_to_strings( +pub fn read_str_map_to_vecmap( buf: &mut Buffer, -) -> Result, DecodeError> { +) -> Result, DecodeError> { let len = decode::read_map_len(buf.as_mut_slice()) .map_err(|_| DecodeError::InvalidFormat("Unable to get map len for str map".to_owned()))?; - #[allow(clippy::expect_used)] - let mut map = HashMap::with_capacity(len.try_into().expect("Unable to cast map len to usize")); + let mut map = VecMap::with_capacity(len.try_into().unwrap_or_default()); for _ in 0..len { let key = buf.read_string()?; // Only insert if value is not null @@ -51,21 +51,44 @@ pub fn read_str_map_to_strings( Ok(map) } -/// Read a nullable hashmap of (string, string) from the slices `buf`. +/// Read a nullable vec of (string, string) pairs from the slices `buf`. /// /// # Errors /// Fails if the buffer does not contain a valid map length prefix, /// or if any key or value is not a valid utf8 msgpack string. -/// Null values are skipped (key not inserted into map). +/// Null values are skipped (key not inserted into vec). #[inline] pub fn read_nullable_str_map_to_strings( buf: &mut Buffer, -) -> Result, DecodeError> { +) -> Result, DecodeError> { if handle_null_marker(buf) { - return Ok(HashMap::default()); + return Ok(VecMap::new()); } - read_str_map_to_strings(buf) + read_str_map_to_vecmap(buf) +} + +/// Read a hashmap of (string, string) from the slices `buf`. +/// Used for SpanLink/SpanEvent attributes which remain as HashMap. +#[inline] +pub fn read_str_map_to_hashmap( + buf: &mut Buffer, +) -> Result, DecodeError> +where + T::Text: std::hash::Hash + Eq, +{ + let len = decode::read_map_len(buf.as_mut_slice()) + .map_err(|_| DecodeError::InvalidFormat("Unable to get map len for str map".to_owned()))?; + + let mut map = std::collections::HashMap::with_capacity(len.try_into().unwrap_or_default()); + for _ in 0..len { + let key = buf.read_string()?; + if !handle_null_marker(buf) { + let value = buf.read_string()?; + map.insert(key, value); + } + } + Ok(map) } /// Handle the null value by peeking if the next value is a null marker, and will only advance the diff --git a/libdd-trace-utils/src/msgpack_decoder/v04/mod.rs b/libdd-trace-utils/src/msgpack_decoder/v04/mod.rs index f7ebfca73d..503e86cd88 100644 --- a/libdd-trace-utils/src/msgpack_decoder/v04/mod.rs +++ b/libdd-trace-utils/src/msgpack_decoder/v04/mod.rs @@ -269,8 +269,7 @@ mod tests { fn test_decoder_meta_struct_success() { let data = vec![1, 2, 3, 4]; let mut span = create_test_no_alloc_span(1, 2, 0, 0, false); - span.meta_struct = - HashMap::from([(BytesString::from("meta_key"), Bytes::from(data.clone()))]); + span.meta_struct = vec![(BytesString::from("meta_key"), Bytes::from(data.clone()))].into(); let encoded_data = rmp_serde::to_vec_named(&vec![vec![span]]).unwrap(); let (decoded_traces, _) = @@ -305,10 +304,8 @@ mod tests { let decoded_span = &decoded_traces[0][0]; for (key, value) in expected_meta.iter() { - assert_eq!( - value, - &decoded_span.meta[&BytesString::from_slice(key.as_ref()).unwrap()].as_str() - ); + let bs_key = BytesString::from_slice(key.as_ref()).unwrap(); + assert_eq!(value, &decoded_span.meta.get(&bs_key).unwrap().as_str()); } } @@ -351,10 +348,8 @@ mod tests { let decoded_span = &decoded_traces[0][0]; for (key, value) in expected_meta.iter() { - assert_eq!( - value, - &decoded_span.meta[&BytesString::from_slice(key.as_ref()).unwrap()].as_str() - ); + let bs_key = BytesString::from_slice(key.as_ref()).unwrap(); + assert_eq!(value, &decoded_span.meta.get(&bs_key).unwrap().as_str()); } } @@ -373,10 +368,8 @@ mod tests { let decoded_span = &decoded_traces[0][0]; for (key, value) in expected_metrics.iter() { - assert_eq!( - value, - &decoded_span.metrics[&BytesString::from_slice(key.as_ref()).unwrap()] - ); + let bs_key = BytesString::from_slice(key.as_ref()).unwrap(); + assert_eq!(value, decoded_span.metrics.get(&bs_key).unwrap()); } } @@ -396,10 +389,8 @@ mod tests { let decoded_span = &decoded_traces[0][0]; for (key, value) in expected_metrics.iter() { - assert_eq!( - value, - &decoded_span.metrics[&BytesString::from_slice(key.as_ref()).unwrap()] - ); + let bs_key = BytesString::from_slice(key.as_ref()).unwrap(); + assert_eq!(value, decoded_span.metrics.get(&bs_key).unwrap()); } } @@ -509,20 +500,15 @@ mod tests { assert_eq!(1, decoded_traces[0].len()); let decoded_span = &decoded_traces[0][0]; - assert_eq!( - "value1", - decoded_span.meta[&BytesString::from_slice("key1".as_ref()).unwrap()].as_str() - ); + let key1 = BytesString::from_slice("key1".as_ref()).unwrap(); + let key2 = BytesString::from_slice("key2".as_ref()).unwrap(); + let key3 = BytesString::from_slice("key3".as_ref()).unwrap(); + assert_eq!("value1", decoded_span.meta.get(&key1).unwrap().as_str()); assert!( - !decoded_span - .meta - .contains_key(&BytesString::from_slice("key2".as_ref()).unwrap()), + !decoded_span.meta.contains_key(&key2), "Null value should be skipped, but key was present" ); - assert_eq!( - "value3", - decoded_span.meta[&BytesString::from_slice("key3".as_ref()).unwrap()].as_str() - ); + assert_eq!("value3", decoded_span.meta.get(&key3).unwrap().as_str()); } #[test] @@ -659,14 +645,16 @@ mod tests { service: BytesString::from_slice(service.as_ref()).unwrap(), resource: BytesString::from_slice(resource.as_ref()).unwrap(), r#type: BytesString::from_slice(span_type.as_ref()).unwrap(), - meta: HashMap::from([( + meta: vec![( BytesString::from_slice(meta_key.as_ref()).unwrap(), BytesString::from_slice(meta_value.as_ref()).unwrap(), - )]), - metrics: HashMap::from([( + )] + .into(), + metrics: vec![( BytesString::from_slice(metric_key.as_ref()).unwrap(), metric_value.parse::().unwrap_or_default(), - )]), + )] + .into(), trace_id: trace_id as u128, span_id, parent_id, diff --git a/libdd-trace-utils/src/msgpack_decoder/v05/mod.rs b/libdd-trace-utils/src/msgpack_decoder/v05/mod.rs index 90b62261c2..a6eac76669 100644 --- a/libdd-trace-utils/src/msgpack_decoder/v05/mod.rs +++ b/libdd-trace-utils/src/msgpack_decoder/v05/mod.rs @@ -6,8 +6,8 @@ use crate::msgpack_decoder::decode::{ buffer::Buffer, map::read_map_len, number::read_number, string::handle_null_marker, }; use crate::span::v04::{Span, SpanBytes, SpanSlice}; +use crate::span::vec_map::VecMap; use crate::span::DeserializableTraceData; -use std::collections::HashMap; const PAYLOAD_LEN: u32 = 2; const SPAN_ELEM_COUNT: u32 = 12; @@ -234,15 +234,14 @@ where fn read_indexed_map_to_bytes_strings( buf: &mut Buffer, dict: &[T::Text], -) -> Result, DecodeError> +) -> Result, DecodeError> where T::Text: Clone, { let len = rmp::decode::read_map_len(buf.as_mut_slice()) .map_err(|_| DecodeError::InvalidFormat("Unable to get map len for str map".to_owned()))?; - #[allow(clippy::expect_used)] - let mut map = HashMap::with_capacity(len.try_into().expect("Unable to cast map len to usize")); + let mut map = VecMap::with_capacity(len.try_into().unwrap_or_default()); for _ in 0..len { let key = get_from_dict(buf, dict)?; let value = get_from_dict(buf, dict)?; @@ -254,17 +253,17 @@ where fn read_metrics( buf: &mut Buffer, dict: &[T::Text], -) -> Result, DecodeError> +) -> Result, DecodeError> where T::Text: Clone, { if handle_null_marker(buf) { - return Ok(HashMap::default()); + return Ok(VecMap::new()); } let len = read_map_len(buf)?; - let mut map = HashMap::with_capacity(len); + let mut map = VecMap::with_capacity(len); for _ in 0..len { let k = get_from_dict(buf, dict)?; let v = read_number(buf)?; diff --git a/libdd-trace-utils/src/msgpack_encoder/v04/span.rs b/libdd-trace-utils/src/msgpack_encoder/v04/span.rs index c86325bf25..b4e8f7301c 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v04/span.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v04/span.rs @@ -288,8 +288,9 @@ pub fn encode_span( if !span.meta.is_empty() { write_const_msg_pack_str!(writer, "meta")?; - rmp::encode::write_map_len(writer, span.meta.len() as u32)?; - for (k, v) in span.meta.iter() { + let meta_dd = span.meta.defensive_dedup(); + rmp::encode::write_map_len(writer, meta_dd.len() as u32)?; + for (k, v) in meta_dd.iter() { write_str(writer, k.borrow())?; write_str(writer, v.borrow())?; } @@ -297,8 +298,9 @@ pub fn encode_span( if !span.metrics.is_empty() { write_const_msg_pack_str!(writer, "metrics")?; - rmp::encode::write_map_len(writer, span.metrics.len() as u32)?; - for (k, v) in span.metrics.iter() { + let metrics_dd = span.metrics.defensive_dedup(); + rmp::encode::write_map_len(writer, metrics_dd.len() as u32)?; + for (k, v) in metrics_dd.iter() { write_str(writer, k.borrow())?; write_f64(writer, *v)?; } @@ -311,8 +313,9 @@ pub fn encode_span( if !span.meta_struct.is_empty() { write_const_msg_pack_str!(writer, "meta_struct")?; - rmp::encode::write_map_len(writer, span.meta_struct.len() as u32)?; - for (k, v) in span.meta_struct.iter() { + let meta_struct_dd = span.meta_struct.defensive_dedup(); + rmp::encode::write_map_len(writer, meta_struct_dd.len() as u32)?; + for (k, v) in meta_struct_dd.iter() { write_str(writer, k.borrow())?; write_bin(writer, v.borrow())?; } diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs index f72567f917..9bb50e393d 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/mod.rs @@ -426,7 +426,6 @@ mod tests { use super::*; use crate::span::v04::SpanBytes; use libdd_tinybytes::BytesString; - use std::collections::HashMap; fn make_span( service: &str, @@ -490,13 +489,12 @@ mod tests { #[test] fn test_chunk_level_attrs_origin_and_priority() { - let mut meta = HashMap::new(); - meta.insert( + let meta = vec![( BytesString::from_static("_dd.origin"), BytesString::from_static("lambda"), - ); - let mut metrics = HashMap::new(); - metrics.insert(BytesString::from_static("_sampling_priority_v1"), 1.0f64); + )] + .into(); + let metrics = vec![(BytesString::from_static("_sampling_priority_v1"), 1.0f64)].into(); let root = SpanBytes { service: BytesString::from_slice(b"svc").unwrap(), @@ -540,9 +538,11 @@ mod tests { #[test] fn test_remote_parent_root_span_top_level() { // A span with a non-zero parent_id but _dd.top_level=1.0 is a root in its chunk. - let mut metrics = HashMap::new(); - metrics.insert(BytesString::from_static("_dd.top_level"), 1.0f64); - metrics.insert(BytesString::from_static("_sampling_priority_v1"), 2.0f64); + let metrics = vec![ + (BytesString::from_static("_dd.top_level"), 1.0f64), + (BytesString::from_static("_sampling_priority_v1"), 2.0f64), + ] + .into(); let root = SpanBytes { service: BytesString::from_slice(b"svc").unwrap(), @@ -563,19 +563,21 @@ mod tests { #[test] fn test_payload_promoted_fields() { - let mut meta = HashMap::new(); - meta.insert( - BytesString::from_static("env"), - BytesString::from_static("prod"), - ); - meta.insert( - BytesString::from_static("version"), - BytesString::from_static("1.2.3"), - ); - meta.insert( - BytesString::from_static("_dd.hostname"), - BytesString::from_static("my-host"), - ); + let meta = vec![ + ( + BytesString::from_static("env"), + BytesString::from_static("prod"), + ), + ( + BytesString::from_static("version"), + BytesString::from_static("1.2.3"), + ), + ( + BytesString::from_static("_dd.hostname"), + BytesString::from_static("my-host"), + ), + ] + .into(); let span = SpanBytes { service: BytesString::from_slice(b"svc").unwrap(), @@ -605,15 +607,17 @@ mod tests { #[test] fn test_payload_attributes_apm_mode_and_git_commit_sha() { - let mut meta = HashMap::new(); - meta.insert( - BytesString::from_static("_dd.apm_mode"), - BytesString::from_static("ssi"), - ); - meta.insert( - BytesString::from_static("_dd.git.commit.sha"), - BytesString::from_static("abc123"), - ); + let meta = vec![ + ( + BytesString::from_static("_dd.apm_mode"), + BytesString::from_static("ssi"), + ), + ( + BytesString::from_static("_dd.git.commit.sha"), + BytesString::from_static("abc123"), + ), + ] + .into(); let span = SpanBytes { service: BytesString::from_slice(b"svc").unwrap(), @@ -704,11 +708,11 @@ mod tests { #[test] fn test_128bit_trace_id_from_dd_p_tid() { - let mut meta = HashMap::new(); - meta.insert( + let meta = vec![( BytesString::from_static("_dd.p.tid"), BytesString::from_static("640cfd5400000000"), - ); + )] + .into(); let span = SpanBytes { service: BytesString::from_slice(b"svc").unwrap(), name: BytesString::from_slice(b"op").unwrap(), @@ -759,11 +763,11 @@ mod tests { fn test_sampling_mechanism_negative_value() { // `_dd.p.dm` is a signed integer stored as a string (e.g. "-4" → manual rule). // The encoder must parse it, take unsigned_abs, and emit it at chunk level. - let mut meta = HashMap::new(); - meta.insert( + let meta = vec![( BytesString::from_static("_dd.p.dm"), BytesString::from_static("-4"), - ); + )] + .into(); let root = SpanBytes { service: BytesString::from_slice(b"svc").unwrap(), name: BytesString::from_slice(b"op").unwrap(), @@ -792,18 +796,17 @@ mod tests { fn test_chunk_attrs_fallback_no_root_span() { // Partial flush: no root span (every span has a non-zero parent and no // `_dd.top_level`). Values must be accumulated from non-root spans. - let mut meta1 = HashMap::new(); - meta1.insert( + let meta1 = vec![( BytesString::from_static("_dd.origin"), BytesString::from_static("lambda"), - ); - let mut metrics2 = HashMap::new(); - metrics2.insert(BytesString::from_static("_sampling_priority_v1"), 2.0f64); - let mut meta3 = HashMap::new(); - meta3.insert( + )] + .into(); + let metrics2 = vec![(BytesString::from_static("_sampling_priority_v1"), 2.0f64)].into(); + let meta3 = vec![( BytesString::from_static("_dd.p.dm"), BytesString::from_static("-3"), - ); + )] + .into(); let s1 = SpanBytes { service: BytesString::from_slice(b"svc").unwrap(), diff --git a/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs b/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs index 2c5962f8f1..ac7472de00 100644 --- a/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs +++ b/libdd-trace-utils/src/msgpack_encoder/v1/span_v04.rs @@ -255,8 +255,14 @@ pub fn encode_span( "env" | "version" | "component" | "span.kind" | "_dd.p.tid" ) }; - let non_promoted_meta = span.meta.iter().filter(|(k, _)| !is_promoted(k)).count() as u32; - let attr_count = non_promoted_meta + span.metrics.len() as u32 + span.meta_struct.len() as u32; + let meta_dd = span.meta.defensive_dedup(); + let metrics_dd = span.metrics.defensive_dedup(); + let meta_struct_dd = span.meta_struct.defensive_dedup(); + + let non_promoted_meta = meta_dd.iter().filter(|(k, _)| !is_promoted(k)).count() as u32; + let metrics_len = metrics_dd.len() as u32; + let meta_struct_len = meta_struct_dd.len() as u32; + let attr_count = non_promoted_meta + metrics_len + meta_struct_len; let has_attributes = attr_count > 0; let env = span.meta.get("env").map(|v| v.borrow()); @@ -344,25 +350,25 @@ pub fn encode_span( write_uint8(writer, SpanKey::Attributes as u8)?; rmp::encode::write_array_len(writer, attr_count * 3)?; - for (k, v) in span.meta.iter() { + for (k, v) in meta_dd.iter() { if is_promoted(k) { continue; } - table.write_interned(writer, k.borrow())?; + table.write_interned(writer, (*k).borrow())?; write_uint8(writer, AnyValueKey::String as u8)?; - table.write_interned(writer, v.borrow())?; + table.write_interned(writer, (*v).borrow())?; } - for (k, v) in span.metrics.iter() { - table.write_interned(writer, k.borrow())?; + for (k, v) in metrics_dd.iter() { + table.write_interned(writer, (*k).borrow())?; write_uint8(writer, AnyValueKey::Double as u8)?; write_f64(writer, *v)?; } - for (k, v) in span.meta_struct.iter() { - table.write_interned(writer, k.borrow())?; + for (k, v) in meta_struct_dd.iter() { + table.write_interned(writer, (*k).borrow())?; write_uint8(writer, AnyValueKey::Bytes as u8)?; - write_bin(writer, v.borrow())?; + write_bin(writer, (*v).borrow())?; } } diff --git a/libdd-trace-utils/src/span/trace_utils.rs b/libdd-trace-utils/src/span/trace_utils.rs index 8dd8a03b05..00faad9516 100644 --- a/libdd-trace-utils/src/span/trace_utils.rs +++ b/libdd-trace-utils/src/span/trace_utils.rs @@ -13,16 +13,12 @@ const TRACER_TOP_LEVEL_KEY: &str = "_dd.top_level"; const MEASURED_KEY: &str = "_dd.measured"; const PARTIAL_VERSION_KEY: &str = "_dd.partial_version"; -fn set_top_level_span(span: &mut Span, is_top_level: bool) +fn set_top_level_span(span: &mut Span) where T: TraceData, { - if is_top_level { - span.metrics - .insert(T::Text::from_static_str(TOP_LEVEL_KEY), 1.0); - } else { - span.metrics.remove(TOP_LEVEL_KEY); - } + span.metrics + .insert(T::Text::from_static_str(TOP_LEVEL_KEY), 1.0); } /// Updates all the spans top-level attribute. @@ -42,19 +38,19 @@ where for span_idx in 0..trace.len() { let parent_id = trace[span_idx].parent_id; if parent_id == 0 { - set_top_level_span(&mut trace[span_idx], true); + set_top_level_span(&mut trace[span_idx]); continue; } match span_id_idx.get(&parent_id).map(|i| &trace[*i].service) { Some(parent_span_service) => { if !(parent_span_service == &trace[span_idx].service) { // parent is not in the same service - set_top_level_span(&mut trace[span_idx], true) + set_top_level_span(&mut trace[span_idx]) } } None => { // span has no parent in chunk - set_top_level_span(&mut trace[span_idx], true) + set_top_level_span(&mut trace[span_idx]) } } } @@ -164,7 +160,7 @@ where #[cfg(test)] mod tests { use super::*; - use crate::span::v04::SpanBytes; + use crate::span::v04::{SpanBytes, VecMap}; fn create_test_span( trace_id: u64, @@ -183,14 +179,15 @@ mod tests { start, duration: 5, error: 0, - meta: HashMap::from([ + meta: vec![ ("service".into(), "test-service".into()), ("env".into(), "test-env".into()), ("runtime-id".into(), "test-runtime-id-value".into()), - ]), - metrics: HashMap::new(), + ] + .into(), + metrics: VecMap::new(), r#type: "".into(), - meta_struct: HashMap::new(), + meta_struct: VecMap::new(), span_links: vec![], span_events: vec![], }; @@ -259,10 +256,11 @@ mod tests { let chunk_with_priority = vec![ SpanBytes { span_id: 1, - metrics: HashMap::from([ + metrics: vec![ (SAMPLING_PRIORITY_KEY.into(), 1.0), (TRACER_TOP_LEVEL_KEY.into(), 1.0), - ]), + ] + .into(), ..Default::default() }, SpanBytes { @@ -274,10 +272,11 @@ mod tests { let chunk_with_null_priority = vec![ SpanBytes { span_id: 1, - metrics: HashMap::from([ + metrics: vec![ (SAMPLING_PRIORITY_KEY.into(), 0.0), (TRACER_TOP_LEVEL_KEY.into(), 1.0), - ]), + ] + .into(), ..Default::default() }, SpanBytes { @@ -289,7 +288,7 @@ mod tests { let chunk_without_priority = vec![ SpanBytes { span_id: 1, - metrics: HashMap::from([(TRACER_TOP_LEVEL_KEY.into(), 1.0)]), + metrics: vec![(TRACER_TOP_LEVEL_KEY.into(), 1.0)].into(), ..Default::default() }, SpanBytes { @@ -301,10 +300,11 @@ mod tests { let chunk_with_multiple_top_level = vec![ SpanBytes { span_id: 1, - metrics: HashMap::from([ + metrics: vec![ (SAMPLING_PRIORITY_KEY.into(), -1.0), (TRACER_TOP_LEVEL_KEY.into(), 1.0), - ]), + ] + .into(), ..Default::default() }, SpanBytes { @@ -315,7 +315,7 @@ mod tests { SpanBytes { span_id: 4, parent_id: 3, - metrics: HashMap::from([(TRACER_TOP_LEVEL_KEY.into(), 1.0)]), + metrics: vec![(TRACER_TOP_LEVEL_KEY.into(), 1.0)].into(), ..Default::default() }, ]; @@ -323,10 +323,11 @@ mod tests { SpanBytes { span_id: 1, error: 1, - metrics: HashMap::from([ + metrics: vec![ (SAMPLING_PRIORITY_KEY.into(), 0.0), (TRACER_TOP_LEVEL_KEY.into(), 1.0), - ]), + ] + .into(), ..Default::default() }, SpanBytes { @@ -338,32 +339,34 @@ mod tests { let chunk_with_a_single_span = vec![ SpanBytes { span_id: 1, - metrics: HashMap::from([ + metrics: vec![ (SAMPLING_PRIORITY_KEY.into(), 0.0), (TRACER_TOP_LEVEL_KEY.into(), 1.0), - ]), + ] + .into(), ..Default::default() }, SpanBytes { span_id: 2, parent_id: 1, - metrics: HashMap::from([(SAMPLING_SINGLE_SPAN_MECHANISM.into(), 8.0)]), + metrics: vec![(SAMPLING_SINGLE_SPAN_MECHANISM.into(), 8.0)].into(), ..Default::default() }, ]; let chunk_with_analyzed_span = vec![ SpanBytes { span_id: 1, - metrics: HashMap::from([ + metrics: vec![ (SAMPLING_PRIORITY_KEY.into(), 0.0), (TRACER_TOP_LEVEL_KEY.into(), 1.0), - ]), + ] + .into(), ..Default::default() }, SpanBytes { span_id: 2, parent_id: 1, - metrics: HashMap::from([(SAMPLING_ANALYTICS_RATE_KEY.into(), 1.0)]), + metrics: vec![(SAMPLING_ANALYTICS_RATE_KEY.into(), 1.0)].into(), ..Default::default() }, ]; diff --git a/libdd-trace-utils/src/span/v04/mod.rs b/libdd-trace-utils/src/span/v04/mod.rs index 54edc57ee7..ba6079dca5 100644 --- a/libdd-trace-utils/src/span/v04/mod.rs +++ b/libdd-trace-utils/src/span/v04/mod.rs @@ -9,6 +9,8 @@ use std::borrow::Borrow; use std::collections::HashMap; use std::str::FromStr; +pub use super::vec_map::VecMap; + #[derive(Debug, PartialEq)] pub enum SpanKey { Service, @@ -70,7 +72,8 @@ fn is_empty_str>(value: &T) -> bool { /// let _ = span.meta.get("foo"); /// } /// ``` -#[derive(Debug, Default, PartialEq, Serialize)] +#[derive(Debug, Default, Serialize)] +#[cfg_attr(any(test, feature = "test-utils"), derive(PartialEq))] pub struct Span { pub service: T::Text, pub name: T::Text, @@ -86,12 +89,12 @@ pub struct Span { pub duration: i64, #[serde(skip_serializing_if = "is_default")] pub error: i32, - #[serde(skip_serializing_if = "HashMap::is_empty")] - pub meta: HashMap, - #[serde(skip_serializing_if = "HashMap::is_empty")] - pub metrics: HashMap, - #[serde(skip_serializing_if = "HashMap::is_empty")] - pub meta_struct: HashMap, + #[serde(skip_serializing_if = "VecMap::is_empty")] + pub meta: VecMap, + #[serde(skip_serializing_if = "VecMap::is_empty")] + pub metrics: VecMap, + #[serde(skip_serializing_if = "VecMap::is_empty")] + pub meta_struct: VecMap, #[serde(skip_serializing_if = "Vec::is_empty")] pub span_links: Vec>, #[serde(skip_serializing_if = "Vec::is_empty")] @@ -303,6 +306,15 @@ impl From<&AttributeArrayValue> for u8 { } } +impl Span { + /// Deduplicate the [VecMap] parts of this span. See [VecMap::dedup]. + pub fn dedup(&mut self) { + self.meta.dedup(); + self.metrics.dedup(); + self.meta_struct.dedup(); + } +} + fn is_default(t: &T) -> bool { t == &T::default() } diff --git a/libdd-trace-utils/src/span/v05/mod.rs b/libdd-trace-utils/src/span/v05/mod.rs index a4f4e8ad9d..905dc1815a 100644 --- a/libdd-trace-utils/src/span/v05/mod.rs +++ b/libdd-trace-utils/src/span/v05/mod.rs @@ -65,7 +65,7 @@ pub fn from_v04_span( #[cfg(test)] mod tests { use super::*; - use crate::span::v04::SpanBytes; + use crate::span::v04::{SpanBytes, VecMap}; use libdd_tinybytes::BytesString; #[test] @@ -81,12 +81,13 @@ mod tests { start: 1, duration: 111, error: 0, - meta: HashMap::from([( + meta: vec![( BytesString::from("meta_field"), BytesString::from("meta_value"), - )]), - metrics: HashMap::from([(BytesString::from("metrics_field"), 1.1)]), - meta_struct: HashMap::new(), + )] + .into(), + metrics: vec![(BytesString::from("metrics_field"), 1.1)].into(), + meta_struct: VecMap::new(), span_links: vec![], span_events: vec![], }; diff --git a/libdd-trace-utils/src/span/vec_map.rs b/libdd-trace-utils/src/span/vec_map.rs index 92cefd44bc..5e73942d14 100644 --- a/libdd-trace-utils/src/span/vec_map.rs +++ b/libdd-trace-utils/src/span/vec_map.rs @@ -201,8 +201,7 @@ impl VecMap { /// This is a convenience wrapper around [Self::as_deduped_map] used in the msgpack encoder, /// where we expect the map to be deduped, but call `as_deduped_map` as a defensive measure. If /// the latter had to deduplicate and allocate a new vec, we log a warning (at most once). - #[allow(unused)] - pub(crate) fn defensive_dedup(&self) -> DedupedVecMap<'_, K, V> { + pub fn defensive_dedup(&self) -> DedupedVecMap<'_, K, V> { if !self.is_deduped() { static WARNED: AtomicBool = AtomicBool::new(false); if !WARNED.swap(true, Ordering::Relaxed) { diff --git a/libdd-trace-utils/src/test_utils/mod.rs b/libdd-trace-utils/src/test_utils/mod.rs index e71f65fdd1..819999feed 100644 --- a/libdd-trace-utils/src/test_utils/mod.rs +++ b/libdd-trace-utils/src/test_utils/mod.rs @@ -15,7 +15,7 @@ use std::collections::HashMap; use std::time::Duration; use crate::send_data::SendData; -use crate::span::v04::SpanBytes; +use crate::span::v04::{SpanBytes, VecMap}; use crate::span::{v05, SharedDictBytes}; use crate::trace_utils::TracerHeaderTags; use crate::tracer_payload::TracerPayloadCollection; @@ -43,7 +43,7 @@ pub fn create_test_no_alloc_span( start, duration: 5, error: 0, - meta: HashMap::from([ + meta: vec![ ( BytesString::from_slice("service".as_ref()).unwrap(), BytesString::from_slice("test-service".as_ref()).unwrap(), @@ -56,10 +56,11 @@ pub fn create_test_no_alloc_span( BytesString::from_slice("runtime-id".as_ref()).unwrap(), BytesString::from_slice("test-runtime-id-value".as_ref()).unwrap(), ), - ]), - metrics: HashMap::new(), + ] + .into(), + metrics: VecMap::new(), r#type: BytesString::default(), - meta_struct: HashMap::new(), + meta_struct: VecMap::new(), span_links: vec![], span_events: vec![], }; diff --git a/libdd-trace-utils/src/trace_utils.rs b/libdd-trace-utils/src/trace_utils.rs index 3cd53b04d7..dd0099b365 100644 --- a/libdd-trace-utils/src/trace_utils.rs +++ b/libdd-trace-utils/src/trace_utils.rs @@ -424,19 +424,19 @@ pub fn compute_top_level_span(trace: &mut [pb::Span]) { } for span in trace.iter_mut() { if span.parent_id == 0 { - set_top_level_span(span, true); + set_top_level_span(span); continue; } match span_id_to_service.get(&span.parent_id) { Some(parent_span_service) => { if !parent_span_service.eq(&span.service) { // parent is not in the same service - set_top_level_span(span, true) + set_top_level_span(span) } } None => { // span has no parent in chunk - set_top_level_span(span, true) + set_top_level_span(span) } } } @@ -450,12 +450,8 @@ pub fn has_top_level(span: &pb::Span) -> bool { || span.metrics.get(TOP_LEVEL_KEY).is_some_and(|v| *v == 1.0) } -fn set_top_level_span(span: &mut pb::Span, is_top_level: bool) { - if is_top_level { - span.metrics.insert(TOP_LEVEL_KEY.to_string(), 1.0); - } else { - span.metrics.remove(TOP_LEVEL_KEY); - } +fn set_top_level_span(span: &mut pb::Span) { + span.metrics.insert(TOP_LEVEL_KEY.to_string(), 1.0); } pub fn set_serverless_root_span_tags( diff --git a/libdd-trace-utils/src/tracer_payload.rs b/libdd-trace-utils/src/tracer_payload.rs index e30f04df91..30752cabdf 100644 --- a/libdd-trace-utils/src/tracer_payload.rs +++ b/libdd-trace-utils/src/tracer_payload.rs @@ -240,12 +240,11 @@ pub fn decode_to_trace_chunks( #[cfg(test)] mod tests { use super::*; - use crate::span::v04::SpanBytes; + use crate::span::v04::{SpanBytes, VecMap}; use crate::test_utils::create_test_no_alloc_span; use libdd_tinybytes::BytesString; use libdd_trace_protobuf::pb; use serde_json::json; - use std::collections::HashMap; fn create_dummy_collection_v07() -> TracerPayloadCollection { TracerPayloadCollection::V07(vec![pb::TracerPayload { @@ -362,9 +361,9 @@ mod tests { start: 1, duration: 5, error: 0, - meta: HashMap::new(), - metrics: HashMap::new(), - meta_struct: HashMap::new(), + meta: VecMap::new(), + metrics: VecMap::new(), + meta_struct: VecMap::new(), r#type: BytesString::from_slice("serverless".as_ref()).unwrap(), span_links: vec![], span_events: vec![], @@ -395,9 +394,9 @@ mod tests { start: 1, duration: 5, error: 1, - meta: HashMap::new(), - metrics: HashMap::new(), - meta_struct: HashMap::new(), + meta: VecMap::new(), + metrics: VecMap::new(), + meta_struct: VecMap::new(), r#type: BytesString::default(), span_links: vec![], span_events: vec![], diff --git a/libdd-trace-utils/tests/test_send_data.rs b/libdd-trace-utils/tests/test_send_data.rs index 37ae09d691..e4307ebf33 100644 --- a/libdd-trace-utils/tests/test_send_data.rs +++ b/libdd-trace-utils/tests/test_send_data.rs @@ -17,7 +17,6 @@ mod tracing_integration_tests { use libdd_trace_utils::trace_utils::TracerHeaderTags; use libdd_trace_utils::tracer_payload::{decode_to_trace_chunks, TraceEncoding}; use serde_json::json; - use std::collections::HashMap; #[cfg(target_os = "linux")] use std::fs::Permissions; #[cfg(target_os = "linux")] @@ -191,7 +190,7 @@ mod tracing_integration_tests { root_span.name = BytesString::from("test_send_data_v04_trace_meta_struct_snapshot_01"); root_span.r#type = BytesString::from("web"); root_span.meta_struct = - HashMap::from([(BytesString::from("appsec"), Bytes::from(meta_struct_data))]); + vec![(BytesString::from("appsec"), Bytes::from(meta_struct_data))].into(); let encoded_data = rmp_serde::to_vec_named(&vec![vec![root_span]]).unwrap();