Skip to content

Commit 74284ca

Browse files
authored
refactor(span)!: use VecMap for meta, metrics and meta_struct for v04 spans (#2043)
Depends on #2022 and #2049. # What does this PR do? This PR continues the native span performance work required to land native spans in dd-trace-js. Follow-up to #2022. This PR actually uses `VecMap` in the `Span` data structure. # Motivation Performance improvement following dd-trace-js native span experiments. See #2022. # Additional considerations There are some deduplication design choices. We deduplicate before serializing to the agent because, while the current agent implementation would support duplicate keys in a map (with "last one wins" semantics), this is not part of the spec/API, so we can't rely on it. We also deduplicate defensively in the msgpack encoder, but the data should already be deduplicated at this stage. For the byte estimate (`byte_size()`), we choose not to deduplicate. This means potentially overestimating the size of a chunk (and reaching the limit sooner), in exchange for delaying the deduplication to serialization time. # How to test the change? Run tests. Co-authored-by: yann.hamdaoui <yann.hamdaoui@datadoghq.com>
1 parent 53dd5eb commit 74284ca

28 files changed

Lines changed: 381 additions & 296 deletions

File tree

datadog-sidecar-ffi/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ libdd-crashtracker-ffi = { path = "../libdd-crashtracker-ffi", features = ["coll
3535

3636
[dev-dependencies]
3737
http = "1.1"
38+
libdd-trace-utils = { path = "../libdd-trace-utils", features = ["test-utils"] }
3839
tempfile = { version = "3.3" }
3940

4041
[lints.rust]

datadog-sidecar-ffi/src/span.rs

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ use libdd_common_ffi::slice::{AsBytes, CharSlice};
55
use libdd_tinybytes::{Bytes, BytesString};
66
use libdd_trace_utils::span::v04::{
77
AttributeAnyValueBytes, AttributeArrayValueBytes, SpanBytes, SpanEventBytes, SpanLinkBytes,
8+
VecMap,
89
};
910
use std::borrow::Cow;
1011
use std::collections::HashMap;
@@ -51,25 +52,35 @@ fn insert_hashmap<V>(map: &mut HashMap<BytesString, V>, key: CharSlice, value: V
5152
}
5253

5354
#[inline]
54-
fn remove_hashmap<V>(map: &mut HashMap<BytesString, V>, key: CharSlice) {
55+
fn insert_vec_map<V>(map: &mut VecMap<BytesString, V>, key: CharSlice, value: V) {
56+
if key.is_empty() {
57+
return;
58+
}
59+
let bytes_str_key = convert_char_slice_to_bytes_string(key);
60+
map.insert(bytes_str_key, value);
61+
}
62+
63+
#[inline]
64+
fn remove_vec_map_slow<V>(map: &mut VecMap<BytesString, V>, key: CharSlice) {
5565
let bytes_str_key = convert_char_slice_to_bytes_string(key);
56-
map.remove(&bytes_str_key);
66+
map.remove_slow(&bytes_str_key);
5767
}
5868

5969
#[inline]
60-
fn exists_hashmap<V>(map: &HashMap<BytesString, V>, key: CharSlice) -> bool {
70+
fn exists_vec_map<V>(map: &VecMap<BytesString, V>, key: CharSlice) -> bool {
6171
let bytes_str_key = convert_char_slice_to_bytes_string(key);
6272
map.contains_key(&bytes_str_key)
6373
}
6474

6575
/// The return value is an owned array of slices (`Box<[CharSlice<'a>]>`) that must be dropped
6676
/// explicitly.
67-
fn get_hashmap_keys<'a, V>(
68-
map: &'a HashMap<BytesString, V>,
77+
fn get_vec_map_keys<'a, V>(
78+
map: &'a VecMap<BytesString, V>,
6979
out_count: &mut usize,
7080
) -> *mut CharSlice<'a> {
71-
let mut keys: Vec<&str> = map.keys().map(|b| b.as_str()).collect();
81+
let mut keys: Vec<&str> = map.iter().map(|(k, _)| k.as_str()).collect();
7282
keys.sort_unstable();
83+
keys.dedup();
7384

7485
let slices: Box<[CharSlice]> = keys
7586
.iter()
@@ -182,8 +193,8 @@ pub extern "C" fn ddog_trace_new_span_with_capacities(
182193
new_vector_push(
183194
trace,
184195
SpanBytes {
185-
meta: HashMap::with_capacity(meta_size),
186-
metrics: HashMap::with_capacity(metrics_size),
196+
meta: VecMap::with_capacity(meta_size),
197+
metrics: VecMap::with_capacity(metrics_size),
187198
..SpanBytes::default()
188199
},
189200
)
@@ -324,7 +335,7 @@ pub extern "C" fn ddog_get_span_error(span: &mut SpanBytes) -> i32 {
324335

325336
#[no_mangle]
326337
pub extern "C" fn ddog_add_span_meta(span: &mut SpanBytes, key: CharSlice, value: CharSlice) {
327-
insert_hashmap(
338+
insert_vec_map(
328339
&mut span.meta,
329340
key,
330341
BytesString::from_slice(value.as_bytes()).unwrap_or_default(),
@@ -333,7 +344,7 @@ pub extern "C" fn ddog_add_span_meta(span: &mut SpanBytes, key: CharSlice, value
333344

334345
#[no_mangle]
335346
pub extern "C" fn ddog_del_span_meta(span: &mut SpanBytes, key: CharSlice) {
336-
remove_hashmap(&mut span.meta, key);
347+
remove_vec_map_slow(&mut span.meta, key);
337348
}
338349

339350
#[no_mangle]
@@ -355,7 +366,7 @@ pub extern "C" fn ddog_get_span_meta<'a>(
355366

356367
#[no_mangle]
357368
pub extern "C" fn ddog_has_span_meta(span: &mut SpanBytes, key: CharSlice) -> bool {
358-
exists_hashmap(&span.meta, key)
369+
exists_vec_map(&span.meta, key)
359370
}
360371

361372
/// The return value is an owned array of slices (`Box<[CharSlice]>`) that must be freed explicitly
@@ -365,17 +376,17 @@ pub extern "C" fn ddog_span_meta_get_keys<'a>(
365376
span: &'a mut SpanBytes,
366377
out_count: &mut usize,
367378
) -> *mut CharSlice<'a> {
368-
get_hashmap_keys(&span.meta, out_count)
379+
get_vec_map_keys(&span.meta, out_count)
369380
}
370381

371382
#[no_mangle]
372383
pub extern "C" fn ddog_add_span_metrics(span: &mut SpanBytes, key: CharSlice, val: f64) {
373-
insert_hashmap(&mut span.metrics, key, val);
384+
insert_vec_map(&mut span.metrics, key, val);
374385
}
375386

376387
#[no_mangle]
377388
pub extern "C" fn ddog_del_span_metrics(span: &mut SpanBytes, key: CharSlice) {
378-
remove_hashmap(&mut span.metrics, key);
389+
remove_vec_map_slow(&mut span.metrics, key);
379390
}
380391

381392
#[no_mangle]
@@ -396,20 +407,20 @@ pub extern "C" fn ddog_get_span_metrics(
396407

397408
#[no_mangle]
398409
pub extern "C" fn ddog_has_span_metrics(span: &mut SpanBytes, key: CharSlice) -> bool {
399-
exists_hashmap(&span.metrics, key)
410+
exists_vec_map(&span.metrics, key)
400411
}
401412

402413
#[no_mangle]
403414
pub extern "C" fn ddog_span_metrics_get_keys<'a>(
404415
span: &'a mut SpanBytes,
405416
out_count: &mut usize,
406417
) -> *mut CharSlice<'a> {
407-
get_hashmap_keys(&span.metrics, out_count)
418+
get_vec_map_keys(&span.metrics, out_count)
408419
}
409420

410421
#[no_mangle]
411422
pub extern "C" fn ddog_add_span_meta_struct(span: &mut SpanBytes, key: CharSlice, val: CharSlice) {
412-
insert_hashmap(
423+
insert_vec_map(
413424
&mut span.meta_struct,
414425
key,
415426
Bytes::copy_from_slice(val.as_bytes()),
@@ -418,7 +429,7 @@ pub extern "C" fn ddog_add_span_meta_struct(span: &mut SpanBytes, key: CharSlice
418429

419430
#[no_mangle]
420431
pub extern "C" fn ddog_del_span_meta_struct(span: &mut SpanBytes, key: CharSlice) {
421-
remove_hashmap(&mut span.meta_struct, key);
432+
remove_vec_map_slow(&mut span.meta_struct, key);
422433
}
423434

424435
#[no_mangle]
@@ -438,7 +449,7 @@ pub extern "C" fn ddog_get_span_meta_struct<'a>(
438449

439450
#[no_mangle]
440451
pub extern "C" fn ddog_has_span_meta_struct(span: &mut SpanBytes, key: CharSlice) -> bool {
441-
exists_hashmap(&span.meta_struct, key)
452+
exists_vec_map(&span.meta_struct, key)
442453
}
443454

444455
/// The return value is an array of slices (`Box<[CharSlice]>`) that must be freed explicitly
@@ -448,7 +459,7 @@ pub extern "C" fn ddog_span_meta_struct_get_keys<'a>(
448459
span: &'a mut SpanBytes,
449460
out_count: &mut usize,
450461
) -> *mut CharSlice<'a> {
451-
get_hashmap_keys(&span.meta_struct, out_count)
462+
get_vec_map_keys(&span.meta_struct, out_count)
452463
}
453464

454465
/// # Safety
@@ -461,7 +472,7 @@ pub unsafe extern "C" fn ddog_span_free_keys_ptr(keys_ptr: *mut CharSlice<'_>, c
461472
return;
462473
}
463474

464-
// Safety: all `xxx_get_keys()` functions return from `get_hashmap_keys()`, which returns a
475+
// Safety: all `xxx_get_keys()` functions return from `get_vec_map_keys()`, which returns a
465476
// `Box<[T]>`. It is an official guarantee of `Vec` that this can be freely converted to and
466477
// from `Box<[T]>` when `len == capacity`.
467478
unsafe {

datadog-sidecar-ffi/tests/span.rs

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@ fn test_span_debug_log_output() {
186186
ddog_set_span_name(span, CharSlice::from("debug-span"));
187187
let debug_output = ddog_span_debug_log(span);
188188

189-
let expected_output = CharSlice::from("Span { service: , name: debug-span, resource: , type: , trace_id: 0, span_id: 0, parent_id: 0, start: 0, duration: 0, error: 0, meta: {}, metrics: {}, meta_struct: {}, span_links: [], span_events: [] }");
189+
let expected_output = CharSlice::from("Span { service: , name: debug-span, resource: , type: , trace_id: 0, span_id: 0, parent_id: 0, start: 0, duration: 0, error: 0, meta: VecMap { data: [], deduped: false }, metrics: VecMap { data: [], deduped: false }, meta_struct: VecMap { data: [], deduped: false }, span_links: [], span_events: [] }");
190190

191191
assert_eq!(debug_output, expected_output);
192192

@@ -343,12 +343,13 @@ fn test_full_span() {
343343
start: 4,
344344
duration: 5,
345345
error: 6,
346-
meta: HashMap::from([(get_bytes_str("meta_key"), get_bytes_str("meta_value"))]),
347-
metrics: HashMap::from([(get_bytes_str("metric_key"), 1.0)]),
348-
meta_struct: HashMap::from([(
346+
meta: vec![(get_bytes_str("meta_key"), get_bytes_str("meta_value"))].into(),
347+
metrics: vec![(get_bytes_str("metric_key"), 1.0)].into(),
348+
meta_struct: vec![(
349349
get_bytes_str("meta_struct_key"),
350350
get_bytes("meta_struct_value"),
351-
)]),
351+
)]
352+
.into(),
352353
span_links: vec![SpanLinkBytes {
353354
trace_id: 10,
354355
span_id: 20,

libdd-data-pipeline-ffi/src/tracer.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -499,7 +499,8 @@ mod tests {
499499
ddog_tracer_span_set_meta(Some(&mut *span), cs("k"), cs("v1"));
500500
ddog_tracer_span_set_meta(Some(&mut *span), cs("k"), cs("v2"));
501501

502-
assert_eq!(span.0.meta.len(), 1);
502+
// After the introduction of `VecMap`, the length is still 2, as the data structure
503+
// tolerates duplicate entries.
503504
assert_eq!(span.0.meta.get("k").unwrap().as_ref(), "v2");
504505

505506
ddog_tracer_span_free(span);

libdd-data-pipeline/benches/trace_buffer.rs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
// Copyright 2025-Present Datadog, Inc. https://www.datadoghq.com/
22
// SPDX-License-Identifier: Apache-2.0
33

4-
use std::collections::HashMap;
54
use std::pin::Pin;
65
use std::sync::Arc;
76
use std::time::Duration;
@@ -14,6 +13,7 @@ use libdd_data_pipeline::trace_exporter::{
1413
use libdd_shared_runtime::SharedRuntime;
1514
use libdd_tinybytes::BytesString;
1615
use libdd_trace_utils::span::v04::SpanBytes;
16+
use libdd_trace_utils::span::vec_map::VecMap;
1717

1818
// Number of chunks each sender thread sends per benchmark iteration.
1919
const CHUNKS_PER_SENDER: usize = 900;
@@ -34,18 +34,20 @@ fn make_span() -> SpanBytes {
3434
start: 1_700_000_000_000_000_000_i64,
3535
duration: 5_000_000_i64,
3636
error: 0,
37-
meta: HashMap::from_iter([
37+
meta: vec![
3838
(bs("env"), bs("prod")),
3939
(bs("version"), bs("1.0.0")),
4040
(bs("http.method"), bs("GET")),
4141
(bs("http.url"), bs("/api/v1/users")),
4242
(bs("peer.service"), bs("users-service")),
43-
]),
44-
metrics: HashMap::from_iter([
43+
]
44+
.into(),
45+
metrics: vec![
4546
(bs("_sampling_priority_v1"), 1.0_f64),
4647
(bs("_dd.agent_psr"), 1.0_f64),
47-
]),
48-
meta_struct: HashMap::new(),
48+
]
49+
.into(),
50+
meta_struct: VecMap::new(),
4951
span_links: vec![],
5052
span_events: vec![],
5153
}

libdd-data-pipeline/src/trace_buffer/mod.rs

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,16 @@ where
4242
size += self.resource.as_ref().len();
4343
size += self.r#type.as_ref().len();
4444

45-
for (k, v) in &self.meta {
45+
// We expect VecMaps to be already deduped at this point, so `defensive_dedup` should be
46+
// cheap (and alloc-free). In the future we could relax the check and accept non-deduped
47+
// VecMap, trading over-estimating the size of a span for less work.
48+
for (k, v) in self.meta.defensive_dedup().iter() {
4649
size += k.as_ref().len() + v.as_ref().len();
4750
}
48-
for k in self.metrics.keys() {
51+
for (k, _) in self.metrics.defensive_dedup().iter() {
4952
size += k.as_ref().len() + 8;
5053
}
51-
for (k, v) in &self.meta_struct {
54+
for (k, v) in self.meta_struct.defensive_dedup().iter() {
5255
size += k.as_ref().len() + v.as_ref().len();
5356
}
5457
for link in &self.span_links {

libdd-data-pipeline/src/trace_exporter/mod.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -612,6 +612,12 @@ impl<C: HttpClientCapability + SleepCapability + MaybeSend + Sync + 'static> Tra
612612
self.client_computed_top_level,
613613
);
614614

615+
for chunk in &mut traces {
616+
for span in chunk.iter_mut() {
617+
span.dedup();
618+
}
619+
}
620+
615621
// OTLP path: send sampled traces via OTLP when an OTLP endpoint is configured.
616622
// Unlike the agent path, there is no downstream agent to drop unsampled traces,
617623
// so drop_chunks is always called here regardless of whether stats are enabled.

libdd-trace-stats/benches/span_concentrator_bench.rs

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,22 @@
11
// Copyright 2024-Present Datadog, Inc. https://www.datadoghq.com/
22
// SPDX-License-Identifier: Apache-2.0
3-
use std::{
4-
collections::HashMap,
5-
time::{self, Duration, SystemTime},
6-
};
3+
use std::time::{self, Duration, SystemTime};
74

85
use criterion::{criterion_group, Criterion};
96
use libdd_trace_stats::span_concentrator::SpanConcentrator;
10-
use libdd_trace_utils::span::v04::SpanBytes;
7+
use libdd_trace_utils::span::v04::{SpanBytes, VecMap};
118

129
fn get_bucket_start(now: SystemTime, n: u64) -> i64 {
1310
let start = now.duration_since(time::UNIX_EPOCH).unwrap() + Duration::from_secs(10 * n);
1411
start.as_nanos() as i64
1512
}
1613

1714
fn get_span(now: SystemTime, trace_id: u64, span_id: u64) -> SpanBytes {
18-
let mut metrics = HashMap::from([("_dd.measured".into(), 1.0)]);
15+
let mut metrics: VecMap<_, _> = vec![("_dd.measured".into(), 1.0)].into();
1916
if span_id == 1 {
2017
metrics.insert("_dd.top_level".into(), 1.0);
2118
}
22-
let mut meta = HashMap::from([("db_name".into(), "postgres".into())]);
19+
let mut meta: VecMap<_, _> = vec![("db_name".into(), "postgres".into())].into();
2320
if span_id.is_multiple_of(3) {
2421
meta.insert("bucket_s3".into(), "aws_bucket".into());
2522
}

0 commit comments

Comments
 (0)