diff --git a/libdd-trace-utils/src/span/vec_map.rs b/libdd-trace-utils/src/span/vec_map.rs index 9d2b5391ae..8825cfbbdf 100644 --- a/libdd-trace-utils/src/span/vec_map.rs +++ b/libdd-trace-utils/src/span/vec_map.rs @@ -10,8 +10,10 @@ use serde::ser::{Serialize, Serializer}; use std::borrow::Borrow; -use std::collections::HashSet; +use std::collections::{HashMap, HashSet}; use std::hash::Hash; +use std::slice; +use std::sync::atomic::{AtomicBool, Ordering}; /// A Vec-backed map that provides HashMap-like lookup by key. /// @@ -39,7 +41,7 @@ use std::hash::Hash; /// /// As this is a map, iteration order is not defined nor guaranteed. In practice, iteration follows /// insertion order, but [Self::dedup] will reverse the underlying vector. -#[derive(Clone, Debug, PartialEq)] +#[derive(Clone, Debug)] pub struct VecMap { data: Vec<(K, V)>, /// Deduped is a flag that is set after entry deduplication. It is dirtied (set to `false`) @@ -59,6 +61,21 @@ impl Default for VecMap { } } +// This implementation allocates, which isn't expected for equality testing (not allocating would be +// rather tedious though). It's fine for tests (where `PartialEq` is currently needed), so we +// cfg-gate it to avoid unintended usage in prod. +#[cfg(any(test, feature = "test-utils"))] +impl PartialEq for VecMap { + fn eq(&self, other: &Self) -> bool { + let lhs: HashMap<&K, &V> = self.data.iter().map(|(k, v)| (k, v)).collect(); + let rhs: HashMap<&K, &V> = other.data.iter().map(|(k, v)| (k, v)).collect(); + lhs == rhs + } +} + +#[cfg(any(test, feature = "test-utils"))] +impl Eq for VecMap {} + impl VecMap { #[must_use] #[inline] @@ -136,13 +153,13 @@ impl VecMap { /// Iterate over the element, including duplicate entries. #[inline] - pub fn iter(&self) -> std::slice::Iter<'_, (K, V)> { + pub fn iter(&self) -> slice::Iter<'_, (K, V)> { self.data.iter() } /// Iterate mutably over the elements, including duplicate entries. #[inline] - pub fn iter_mut(&mut self) -> std::slice::IterMut<'_, (K, V)> { + pub fn iter_mut(&mut self) -> slice::IterMut<'_, (K, V)> { self.dirty(); self.data.iter_mut() } @@ -168,6 +185,37 @@ impl VecMap { } } +impl VecMap { + /// Returns a deduped map, that either borrows from `self` without performing any work if the + /// map is already deduped, or dedup the entries in a new separate vec otherwise. As opposed to + /// [Self::dedup], `as_deduped_map` takes an immutable reference to `self` but might allocate. + /// Prefer [Self::dedup] when applicable. + pub fn as_deduped_map(&self) -> DedupedVecMap<'_, K, V> { + if self.deduped { + DedupedVecMap::Borrowed(self) + } else { + DedupedVecMap::Owned(self.data.iter().map(|(k, v)| (k, v)).collect()) + } + } + + /// This is a convenience wrapper around [Self::as_deduped_map] used in the msgpack encoder, + /// where we expect the map to be deduped, but call `as_deduped_map` as a defensive measure. If + /// the latter had to deduplicate and allocate a new vec, we log a warning (at most once). + #[allow(unused)] + pub(crate) fn defensive_dedup(&self) -> DedupedVecMap<'_, K, V> { + if !self.is_deduped() { + static WARNED: AtomicBool = AtomicBool::new(false); + if !WARNED.swap(true, Ordering::Relaxed) { + tracing::warn!( + "VecMap not deduped before encoding. Performing defensive on-the-fly dedup" + ); + } + } + + self.as_deduped_map() + } +} + impl VecMap { /// Remove entries with a duplicate key, only keeping the last one. After this, a flag is set /// internally, such that as long as the map isn't extended or mutably iterated, the next @@ -222,7 +270,7 @@ impl IntoIterator for VecMap { impl<'a, K, V> IntoIterator for &'a VecMap { type Item = &'a (K, V); - type IntoIter = std::slice::Iter<'a, (K, V)>; + type IntoIter = slice::Iter<'a, (K, V)>; fn into_iter(self) -> Self::IntoIter { self.data.iter() @@ -231,7 +279,7 @@ impl<'a, K, V> IntoIterator for &'a VecMap { impl<'a, K, V> IntoIterator for &'a mut VecMap { type Item = &'a mut (K, V); - type IntoIter = std::slice::IterMut<'a, (K, V)>; + type IntoIter = slice::IterMut<'a, (K, V)>; fn into_iter(self) -> Self::IntoIter { // Since we iterate on keys as well, they can modified, and introduce duplicates. @@ -250,33 +298,74 @@ impl Extend<(K, V)> for VecMap { impl Serialize for VecMap { fn serialize(&self, serializer: S) -> Result { use serde::ser::SerializeMap; - use std::collections::HashMap; - // We pre-compute the deduped map. If deduplication were done on the fly during - // serialization, we couldn't provide a length up front to the serializer, and the current - // one (rmp) will allocate an intermediate buffer defensively. - if self.deduped { - let mut map_ser = serializer.serialize_map(Some(self.len()))?; + let deduped = self.as_deduped_map(); + let mut map_ser = serializer.serialize_map(Some(deduped.len()))?; - for (k, v) in self { - map_ser.serialize_entry(k, v)?; - } + for (k, v) in deduped.iter() { + map_ser.serialize_entry(k, v)?; + } - map_ser.end() - } else { - // Note: using `dedup` would need an additional `clone()` of the whole map here. We can - // use references instead. - self.data - .iter() - .map(|(k, v)| (k, v)) - // Since the iterator is sized, `collect()` should pre-allocate with the right - // capacity in one shot. - .collect::>() - .serialize(serializer) + map_ser.end() + } +} + +pub enum DedupedVecMap<'a, K, V> { + Borrowed(&'a VecMap), + Owned(HashMap<&'a K, &'a V>), +} + +impl<'a, K, V> DedupedVecMap<'a, K, V> { + #[inline] + pub fn iter(&self) -> DedupedVecMapIter<'_, 'a, K, V> { + match self { + DedupedVecMap::Borrowed(vec_map) => DedupedVecMapIter::Borrowed(vec_map.iter()), + DedupedVecMap::Owned(map) => DedupedVecMapIter::Owned(map.iter()), } } + + #[inline] + pub fn len(&self) -> usize { + match self { + DedupedVecMap::Borrowed(vec_map) => vec_map.len(), + DedupedVecMap::Owned(map) => map.len(), + } + } + + #[inline] + pub fn is_empty(&self) -> bool { + match self { + DedupedVecMap::Borrowed(vec_map) => vec_map.is_empty(), + DedupedVecMap::Owned(map) => map.is_empty(), + } + } +} + +pub enum DedupedVecMapIter<'b, 'a: 'b, K, V> { + Borrowed(slice::Iter<'a, (K, V)>), + Owned(std::collections::hash_map::Iter<'b, &'a K, &'a V>), } +impl<'b, 'a: 'b, K, V> Iterator for DedupedVecMapIter<'b, 'a, K, V> { + type Item = (&'a K, &'a V); + + fn next(&mut self) -> Option { + match self { + DedupedVecMapIter::Borrowed(iter) => iter.next().map(|(k, v)| (k, v)), + DedupedVecMapIter::Owned(iter) => iter.next().map(|(&k, &v)| (k, v)), + } + } + + fn size_hint(&self) -> (usize, Option) { + match self { + DedupedVecMapIter::Borrowed(iter) => iter.size_hint(), + DedupedVecMapIter::Owned(iter) => iter.size_hint(), + } + } +} + +impl<'b, 'a: 'b, K, V> ExactSizeIterator for DedupedVecMapIter<'b, 'a, K, V> {} + #[cfg(test)] mod tests { use super::*; @@ -448,6 +537,61 @@ mod tests { assert!(!m.is_deduped()); } + #[test] + fn deduped_vec_map_borrowed_iter_and_len() { + let mut m = VecMap::new(); + m.insert("a", 1); + m.insert("b", 2); + m.dedup(); + + let d = m.defensive_dedup(); + assert!(matches!(d, DedupedVecMap::Borrowed(_))); + assert_eq!(d.len(), 2); + + let mut items: Vec<_> = d.iter().collect(); + items.sort_by_key(|(k, _)| **k); + assert_eq!(items, vec![(&"a", &1), (&"b", &2)]); + } + + #[test] + fn deduped_vec_map_copy_iter_and_len() { + let mut m = VecMap::new(); + m.insert("a", 1); + m.insert("a", 2); + m.insert("b", 3); + + let d = m.defensive_dedup(); + assert!(matches!(d, DedupedVecMap::Owned(_))); + assert_eq!(d.len(), 2); + + let items: HashMap<&&str, &i32> = d.iter().collect(); + assert_eq!(items[&"a"], &2); + assert_eq!(items[&"b"], &3); + } + + #[test] + fn deduped_vec_map_iter_exact_size() { + let mut m = VecMap::new(); + m.insert("a", 1); + m.insert("b", 2); + m.insert("c", 3); + m.dedup(); + + let d = m.defensive_dedup(); + let mut iter = d.iter(); + assert_eq!(iter.len(), 3); + iter.next(); + assert_eq!(iter.len(), 2); + } + + #[test] + fn deduped_vec_map_empty() { + let m: VecMap = VecMap::new(); + let d = DedupedVecMap::Borrowed(&m); + assert_eq!(d.len(), 0); + assert_eq!(d.iter().count(), 0); + } + #[test] fn serialize_deduplicates_keeping_last() { let mut m = VecMap::new();