1010
1111use serde:: ser:: { Serialize , Serializer } ;
1212use std:: borrow:: Borrow ;
13- use std:: collections:: HashSet ;
13+ use std:: collections:: { HashMap , HashSet } ;
1414use std:: hash:: Hash ;
15+ use std:: slice;
16+ use std:: sync:: atomic:: { AtomicBool , Ordering } ;
1517
1618/// A Vec-backed map that provides HashMap-like lookup by key.
1719///
@@ -39,7 +41,7 @@ use std::hash::Hash;
3941///
4042/// As this is a map, iteration order is not defined nor guaranteed. In practice, iteration follows
4143/// insertion order, but [Self::dedup] will reverse the underlying vector.
42- #[ derive( Clone , Debug , PartialEq ) ]
44+ #[ derive( Clone , Debug ) ]
4345pub struct VecMap < K , V > {
4446 data : Vec < ( K , V ) > ,
4547 /// Deduped is a flag that is set after entry deduplication. It is dirtied (set to `false`)
@@ -59,6 +61,21 @@ impl<K, V> Default for VecMap<K, V> {
5961 }
6062}
6163
64+ // This implementation allocates, which isn't expected for equality testing (not allocating would be
65+ // rather tedious though). It's fine for tests (where `PartialEq` is currently needed), so we
66+ // cfg-gate it to avoid unintended usage in prod.
67+ #[ cfg( any( test, feature = "test-utils" ) ) ]
68+ impl < K : Eq + Hash , V : PartialEq > PartialEq for VecMap < K , V > {
69+ fn eq ( & self , other : & Self ) -> bool {
70+ let lhs: HashMap < & K , & V > = self . data . iter ( ) . map ( |( k, v) | ( k, v) ) . collect ( ) ;
71+ let rhs: HashMap < & K , & V > = other. data . iter ( ) . map ( |( k, v) | ( k, v) ) . collect ( ) ;
72+ lhs == rhs
73+ }
74+ }
75+
76+ #[ cfg( any( test, feature = "test-utils" ) ) ]
77+ impl < K : Eq + Hash , V : Eq > Eq for VecMap < K , V > { }
78+
6279impl < K , V > VecMap < K , V > {
6380 #[ must_use]
6481 #[ inline]
@@ -136,13 +153,13 @@ impl<K, V> VecMap<K, V> {
136153
137154 /// Iterate over the element, including duplicate entries.
138155 #[ inline]
139- pub fn iter ( & self ) -> std :: slice:: Iter < ' _ , ( K , V ) > {
156+ pub fn iter ( & self ) -> slice:: Iter < ' _ , ( K , V ) > {
140157 self . data . iter ( )
141158 }
142159
143160 /// Iterate mutably over the elements, including duplicate entries.
144161 #[ inline]
145- pub fn iter_mut ( & mut self ) -> std :: slice:: IterMut < ' _ , ( K , V ) > {
162+ pub fn iter_mut ( & mut self ) -> slice:: IterMut < ' _ , ( K , V ) > {
146163 self . dirty ( ) ;
147164 self . data . iter_mut ( )
148165 }
@@ -168,6 +185,37 @@ impl<K, V> VecMap<K, V> {
168185 }
169186}
170187
188+ impl < K : Eq + Hash , V > VecMap < K , V > {
189+ /// Returns a deduped map, that either borrows from `self` without performing any work if the
190+ /// map is already deduped, or dedup the entries in a new separate vec otherwise. As opposed to
191+ /// [Self::dedup], `as_deduped_map` takes an immutable reference to `self` but might allocate.
192+ /// Prefer [Self::dedup] when applicable.
193+ pub fn as_deduped_map ( & self ) -> DedupedVecMap < ' _ , K , V > {
194+ if self . deduped {
195+ DedupedVecMap :: Borrowed ( self )
196+ } else {
197+ DedupedVecMap :: Owned ( self . data . iter ( ) . map ( |( k, v) | ( k, v) ) . collect ( ) )
198+ }
199+ }
200+
201+ /// This is a convenience wrapper around [Self::as_deduped_map] used in the msgpack encoder,
202+ /// where we expect the map to be deduped, but call `as_deduped_map` as a defensive measure. If
203+ /// the latter had to deduplicate and allocate a new vec, we log a warning (at most once).
204+ #[ allow( unused) ]
205+ pub ( crate ) fn defensive_dedup ( & self ) -> DedupedVecMap < ' _ , K , V > {
206+ if !self . is_deduped ( ) {
207+ static WARNED : AtomicBool = AtomicBool :: new ( false ) ;
208+ if !WARNED . swap ( true , Ordering :: Relaxed ) {
209+ tracing:: warn!(
210+ "VecMap not deduped before encoding. Performing defensive on-the-fly dedup"
211+ ) ;
212+ }
213+ }
214+
215+ self . as_deduped_map ( )
216+ }
217+ }
218+
171219impl < K : Hash + Eq + Clone , V > VecMap < K , V > {
172220 /// Remove entries with a duplicate key, only keeping the last one. After this, a flag is set
173221 /// internally, such that as long as the map isn't extended or mutably iterated, the next
@@ -222,7 +270,7 @@ impl<K, V> IntoIterator for VecMap<K, V> {
222270
223271impl < ' a , K , V > IntoIterator for & ' a VecMap < K , V > {
224272 type Item = & ' a ( K , V ) ;
225- type IntoIter = std :: slice:: Iter < ' a , ( K , V ) > ;
273+ type IntoIter = slice:: Iter < ' a , ( K , V ) > ;
226274
227275 fn into_iter ( self ) -> Self :: IntoIter {
228276 self . data . iter ( )
@@ -231,7 +279,7 @@ impl<'a, K, V> IntoIterator for &'a VecMap<K, V> {
231279
232280impl < ' a , K , V > IntoIterator for & ' a mut VecMap < K , V > {
233281 type Item = & ' a mut ( K , V ) ;
234- type IntoIter = std :: slice:: IterMut < ' a , ( K , V ) > ;
282+ type IntoIter = slice:: IterMut < ' a , ( K , V ) > ;
235283
236284 fn into_iter ( self ) -> Self :: IntoIter {
237285 // Since we iterate on keys as well, they can modified, and introduce duplicates.
@@ -250,33 +298,74 @@ impl<K, V> Extend<(K, V)> for VecMap<K, V> {
250298impl < K : Serialize + Eq + Hash , V : Serialize > Serialize for VecMap < K , V > {
251299 fn serialize < S : Serializer > ( & self , serializer : S ) -> Result < S :: Ok , S :: Error > {
252300 use serde:: ser:: SerializeMap ;
253- use std:: collections:: HashMap ;
254301
255- // We pre-compute the deduped map. If deduplication were done on the fly during
256- // serialization, we couldn't provide a length up front to the serializer, and the current
257- // one (rmp) will allocate an intermediate buffer defensively.
258- if self . deduped {
259- let mut map_ser = serializer. serialize_map ( Some ( self . len ( ) ) ) ?;
302+ let deduped = self . as_deduped_map ( ) ;
303+ let mut map_ser = serializer. serialize_map ( Some ( deduped. len ( ) ) ) ?;
260304
261- for ( k, v) in self {
262- map_ser. serialize_entry ( k, v) ?;
263- }
305+ for ( k, v) in deduped . iter ( ) {
306+ map_ser. serialize_entry ( k, v) ?;
307+ }
264308
265- map_ser. end ( )
266- } else {
267- // Note: using `dedup` would need an additional `clone()` of the whole map here. We can
268- // use references instead.
269- self . data
270- . iter ( )
271- . map ( |( k, v) | ( k, v) )
272- // Since the iterator is sized, `collect()` should pre-allocate with the right
273- // capacity in one shot.
274- . collect :: < HashMap < & K , & V > > ( )
275- . serialize ( serializer)
309+ map_ser. end ( )
310+ }
311+ }
312+
313+ pub enum DedupedVecMap < ' a , K , V > {
314+ Borrowed ( & ' a VecMap < K , V > ) ,
315+ Owned ( HashMap < & ' a K , & ' a V > ) ,
316+ }
317+
318+ impl < ' a , K , V > DedupedVecMap < ' a , K , V > {
319+ #[ inline]
320+ pub fn iter ( & self ) -> DedupedVecMapIter < ' _ , ' a , K , V > {
321+ match self {
322+ DedupedVecMap :: Borrowed ( vec_map) => DedupedVecMapIter :: Borrowed ( vec_map. iter ( ) ) ,
323+ DedupedVecMap :: Owned ( map) => DedupedVecMapIter :: Owned ( map. iter ( ) ) ,
276324 }
277325 }
326+
327+ #[ inline]
328+ pub fn len ( & self ) -> usize {
329+ match self {
330+ DedupedVecMap :: Borrowed ( vec_map) => vec_map. len ( ) ,
331+ DedupedVecMap :: Owned ( map) => map. len ( ) ,
332+ }
333+ }
334+
335+ #[ inline]
336+ pub fn is_empty ( & self ) -> bool {
337+ match self {
338+ DedupedVecMap :: Borrowed ( vec_map) => vec_map. is_empty ( ) ,
339+ DedupedVecMap :: Owned ( map) => map. is_empty ( ) ,
340+ }
341+ }
342+ }
343+
344+ pub enum DedupedVecMapIter < ' b , ' a : ' b , K , V > {
345+ Borrowed ( slice:: Iter < ' a , ( K , V ) > ) ,
346+ Owned ( std:: collections:: hash_map:: Iter < ' b , & ' a K , & ' a V > ) ,
278347}
279348
349+ impl < ' b , ' a : ' b , K , V > Iterator for DedupedVecMapIter < ' b , ' a , K , V > {
350+ type Item = ( & ' a K , & ' a V ) ;
351+
352+ fn next ( & mut self ) -> Option < Self :: Item > {
353+ match self {
354+ DedupedVecMapIter :: Borrowed ( iter) => iter. next ( ) . map ( |( k, v) | ( k, v) ) ,
355+ DedupedVecMapIter :: Owned ( iter) => iter. next ( ) . map ( |( & k, & v) | ( k, v) ) ,
356+ }
357+ }
358+
359+ fn size_hint ( & self ) -> ( usize , Option < usize > ) {
360+ match self {
361+ DedupedVecMapIter :: Borrowed ( iter) => iter. size_hint ( ) ,
362+ DedupedVecMapIter :: Owned ( iter) => iter. size_hint ( ) ,
363+ }
364+ }
365+ }
366+
367+ impl < ' b , ' a : ' b , K , V > ExactSizeIterator for DedupedVecMapIter < ' b , ' a , K , V > { }
368+
280369#[ cfg( test) ]
281370mod tests {
282371 use super :: * ;
@@ -448,6 +537,61 @@ mod tests {
448537 assert ! ( !m. is_deduped( ) ) ;
449538 }
450539
540+ #[ test]
541+ fn deduped_vec_map_borrowed_iter_and_len ( ) {
542+ let mut m = VecMap :: new ( ) ;
543+ m. insert ( "a" , 1 ) ;
544+ m. insert ( "b" , 2 ) ;
545+ m. dedup ( ) ;
546+
547+ let d = m. defensive_dedup ( ) ;
548+ assert ! ( matches!( d, DedupedVecMap :: Borrowed ( _) ) ) ;
549+ assert_eq ! ( d. len( ) , 2 ) ;
550+
551+ let mut items: Vec < _ > = d. iter ( ) . collect ( ) ;
552+ items. sort_by_key ( |( k, _) | * * k) ;
553+ assert_eq ! ( items, vec![ ( & "a" , & 1 ) , ( & "b" , & 2 ) ] ) ;
554+ }
555+
556+ #[ test]
557+ fn deduped_vec_map_copy_iter_and_len ( ) {
558+ let mut m = VecMap :: new ( ) ;
559+ m. insert ( "a" , 1 ) ;
560+ m. insert ( "a" , 2 ) ;
561+ m. insert ( "b" , 3 ) ;
562+
563+ let d = m. defensive_dedup ( ) ;
564+ assert ! ( matches!( d, DedupedVecMap :: Owned ( _) ) ) ;
565+ assert_eq ! ( d. len( ) , 2 ) ;
566+
567+ let items: HashMap < & & str , & i32 > = d. iter ( ) . collect ( ) ;
568+ assert_eq ! ( items[ & "a" ] , & 2 ) ;
569+ assert_eq ! ( items[ & "b" ] , & 3 ) ;
570+ }
571+
572+ #[ test]
573+ fn deduped_vec_map_iter_exact_size ( ) {
574+ let mut m = VecMap :: new ( ) ;
575+ m. insert ( "a" , 1 ) ;
576+ m. insert ( "b" , 2 ) ;
577+ m. insert ( "c" , 3 ) ;
578+ m. dedup ( ) ;
579+
580+ let d = m. defensive_dedup ( ) ;
581+ let mut iter = d. iter ( ) ;
582+ assert_eq ! ( iter. len( ) , 3 ) ;
583+ iter. next ( ) ;
584+ assert_eq ! ( iter. len( ) , 2 ) ;
585+ }
586+
587+ #[ test]
588+ fn deduped_vec_map_empty ( ) {
589+ let m: VecMap < String , i32 > = VecMap :: new ( ) ;
590+ let d = DedupedVecMap :: Borrowed ( & m) ;
591+ assert_eq ! ( d. len( ) , 0 ) ;
592+ assert_eq ! ( d. iter( ) . count( ) , 0 ) ;
593+ }
594+
451595 #[ test]
452596 fn serialize_deduplicates_keeping_last ( ) {
453597 let mut m = VecMap :: new ( ) ;
0 commit comments