Skip to content

Commit 226680c

Browse files
committed
[rust] Use hashbrown to create shared strings without unnecessary alloc
1 parent e223d69 commit 226680c

2 files changed

Lines changed: 71 additions & 22 deletions

File tree

rust/flatbuffers/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ serialize = ["serde"]
1818

1919
[dependencies]
2020
bitflags = "2.8.0"
21+
hashbrown = "0.16"
2122
serde = { version = "1.0", optional = true }
2223

2324
[build-dependencies]

rust/flatbuffers/src/builder.rs

Lines changed: 70 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@ use core::marker::PhantomData;
2424
use core::ops::{Add, AddAssign, Deref, DerefMut, Index, IndexMut, Sub, SubAssign};
2525
use core::ptr::write_bytes;
2626

27-
#[cfg(feature = "std")]
28-
use std::collections::HashMap;
27+
use hashbrown::HashTable;
28+
use hashbrown::DefaultHashBuilder;
29+
use core::hash::BuildHasher;
2930

3031
use crate::endian_scalar::emplace_scalar;
3132
use crate::primitives::*;
@@ -129,7 +130,7 @@ struct FieldLoc {
129130
/// FlatBufferBuilder builds a FlatBuffer through manipulating its internal
130131
/// state. It has an owned `Vec<u8>` that grows as needed (up to the hardcoded
131132
/// limit of 2GiB, which is set by the FlatBuffers format).
132-
#[derive(Clone, Debug, Eq, PartialEq)]
133+
#[derive(Clone, Debug)]
133134
pub struct FlatBufferBuilder<'fbb, A: Allocator = DefaultAllocator> {
134135
allocator: A,
135136
head: ReverseIndex,
@@ -142,14 +143,40 @@ pub struct FlatBufferBuilder<'fbb, A: Allocator = DefaultAllocator> {
142143

143144
min_align: usize,
144145
force_defaults: bool,
145-
#[cfg(feature = "std")]
146-
strings_pool: HashMap<String, WIPOffset<&'fbb str>>,
147-
#[cfg(not(feature = "std"))]
148-
strings_pool: Vec<WIPOffset<&'fbb str>>,
146+
strings_pool: HashTable<WIPOffset<&'fbb str>>,
147+
random_state: DefaultHashBuilder,
149148

150149
_phantom: PhantomData<&'fbb ()>,
151150
}
152151

152+
// Hand-written equality for `FlatBufferBuilder`.
//
// Two builders compare equal when their allocator, head, field locations,
// written vtable positions, `nested`/`finished` flags, minimum alignment and
// `force_defaults` flag are all equal. The `strings_pool` table and the
// `random_state` hasher are deliberately left out of the comparison.
// NOTE(review): presumably they are skipped because they are a lookup cache
// and its hasher rather than buffer state; confirm that builders differing
// only in their string pool should really compare equal.
impl<A: Allocator + PartialEq> PartialEq for FlatBufferBuilder<'_, A> {
153+
/// Returns `true` when every compared field of `self` equals the
/// corresponding field of `other` (see the field list in the pattern below).
fn eq(&self, other: &Self) -> bool {
154+
// Destructure without a `..` rest pattern: every field must be named
// here, so adding a field to the struct fails to compile until someone
// decides whether it takes part in equality.
let FlatBufferBuilder {
155+
allocator,
156+
head,
157+
field_locs,
158+
written_vtable_revpos,
159+
nested,
160+
finished,
161+
min_align,
162+
force_defaults,
163+
// Ignored on purpose: not part of the comparison.
strings_pool: _,
164+
// Ignored on purpose: not part of the comparison.
random_state: _,
165+
// Bound only to keep the pattern exhaustive; never compared.
_phantom,
166+
} = self;
167+
// Each binding above is a reference into `self`, hence the `&other.x`
// on the right-hand side of every comparison.
allocator == &other.allocator
168+
&& head == &other.head
169+
&& field_locs == &other.field_locs
170+
&& written_vtable_revpos == &other.written_vtable_revpos
171+
&& nested == &other.nested
172+
&& finished == &other.finished
173+
&& min_align == &other.min_align
174+
&& force_defaults == &other.force_defaults
175+
}
176+
}
177+
178+
// Marker impl: equality is total whenever the allocator's own equality is.
impl<A: Allocator + Eq> Eq for FlatBufferBuilder<'_, A> {}
179+
153180
impl<'fbb> FlatBufferBuilder<'fbb, DefaultAllocator> {
154181
/// Create a FlatBufferBuilder that is ready for writing.
155182
pub fn new() -> Self {
@@ -257,10 +284,8 @@ impl<'fbb, A: Allocator> FlatBufferBuilder<'fbb, A> {
257284

258285
min_align: 0,
259286
force_defaults: false,
260-
#[cfg(feature = "std")]
261-
strings_pool: HashMap::new(),
262-
#[cfg(not(feature = "std"))]
263-
strings_pool: Vec::new(),
287+
strings_pool: HashTable::new(),
288+
random_state: DefaultHashBuilder::default(),
264289

265290
_phantom: PhantomData,
266291
}
@@ -294,10 +319,8 @@ impl<'fbb, A: Allocator> FlatBufferBuilder<'fbb, A> {
294319

295320
min_align: 0,
296321
force_defaults: false,
297-
#[cfg(feature = "std")]
298-
strings_pool: HashMap::with_capacity(strings_pool_capacity),
299-
#[cfg(not(feature = "std"))]
300-
strings_pool: Vec::with_capacity(strings_pool_capacity),
322+
strings_pool: HashTable::with_capacity(strings_pool_capacity),
323+
random_state: DefaultHashBuilder::default(),
301324

302325
_phantom: PhantomData,
303326
}
@@ -506,9 +529,8 @@ impl<'fbb, A: Allocator> FlatBufferBuilder<'fbb, A> {
506529

507530
/// Fallible version of [`create_shared_string`](Self::create_shared_string).
508531
///
509-
/// Uses a HashMap to track previously written strings, providing O(1)
532+
/// Uses a HashTable to track previously written strings, providing O(1)
510533
/// amortized lookup and insertion.
511-
#[cfg(feature = "std")]
512534
#[inline]
513535
pub fn try_create_shared_string<'a: 'b, 'b>(
514536
&'a mut self,
@@ -518,18 +540,44 @@ impl<'fbb, A: Allocator> FlatBufferBuilder<'fbb, A> {
518540
"create_shared_string can not be called when a table or vector is under construction",
519541
);
520542

521-
if let Some(&offset) = self.strings_pool.get(s) {
522-
return Ok(offset);
543+
/// Returns the payload bytes of a string previously written into
/// `allocator`, located via `wip_offset`.
///
/// The offset is treated as a distance back from the END of the buffer
/// (`allocator.len() - offset`). At that position the function reads a
/// 4-byte little-endian `u32` length and returns the `len` bytes that
/// immediately follow it.
///
/// Panics if the computed indices fall outside the buffer, since all
/// accesses use plain indexing.
/// NOTE(review): assumes `A` exposes its storage as a byte slice via
/// `len()` / indexing, and that `wip_offset` never exceeds `allocator.len()`
/// (otherwise the subtraction underflows) — confirm against `Allocator`.
fn read_string<'a, A: Allocator>(
544+
allocator: &'a A,
545+
wip_offset: WIPOffset<&'a str>,
546+
) -> &'a [u8] {
547+
let ptr = wip_offset.value() as usize;
548+
// Convert the end-relative offset into an index from the buffer start.
let str_start = allocator.len() - ptr;
549+
// Decode the 4-byte little-endian length prefix byte by byte.
let len = u32::from_le_bytes([
550+
allocator[str_start],
551+
allocator[str_start + 1],
552+
allocator[str_start + 2],
553+
allocator[str_start + 3],
554+
]) as usize;
555+
// Skip the 4-byte prefix; the slice covers exactly `len` payload bytes.
&allocator[str_start + 4..str_start + 4 + len]
556+
}
557+
558+
let hash = self.random_state.hash_one(s.as_bytes());
559+
560+
if let Some(found) = self.strings_pool.find(hash, |wip_offset| {
561+
let stored = read_string(&self.allocator, *wip_offset);
562+
stored == s.as_bytes()
563+
}) {
564+
return Ok(*found);
523565
}
524566

525567
let address = WIPOffset::new(self.try_create_byte_string(s.as_bytes())?.value());
526-
self.strings_pool.insert(s.to_owned(), address);
568+
let allocator = &self.allocator;
569+
let random_state = &self.random_state;
570+
self.strings_pool
571+
.insert_unique(hash, address, |wip_offset| {
572+
let stored = read_string(allocator, *wip_offset);
573+
random_state.hash_one(stored)
574+
});
527575
Ok(address)
528576
}
529577

530578
/// Create a utf8 string, and de-duplicate if already created.
531579
///
532-
/// Uses a HashMap to track previously written strings, providing O(1)
580+
/// Uses a HashTable to track previously written strings, providing O(1)
533581
/// amortized lookup and insertion.
534582
#[cfg(feature = "std")]
535583
#[inline]
@@ -1224,7 +1272,7 @@ impl<T> IndexMut<ReverseIndexRange> for [T] {
12241272
}
12251273
}
12261274

1227-
#[cfg(test)]
1275+
#[cfg(all(test, feature = "std"))]
12281276
mod tests {
12291277
use super::*;
12301278
use core::sync::atomic::{AtomicUsize, Ordering};

0 commit comments

Comments
 (0)