Skip to content

Commit 65c8a8a

Browse files
authored
Fixes #3240, perf of indexing many rows with same key (#3971)
# Description of Changes Fixes #3240. Non-unique indices are now backed by a type `SameKeyEntry` which holds the `RowPointer`s for the same key. When these `RowPointer`s exceed 4KiB (512 entries), the data structure switches from using an array list to a hash set. # API and ABI breaking changes None # Expected complexity level and risk 2? # Testing Covered by existing tests, though more tests will come in future PRs.
1 parent 8ab3ef4 commit 65c8a8a

6 files changed

Lines changed: 206 additions & 51 deletions

File tree

crates/bindings-typescript/src/server/index.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@ export { type Reducer, type ReducerCtx } from '../lib/reducers';
77
export { type DbView } from './db_view';
88
export { and, or, not } from './query';
99
export type { ProcedureCtx, TransactionCtx } from '../lib/procedures';
10+
export { toCamelCase } from '../lib/util';
11+
export { type Uuid } from '../lib/uuid';
1012

1113
import './polyfills'; // Ensure polyfills are loaded
1214
import './register_hooks'; // Ensure module hooks are registered

crates/table/src/table_index/mod.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
/// We also represent unique indices more compactly than non-unique ones, avoiding the multi-map.
2525
/// Additionally, beyond our btree indices,
2626
/// we support direct unique indices, where keys are indices into `Vec`s.
27+
use self::same_key_entry::SameKeyEntryIter;
2728
use super::indexes::RowPointer;
2829
use super::table::RowRef;
2930
use crate::{read_column::ReadColumn, static_assert_size};
@@ -37,6 +38,7 @@ use spacetimedb_sats::{
3738

3839
mod key_size;
3940
mod multimap;
41+
mod same_key_entry;
4042
pub mod unique_direct_fixed_cap_index;
4143
pub mod unique_direct_index;
4244
pub mod uniquemap;
@@ -47,7 +49,7 @@ use unique_direct_fixed_cap_index::{UniqueDirectFixedCapIndex, UniqueDirectFixed
4749
use unique_direct_index::{UniqueDirectIndex, UniqueDirectIndexPointIter, UniqueDirectIndexRangeIter};
4850

4951
type BtreeIndex<K> = multimap::MultiMap<K, RowPointer>;
50-
type BtreeIndexPointIter<'a> = multimap::MultiMapPointIter<'a, RowPointer>;
52+
type BtreeIndexPointIter<'a> = SameKeyEntryIter<'a, RowPointer>;
5153
type BtreeIndexRangeIter<'a, K> = multimap::MultiMapRangeIter<'a, K, RowPointer>;
5254
type BtreeUniqueIndex<K> = uniquemap::UniqueMap<K, RowPointer>;
5355
type BtreeUniqueIndexPointIter<'a> = uniquemap::UniqueMapPointIter<'a, RowPointer>;
Lines changed: 32 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -1,38 +1,39 @@
1-
use core::ops::RangeBounds;
2-
use core::slice;
3-
use smallvec::SmallVec;
1+
use super::same_key_entry::{same_key_iter, SameKeyEntry, SameKeyEntryIter};
2+
use core::{hash::Hash, ops::RangeBounds};
43
use spacetimedb_sats::memory_usage::MemoryUsage;
54
use std::collections::btree_map::{BTreeMap, Range};
65

76
/// A multi map that relates a `K` to a *set* of `V`s.
87
#[derive(Debug, PartialEq, Eq)]
9-
pub struct MultiMap<K, V> {
8+
pub struct MultiMap<K, V: Eq + Hash> {
109
/// The map is backed by a `BTreeMap` for relating keys to values.
1110
///
1211
/// A value set is stored as a `SameKeyEntry`.
1312
/// This is an optimization over a `Vec<_>`
1413
/// as a small number of elements can be stored inline
1514
/// to improve performance for the common case of very few elements.
16-
map: BTreeMap<K, SmallVec<[V; 1]>>,
15+
map: BTreeMap<K, SameKeyEntry<V>>,
1716
}
1817

19-
impl<K, V> Default for MultiMap<K, V> {
18+
impl<K, V: Eq + Hash> Default for MultiMap<K, V> {
2019
fn default() -> Self {
2120
Self { map: BTreeMap::new() }
2221
}
2322
}
2423

25-
impl<K: MemoryUsage, V: MemoryUsage> MemoryUsage for MultiMap<K, V> {
24+
impl<K: MemoryUsage, V: MemoryUsage + Eq + Hash> MemoryUsage for MultiMap<K, V> {
2625
fn heap_usage(&self) -> usize {
2726
let Self { map } = self;
2827
map.heap_usage()
2928
}
3029
}
3130

32-
impl<K: Ord, V: Ord> MultiMap<K, V> {
31+
impl<K: Ord, V: Ord + Hash> MultiMap<K, V> {
3332
/// Inserts the relation `key -> val` to this multimap.
3433
///
3534
/// The map does not check whether `key -> val` was already in the map.
35+
/// It's assumed that the same `val` is never added twice,
36+
/// and multimaps do not bind one `key` to the same `val`.
3637
pub fn insert(&mut self, key: K, val: V) {
3738
self.map.entry(key).or_default().push(val);
3839
}
@@ -41,30 +42,31 @@ impl<K: Ord, V: Ord> MultiMap<K, V> {
4142
///
4243
/// Returns whether `key -> val` was present.
4344
pub fn delete(&mut self, key: &K, val: &V) -> bool {
44-
if let Some(vset) = self.map.get_mut(key) {
45-
// The `vset` is not sorted, so we have to do a linear scan first.
46-
if let Some(idx) = vset.iter().position(|v| v == val) {
47-
vset.swap_remove(idx);
48-
return true;
49-
}
45+
let Some(vset) = self.map.get_mut(key) else {
46+
return false;
47+
};
48+
49+
let (deleted, is_empty) = vset.delete(val);
50+
51+
if is_empty {
52+
self.map.remove(key);
5053
}
51-
false
54+
55+
deleted
5256
}
5357

5458
/// Returns an iterator over the multimap that yields all the `V`s
5559
/// of the `K`s that fall within the specified `range`.
5660
pub fn values_in_range(&self, range: &impl RangeBounds<K>) -> MultiMapRangeIter<'_, K, V> {
5761
MultiMapRangeIter {
5862
outer: self.map.range((range.start_bound(), range.end_bound())),
59-
inner: None,
63+
inner: SameKeyEntry::empty_iter(),
6064
}
6165
}
6266

6367
/// Returns an iterator over the multimap that yields all the `V`s of the `key: &K`.
64-
pub fn values_in_point(&self, key: &K) -> MultiMapPointIter<'_, V> {
65-
let vals = self.map.get(key).map(|vs| &**vs).unwrap_or_default();
66-
let iter = vals.iter();
67-
MultiMapPointIter { iter }
68+
pub fn values_in_point(&self, key: &K) -> SameKeyEntryIter<'_, V> {
69+
same_key_iter(self.map.get(key))
6870
}
6971

7072
/// Returns the number of unique keys in the multimap.
@@ -75,7 +77,7 @@ impl<K: Ord, V: Ord> MultiMap<K, V> {
7577
/// Returns the total number of entries in the multimap.
7678
#[allow(unused)] // No use for this currently.
7779
pub fn len(&self) -> usize {
78-
self.map.values().map(|ptrs| ptrs.len()).sum()
80+
self.map.values().map(|vals: &SameKeyEntry<V>| vals.len()).sum()
7981
}
8082

8183
/// Returns whether there are any entries in the multimap.
@@ -91,46 +93,28 @@ impl<K: Ord, V: Ord> MultiMap<K, V> {
9193
}
9294
}
9395

94-
/// An iterator over values in a [`MultiMap`] where the key is a point.
95-
pub struct MultiMapPointIter<'a, V> {
96-
/// The inner iterator for the value set for a found key.
97-
iter: slice::Iter<'a, V>,
98-
}
99-
100-
impl<'a, V> Iterator for MultiMapPointIter<'a, V> {
101-
type Item = &'a V;
102-
103-
fn next(&mut self) -> Option<Self::Item> {
104-
self.iter.next()
105-
}
106-
}
107-
10896
/// An iterator over values in a [`MultiMap`] where the keys are in a certain range.
109-
pub struct MultiMapRangeIter<'a, K, V> {
97+
pub struct MultiMapRangeIter<'a, K, V: Eq + Hash> {
11098
/// The outer iterator seeking for matching keys in the range.
111-
outer: Range<'a, K, SmallVec<[V; 1]>>,
99+
outer: Range<'a, K, SameKeyEntry<V>>,
112100
/// The inner iterator for the value set for a found key.
113-
inner: Option<slice::Iter<'a, V>>,
101+
inner: SameKeyEntryIter<'a, V>,
114102
}
115103

116-
impl<'a, K, V> Iterator for MultiMapRangeIter<'a, K, V> {
104+
impl<'a, K, V: Eq + Hash> Iterator for MultiMapRangeIter<'a, K, V> {
117105
type Item = &'a V;
118106

119107
fn next(&mut self) -> Option<Self::Item> {
120108
loop {
121-
if let Some(inner) = self.inner.as_mut() {
122-
if let Some(val) = inner.next() {
123-
// While the inner iterator has elements, yield them.
124-
return Some(val);
125-
}
109+
// While the inner iterator has elements, yield them.
110+
if let Some(val) = self.inner.next() {
111+
return Some(val);
126112
}
127113

128-
// This makes the iterator fused.
129-
self.inner = None;
130114
// Advance and get a new inner, if possible, or quit.
131115
// We'll come back and yield elements from it in the next iteration.
132-
let (_, next) = self.outer.next()?;
133-
self.inner = Some(next.iter());
116+
let inner = self.outer.next().map(|(_, i)| i)?;
117+
self.inner = inner.iter();
134118
}
135119
}
136120
}
Lines changed: 165 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,165 @@
1+
use crate::{indexes::RowPointer, static_assert_size};
2+
use core::hash::Hash;
3+
use core::slice;
4+
use smallvec::SmallVec;
5+
use spacetimedb_data_structures::map::{hash_set, HashCollectionExt, HashSet};
6+
use spacetimedb_memory_usage::MemoryUsage;
7+
8+
/// A supporting type for multimap implementations
9+
/// that handles all the values for the same key,
10+
/// leaving the multimap to only have to care about the keys.
11+
///
12+
/// For performance reasons,
13+
/// this is an enum
14+
/// that deals with a smaller number of values in the first variant
15+
/// and with a larger number in the second variant.
16+
#[derive(Debug, PartialEq, Eq)]
17+
pub(super) enum SameKeyEntry<V: Eq + Hash> {
18+
/// A small number of values.
19+
///
20+
/// No ordering is kept between values.
21+
/// This makes insertions into amortized `O(k)`
22+
/// whereas deletions become `O(|values|)` instead.
23+
/// This is acceptable as `|values|` is small
24+
/// and because deleting from an array list is `O(n)` either way.
25+
///
26+
/// This also represents the "no values" case,
27+
/// although the multimap may want to delete the key in that case.
28+
///
29+
/// Up to two values are represented inline here.
30+
/// It's not profitable to represent this as a separate variant
31+
/// as that would increase `size_of::<SameKeyEntry>()` by 8 bytes.
32+
Small(SmallVec<[V; 2]>),
33+
34+
/// A large number of values.
35+
///
36+
/// Used when the heap size of `Small` would exceed one standard page.
37+
/// See [`SameKeyEntry::LARGE_AFTER_LEN`] for details.
38+
///
39+
/// Note that using a `HashSet`, with `S = RandomState`,
40+
/// entails that the iteration order is not deterministic.
41+
/// This is observed when doing queries against the index.
42+
Large(HashSet<V>),
43+
}
44+
45+
static_assert_size!(SameKeyEntry<RowPointer>, 32);
46+
47+
impl<V: Eq + Hash> Default for SameKeyEntry<V> {
48+
fn default() -> Self {
49+
Self::Small(<_>::default())
50+
}
51+
}
52+
53+
impl<V: MemoryUsage + Eq + Hash> MemoryUsage for SameKeyEntry<V> {
    /// Returns the heap usage reported by whichever collection currently backs the entry.
    fn heap_usage(&self) -> usize {
        match self {
            Self::Small(list) => list.heap_usage(),
            Self::Large(set) => set.heap_usage(),
        }
    }
}
61+
62+
impl<V: Eq + Hash> SameKeyEntry<V> {
63+
/// The number of elements
64+
/// beyond which the strategy is changed from small to large storage.
65+
const LARGE_AFTER_LEN: usize = 4096 / size_of::<V>();
66+
67+
/// Pushes `val` as an entry for the key.
68+
///
69+
/// This assumes that `val` was previously not recorded.
70+
/// The structure does not check whether it was previously resident.
71+
/// As a consequence, the time complexity is `O(k)` amortized.
72+
pub(super) fn push(&mut self, val: V) {
73+
match self {
74+
Self::Small(list) if list.len() <= Self::LARGE_AFTER_LEN => {
75+
list.push(val);
76+
}
77+
Self::Small(list) => {
78+
// Reconstruct into a set.
79+
let mut set = HashSet::with_capacity(list.len() + 1);
80+
set.extend(list.drain(..));
81+
82+
// Add `val`.
83+
set.insert(val);
84+
85+
*self = Self::Large(set);
86+
}
87+
Self::Large(set) => {
88+
set.insert(val);
89+
}
90+
}
91+
}
92+
93+
/// Deletes `val` as an entry for the key.
94+
///
95+
/// Returns `(was_deleted, is_empty)`.
96+
pub(super) fn delete(&mut self, val: &V) -> (bool, bool) {
97+
match self {
98+
Self::Small(list) => {
99+
// The `list` is not sorted, so we have to do a linear scan first.
100+
if let Some(idx) = list.iter().position(|v| v == val) {
101+
list.swap_remove(idx);
102+
(true, list.is_empty())
103+
} else {
104+
(false, false)
105+
}
106+
}
107+
Self::Large(set) => {
108+
let removed = set.remove(val);
109+
let empty = set.is_empty();
110+
(removed, empty)
111+
}
112+
}
113+
}
114+
115+
/// Returns an iterator over all the entries for this key.
116+
pub(super) fn iter(&self) -> SameKeyEntryIter<'_, V> {
117+
match self {
118+
Self::Small(list) => SameKeyEntryIter::Small(list.iter()),
119+
Self::Large(set) => SameKeyEntryIter::Large(set.iter().into()),
120+
}
121+
}
122+
123+
/// Returns an iterator over no entries.
124+
pub(super) fn empty_iter<'a>() -> SameKeyEntryIter<'a, V> {
125+
SameKeyEntryIter::Small(const { &[] }.iter())
126+
}
127+
128+
/// Returns the number of entries for the same key.
129+
pub(super) fn len(&self) -> usize {
130+
match self {
131+
Self::Small(list) => list.len(),
132+
Self::Large(set) => set.len(),
133+
}
134+
}
135+
}
136+
137+
/// Returns an iterator for a key's entries `ske`.
138+
/// This efficiently handles the case where there's no key (`None`).
139+
pub(super) fn same_key_iter<V: Eq + Hash>(ske: Option<&SameKeyEntry<V>>) -> SameKeyEntryIter<'_, V> {
140+
match ske {
141+
None => SameKeyEntry::empty_iter(),
142+
Some(ske) => ske.iter(),
143+
}
144+
}
145+
146+
/// An iterator over values in a [`SameKeyEntry`].
147+
pub enum SameKeyEntryIter<'a, V> {
148+
Small(slice::Iter<'a, V>),
149+
/// This variant doesn't occur so much
150+
/// and we'd like to reduce the footprint of `SameKeyEntryIter`.
151+
Large(Box<hash_set::Iter<'a, V>>),
152+
}
153+
154+
static_assert_size!(SameKeyEntryIter<RowPointer>, 16);
155+
156+
impl<'a, V> Iterator for SameKeyEntryIter<'a, V> {
157+
type Item = &'a V;
158+
159+
fn next(&mut self) -> Option<Self::Item> {
160+
match self {
161+
Self::Small(list) => list.next(),
162+
Self::Large(set) => set.next(),
163+
}
164+
}
165+
}

sdks/rust/tests/test-client/src/module_bindings/mod.rs

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

sdks/rust/tests/test-counter/src/lib.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@ use std::{
66
time::Duration,
77
};
88

9+
const TEST_TIMEOUT_SECS: u64 = 2 * 60;
10+
911
#[derive(Default)]
1012
struct TestCounterInner {
1113
/// Maps test names to their outcomes
@@ -57,7 +59,7 @@ impl TestCounter {
5759
let lock = self.inner.lock().expect("TestCounterInner Mutex is poisoned");
5860
let (lock, timeout_result) = self
5961
.wait_until_done
60-
.wait_timeout_while(lock, Duration::from_secs(90), |inner| {
62+
.wait_timeout_while(lock, Duration::from_secs(TEST_TIMEOUT_SECS), |inner| {
6163
inner.outcomes.len() != inner.registered.len()
6264
})
6365
.expect("TestCounterInner Mutex is poisoned");

0 commit comments

Comments
 (0)