Skip to content
Merged
7 changes: 7 additions & 0 deletions crates/memory-usage/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,6 +126,13 @@ impl<K: MemoryUsage, V: MemoryUsage> MemoryUsage for std::collections::BTreeMap<
}
}

impl<T: MemoryUsage> MemoryUsage for std::collections::BTreeSet<T> {
fn heap_usage(&self) -> usize {
// NB: this is best-effort, since we don't have a `capacity()` method on `BTreeMap`.
Comment thread
kim marked this conversation as resolved.
self.len() * mem::size_of::<T>() + self.iter().map(|t| t.heap_usage()).sum::<usize>()
}
}

#[cfg(feature = "smallvec")]
impl<A: smallvec::Array> MemoryUsage for smallvec::SmallVec<A>
where
Expand Down
13 changes: 13 additions & 0 deletions crates/table/src/page.rs
Original file line number Diff line number Diff line change
Expand Up @@ -382,6 +382,13 @@ impl PageHeader {
pub(super) fn present_rows_storage_ptr_for_test(&self) -> *const () {
self.fixed.present_rows.storage().as_ptr().cast()
}

/// Returns the number of var-len granules available for allocation,
/// including those in the "gap" between the fixed-len and var-len part of the page.
fn available_var_len_granules(&self) -> usize {
self.var.freelist_len as usize
+ VarLenGranule::space_to_granules(gap_remaining_size(self.var.first, self.fixed.last))
}
}

/// Fixed-length row portions must be at least large enough to store a `FreeCellRef`.
Expand Down Expand Up @@ -1195,6 +1202,12 @@ impl Page {
self.header.var.num_granules as usize
}

/// Returns the number of var-len granules free to store data,
/// including those in the "gap" between the fixed-len and var-len part of the page.
pub fn available_var_len_granules(&self) -> usize {
self.header.available_var_len_granules()
}

#[cfg(test)]
/// # Safety
///
Expand Down
37 changes: 27 additions & 10 deletions crates/table/src/pages.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ use super::var_len::VarLenMembers;
use core::ops::{ControlFlow, Deref, Index, IndexMut};
use spacetimedb_sats::layout::Size;
use spacetimedb_sats::memory_usage::MemoryUsage;
use std::collections::BTreeSet;
use std::ops::DerefMut;
use thiserror::Error;

Expand Down Expand Up @@ -39,8 +40,15 @@ impl IndexMut<PageIndex> for Pages {
pub struct Pages {
/// The collection of pages under management.
pages: Vec<Box<Page>>,
/// The set of pages that aren't yet full.
non_full_pages: Vec<PageIndex>,
/// The set of pages that aren't yet full.,
/// sorted by the number of var-len granules available in each page.
///
/// If multiple pages have the same number of granules available, they are then sorted by `PageIndex`.
/// This maintains a deterministic sort order,
/// so that replaying the same set of operations on multiple datastores
/// will always result in the same layout of rows in pages,
/// regardless of when those datastores were (re)started prior to or during the sequence of operations.
non_full_pages: BTreeSet<(usize, PageIndex)>,
}

impl MemoryUsage for Pages {
Expand Down Expand Up @@ -78,7 +86,14 @@ impl Pages {
page.clear();
}
// Mark every page non-full.
self.non_full_pages = (0..self.pages.len()).map(|idx| PageIndex(idx as u64)).collect();
self.non_full_pages = (0..self.pages.len())
// We could probably compute the number of available granules once and use it for all pages,
// rather than calling the method on each page,
// but we'd have to do some amount of reasoning to demonstrate it was correct
// based on the definition of `Page::clear`,
// and why bother?
.map(|idx| (self.pages[idx].available_var_len_granules(), PageIndex(idx as u64)))
Comment thread
kim marked this conversation as resolved.
.collect();
}

/// Get a reference to fixed-len row data.
Expand Down Expand Up @@ -113,14 +128,15 @@ impl Pages {

/// Mark the page at `idx` as non-full.
pub fn mark_page_non_full(&mut self, idx: PageIndex) {
self.non_full_pages.push(idx);
let available_granules = self.pages[idx.idx()].available_var_len_granules();
self.non_full_pages.insert((available_granules, idx));
}

/// If the page at `page_index` is not full,
/// add it to the non-full set so that later insertions can access it.
pub fn maybe_mark_page_non_full(&mut self, page_index: PageIndex, fixed_row_size: Size) {
if !self[page_index].is_full(fixed_row_size) {
self.non_full_pages.push(page_index);
self.mark_page_non_full(page_index);
}
}

Expand Down Expand Up @@ -151,14 +167,13 @@ impl Pages {
fixed_row_size: Size,
num_var_len_granules: usize,
) -> Result<PageIndex, Error> {
if let Some((page_idx_idx, page_idx)) = self
if let Some((page_num_free_granules, page_idx)) = self
.non_full_pages
.iter()
.range((num_var_len_granules, PageIndex(0))..)
.copied()
.enumerate()
.find(|(_, page_idx)| self[*page_idx].has_space_for_row(fixed_row_size, num_var_len_granules))
{
self.non_full_pages.swap_remove(page_idx_idx);
self.non_full_pages.remove(&(page_num_free_granules, page_idx));
Comment thread
gefjon marked this conversation as resolved.
return Ok(page_idx);
}

Expand Down Expand Up @@ -358,7 +373,9 @@ impl Pages {
self.non_full_pages = pages
.iter()
.enumerate()
.filter_map(|(idx, page)| (!page.is_full(fixed_row_size)).then_some(PageIndex(idx as _)))
.filter_map(|(idx, page)| {
(!page.is_full(fixed_row_size)).then_some((page.available_var_len_granules(), PageIndex(idx as _)))
})
.collect();
self.pages = pages;
}
Expand Down
Loading