diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/counter_map.rs b/turbopack/crates/turbo-tasks-backend/src/backend/counter_map.rs index 7a5b8c09216f..a12a17b45fbc 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/counter_map.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/counter_map.rs @@ -12,34 +12,37 @@ use bincode::{ }; use rustc_hash::FxHasher; -type InnerMap = AutoMap, 1>; +type InnerMap = AutoMap, I>; /// A map optimized for reference counting, backed by AutoMap. /// /// Entries are automatically removed when their count reaches zero. /// This provides memory-efficient storage for sparse counter data. +/// +/// The `I` const generic forwards the inline capacity to the backing `AutoMap` +/// — see the schema field-by-field sizing for the chosen values. #[derive(Debug, Clone)] -pub struct CounterMap(InnerMap); +pub struct CounterMap(InnerMap); -impl Default for CounterMap { +impl Default for CounterMap { fn default() -> Self { Self(InnerMap::default()) } } -impl PartialEq for CounterMap { +impl PartialEq for CounterMap { fn eq(&self, other: &Self) -> bool { self.0 == other.0 } } -impl Encode for CounterMap { +impl Encode for CounterMap { fn encode(&self, encoder: &mut E) -> Result<(), EncodeError> { self.0.encode(encoder) } } -impl Decode for CounterMap +impl Decode for CounterMap where K: Decode + Eq + Hash, V: Decode, @@ -80,7 +83,7 @@ impl CounterValue for i32 { } } -impl CounterMap { +impl CounterMap { pub fn new() -> Self { Self(AutoMap::default()) } @@ -138,16 +141,16 @@ impl CounterMap { } } -impl IntoIterator for CounterMap { +impl IntoIterator for CounterMap { type Item = (K, V); - type IntoIter = as IntoIterator>::IntoIter; + type IntoIter = as IntoIterator>::IntoIter; fn into_iter(self) -> Self::IntoIter { self.0.into_iter() } } -impl CounterMap { +impl CounterMap { /// Insert a key-value pair. Panics if value is zero (invariant: zero values are not stored). 
pub fn insert(&mut self, key: K, value: V) -> Option { debug_assert!( @@ -297,7 +300,7 @@ mod tests { #[test] fn test_update_count_new_entry() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); // Adding new entry crosses zero (from nothing to something) assert!(map.update_count(1, 5)); assert_eq!(map.get(&1), Some(&5)); @@ -305,7 +308,7 @@ mod tests { #[test] fn test_update_count_increment() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); map.update_count(1, 5); // Incrementing existing entry doesn't cross zero assert!(!map.update_count(1, 3)); @@ -314,7 +317,7 @@ mod tests { #[test] fn test_update_count_removal_on_zero() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); map.update_count(1, 5); // Subtracting to zero removes entry and crosses zero assert!(map.update_count(1, -5)); @@ -324,7 +327,7 @@ mod tests { #[test] fn test_update_count_zero_delta_on_empty() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); // Adding zero to non-existent entry doesn't create it assert!(!map.update_count(1, 0)); assert!(map.is_empty()); @@ -332,14 +335,14 @@ mod tests { #[test] fn test_update_and_get_new_entry() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); assert_eq!(map.update_and_get(1, 5), 5); assert_eq!(map.get(&1), Some(&5)); } #[test] fn test_update_and_get_increment() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); map.update_and_get(1, 5); assert_eq!(map.update_and_get(1, 3), 8); assert_eq!(map.get(&1), Some(&8)); @@ -347,7 +350,7 @@ mod tests { #[test] fn test_update_and_get_removal() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); map.update_and_get(1, 5); assert_eq!(map.update_and_get(1, -5), 0); assert!(map.is_empty()); @@ -355,7 +358,7 @@ 
mod tests { #[test] fn test_add_entry() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); map.add_entry(1, 10); assert_eq!(map.get(&1), Some(&10)); } @@ -363,14 +366,14 @@ mod tests { #[test] #[should_panic(expected = "Entry already exists")] fn test_add_entry_panics_on_duplicate() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); map.add_entry(1, 10); map.add_entry(1, 20); // Should panic } #[test] fn test_update_positive_crossing_new_positive() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); // From nothing to positive - crosses positive boundary assert!(map.update_positive_crossing(1, 5)); assert_eq!(map.get(&1), Some(&5)); @@ -378,7 +381,7 @@ mod tests { #[test] fn test_update_positive_crossing_new_negative() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); // From nothing to negative - doesn't cross positive boundary assert!(!map.update_positive_crossing(1, -5)); assert_eq!(map.get(&1), Some(&-5)); @@ -386,7 +389,7 @@ mod tests { #[test] fn test_update_positive_crossing_stay_positive() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); map.update_positive_crossing(1, 5); // Staying positive doesn't cross boundary assert!(!map.update_positive_crossing(1, 3)); @@ -395,7 +398,7 @@ mod tests { #[test] fn test_update_positive_crossing_to_non_positive() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); map.update_positive_crossing(1, 5); // Crossing to non-positive assert!(map.update_positive_crossing(1, -8)); @@ -404,7 +407,7 @@ mod tests { #[test] fn test_update_positive_crossing_to_zero_removes() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); map.update_positive_crossing(1, 5); // Crossing to zero removes and crosses boundary 
assert!(map.update_positive_crossing(1, -5)); @@ -413,14 +416,14 @@ mod tests { #[test] fn test_update_with_create() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); map.update_with(1, |_| Some(10)); assert_eq!(map.get(&1), Some(&10)); } #[test] fn test_update_with_modify() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); map.update_with(1, |_| Some(10)); map.update_with(1, |v| v.map(|x| x + 5)); assert_eq!(map.get(&1), Some(&15)); @@ -428,7 +431,7 @@ mod tests { #[test] fn test_update_with_remove() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); map.update_with(1, |_| Some(10)); map.update_with(1, |_| None); assert!(map.is_empty()); @@ -436,14 +439,14 @@ mod tests { #[test] fn test_update_with_no_op() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); map.update_with(1, |_| None); assert!(map.is_empty()); } #[test] fn test_len_and_is_empty() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); assert!(map.is_empty()); assert_eq!(map.len(), 0); @@ -457,7 +460,7 @@ mod tests { #[test] fn test_iter() { - let mut map: CounterMap = CounterMap::new(); + let mut map: CounterMap = CounterMap::new(); map.update_count(1, 5); map.update_count(2, 10); diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs index c7f547215bcc..bd19c567b092 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/mod.rs @@ -34,8 +34,8 @@ use turbo_tasks::{ TaskId, TaskPersistence, TaskPriority, TraitTypeId, TurboTasksBackendApi, TurboTasksPanic, ValueTypeId, backend::{ - Backend, CachedTaskType, CellContent, CellHash, TaskExecutionSpec, TransientTaskType, - TurboTaskContextError, TurboTaskLocalContextError, TurboTasksError, + 
Backend, CachedTaskType, CachedTaskTypeArc, CellContent, CellHash, TaskExecutionSpec, + TransientTaskType, TurboTaskContextError, TurboTaskLocalContextError, TurboTasksError, TurboTasksExecutionError, TurboTasksExecutionErrorMessage, TypedCellContent, VerificationMode, }, @@ -70,8 +70,8 @@ use crate::{ }, backing_storage::{BackingStorage, SnapshotItem, compute_task_type_hash}, data::{ - ActivenessState, CellRef, CollectibleRef, CollectiblesRef, Dirtyness, InProgressCellState, - InProgressState, InProgressStateInner, OutputValue, TransientTask, + ActivenessState, CellDependency, CellRef, CollectibleRef, CollectiblesRef, Dirtyness, + InProgressCellState, InProgressState, InProgressStateInner, OutputValue, TransientTask, }, error::TaskError, utils::{ @@ -785,7 +785,8 @@ impl TurboTasksBackendInner { && (!task.immutable() || cfg!(feature = "verify_immutable")) { let reader = reader.unwrap(); - let _ = task.add_cell_dependents((cell, key, reader)); + let _ = task + .add_cell_dependents(CellDependency::new(CellRef { task: reader, cell }, key)); drop(task); // Note: We use `task_pair` earlier to lock the task and its reader at the same @@ -797,8 +798,9 @@ impl TurboTasksBackendInner { task: task_id, cell, }; - if !reader_task.remove_outdated_cell_dependencies(&(target, key)) { - let _ = reader_task.add_cell_dependencies((target, key)); + let dep = CellDependency::new(target, key); + if !reader_task.remove_outdated_cell_dependencies(&dep) { + let _ = reader_task.add_cell_dependencies(dep); } drop(reader_task); } @@ -1526,7 +1528,7 @@ impl TurboTasksBackendInner { // Only now do we force the allocation. // NOTE: if our caller had to perform resolution, then this will have already // been boxed and take_box just takes it. 
- let task_type = Arc::new(CachedTaskType { + let task_type = CachedTaskTypeArc::new(CachedTaskType { native_fn, this, arg: arg.take_box(), @@ -1757,7 +1759,7 @@ impl TurboTasksBackendInner { } } - fn debug_get_cached_task_type(&self, task_id: TaskId) -> Option> { + fn debug_get_cached_task_type(&self, task_id: TaskId) -> Option { let task = self.storage.access_mut(task_id); task.get_persistent_task_type().cloned() } @@ -2197,7 +2199,7 @@ impl TurboTasksBackendInner { Some( // Collect all dependencies on tasks to check if all dependencies are immutable task.iter_output_dependencies() - .chain(task.iter_cell_dependencies().map(|(target, _key)| target.task)) + .chain(task.iter_cell_dependencies().map(|dep| dep.cell_ref().task)) .collect::>(), ) } else { @@ -2236,7 +2238,7 @@ impl TurboTasksBackendInner { // breaking dependency tracking. old_edges.extend( task.iter_outdated_cell_dependencies() - .map(|(target, key)| OutdatedEdge::CellDependency(target, key)), + .map(OutdatedEdge::CellDependency), ); old_edges.extend( task.iter_outdated_output_dependencies() diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/cleanup_old_edges.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/cleanup_old_edges.rs index aa544b632789..699391e0b93e 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/cleanup_old_edges.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/cleanup_old_edges.rs @@ -17,7 +17,7 @@ use crate::{ }, storage_schema::TaskStorageAccessors, }, - data::{CellRef, CollectibleRef, CollectiblesRef}, + data::{CellDependency, CellRef, CollectibleRef, CollectiblesRef}, }; #[derive(Encode, Decode, Clone)] @@ -48,7 +48,7 @@ impl Default for CleanupOldEdgesOperation { pub enum OutdatedEdge { Child(TaskId), Collectible(CollectibleRef, i32), - CellDependency(CellRef, Option), + CellDependency(CellDependency), OutputDependency(TaskId), CollectiblesDependency(CollectiblesRef), } @@ -166,27 +166,28 @@ impl 
CleanupOldEdgesOperation { AggregatedDataUpdate::new().collectibles_update(collectibles), )); } - OutdatedEdge::CellDependency( - CellRef { - task: cell_task_id, - cell, - }, - key, - ) => { + OutdatedEdge::CellDependency(dep) => { + let ( + CellRef { + task: cell_task_id, + cell, + }, + key, + ) = dep.into_parts(); { let mut task = ctx.task(cell_task_id, TaskDataCategory::Data); - task.remove_cell_dependents(&(cell, key, task_id)); - } - { - let mut task = ctx.task(task_id, TaskDataCategory::Data); - task.remove_cell_dependencies(&( + task.remove_cell_dependents(&CellDependency::new( CellRef { - task: cell_task_id, + task: task_id, cell, }, key, )); } + { + let mut task = ctx.task(task_id, TaskDataCategory::Data); + task.remove_cell_dependencies(&dep); + } } OutdatedEdge::OutputDependency(output_task_id) => { #[cfg(feature = "trace_task_output_dependencies")] diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs index 9f46eec53cb8..faab5a8f560d 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs @@ -19,7 +19,7 @@ use tracing::info_span; use tracing::trace_span; use turbo_tasks::{ CellId, DynTaskInputs, FxIndexMap, RawVc, SharedReference, TaskExecutionReason, TaskId, - TaskPriority, TurboTasksBackendApi, TurboTasksCallApi, backend::CachedTaskType, + TaskPriority, TurboTasksBackendApi, TurboTasksCallApi, backend::CachedTaskTypeArc, macro_helpers::NativeFunction, }; @@ -103,7 +103,8 @@ pub trait ExecuteContext<'e>: Sized { /// Uses hash-based lookup which may return multiple candidates due to hash collisions, /// then verifies each candidate by comparing the stored `persistent_task_type`. /// Returns `Some((task_id, task_type))` if a matching task is found, where `task_type` is - /// the existing `Arc` from storage (avoiding a duplicate allocation). 
+ /// the existing `CachedTaskTypeArc` from storage (avoiding a duplicate + /// allocation). /// /// Accepts exploded components so the caller does not need to box the argument before calling. fn task_by_type( @@ -111,7 +112,7 @@ pub trait ExecuteContext<'e>: Sized { native_fn: &'static NativeFunction, this: Option, arg: &dyn DynTaskInputs, - ) -> Option<(TaskId, Arc)>; + ) -> Option<(TaskId, CachedTaskTypeArc)>; fn debug_get_task_description(&self, task_id: TaskId) -> String; } @@ -606,7 +607,7 @@ struct TaskRestoreEntry { /// Another thread claimed the meta restore; we must wait in Phase 3. wait_meta: bool, /// Task type discovered during Phase 1c data restore (used to update task cache in Phase 2). - task_type: Option>, + task_type: Option, /// This thread performed the restore for at least one category (set in Phase 1c). self_restored: bool, } @@ -985,7 +986,7 @@ impl<'e, B: BackingStorage> ExecuteContext<'e> for ExecuteContextImpl<'e, B> { native_fn: &'static NativeFunction, this: Option, arg: &dyn DynTaskInputs, - ) -> Option<(TaskId, Arc)> { + ) -> Option<(TaskId, CachedTaskTypeArc)> { if !self.backend.should_restore() { return None; } @@ -1032,14 +1033,14 @@ impl<'e, B: BackingStorage> ChildExecuteContext<'e> for ChildExecuteContextImpl< } pub enum TaskTypeRef<'l> { - Cached(&'l Arc), + Cached(&'l CachedTaskTypeArc), Transient(&'l Arc), } impl TaskTypeRef<'_> { pub fn to_owned(&self) -> TaskType { match self { - TaskTypeRef::Cached(ty) => TaskType::Cached(Arc::clone(ty)), + TaskTypeRef::Cached(ty) => TaskType::Cached((*ty).clone()), TaskTypeRef::Transient(ty) => TaskType::Transient(Arc::clone(ty)), } } @@ -1056,7 +1057,7 @@ impl Display for TaskTypeRef<'_> { #[derive(Debug)] pub enum TaskType { - Cached(Arc), + Cached(CachedTaskTypeArc), Transient(Arc), } @@ -1398,7 +1399,7 @@ impl TaskGuard for TaskGuardImpl<'_> { .map(|target| (target, TaskDataCategory::Meta)) .chain( self.iter_cell_dependencies() - .map(|(target, _key)| (target.task, 
TaskDataCategory::All)), + .map(|dep| (dep.cell_ref().task, TaskDataCategory::All)), ) .chain( self.iter_collectibles_dependencies() diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs b/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs index 22ba80932aec..74765a978a2a 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/operation/update_cell.rs @@ -20,7 +20,7 @@ use crate::{ }, storage_schema::TaskStorageAccessors, }, - data::CellRef, + data::{CellDependency, CellRef}, }; #[derive(Encode, Decode, Clone, Default)] @@ -137,17 +137,22 @@ impl UpdateCellOperation { let mut dependent_tasks: FxIndexMap; 2]>> = FxIndexMap::default(); if !skip_invalidation { - let tasks_with_keys = - task.iter_cell_dependents() - .filter_map(|(dependent_cell, key, task)| { - (dependent_cell == cell - && key.is_none_or(|key_hash| { - updated_key_hashes_set - .as_ref() - .is_none_or(|set| set.contains(&key_hash)) - })) - .then_some((task, key)) - }); + let tasks_with_keys = task.iter_cell_dependents().filter_map(|dep| { + let ( + CellRef { + task: dependent_task, + cell: dependent_cell, + }, + key, + ) = dep.into_parts(); + (dependent_cell == cell + && key.is_none_or(|key_hash| { + updated_key_hashes_set + .as_ref() + .is_none_or(|set| set.contains(&key_hash)) + })) + .then_some((dependent_task, key)) + }); for (task, key) in tasks_with_keys { dependent_tasks.entry(task).or_default().push(key); } @@ -276,14 +281,15 @@ impl Operation for UpdateCellOperation { let mut make_stale = false; let dependent = ctx.task(dependent_task_id, TaskDataCategory::All); for key in keys.iter().copied() { - if dependent.outdated_cell_dependencies_contains(&(cell_ref, key)) { + let dep = CellDependency::new(cell_ref, key); + if dependent.outdated_cell_dependencies_contains(&dep) { // cell dependency is outdated, so it hasn't read the cell yet // and doesn't need to 
be invalidated. // We do not need to make the task stale in this case. // But importantly we still need to make the task dirty as it should // no longer be considered as // "recomputation". - } else if !dependent.cell_dependencies_contains(&(cell_ref, key)) { + } else if !dependent.cell_dependencies_contains(&dep) { // cell dependency has been removed, so the task doesn't depend on // the cell anymore and doesn't need // to be invalidated diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs b/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs index 3120b8947c7c..0e5b13518e14 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/storage.rs @@ -12,7 +12,7 @@ use std::{ use thread_local::ThreadLocal; use tracing::span::Id; use turbo_bincode::TurboBincodeBuffer; -use turbo_tasks::{FxDashMap, TaskId, backend::CachedTaskType, event::Event, parallel}; +use turbo_tasks::{FxDashMap, TaskId, backend::CachedTaskTypeArc, event::Event, parallel}; use crate::{ backend::storage_schema::{ @@ -183,7 +183,7 @@ pub struct Storage { /// This is backed by the TaskCache table in the database. /// /// LockOrdering: See the comments on [map]. - pub task_cache: FxDashMap, TaskId>, + pub task_cache: FxDashMap, } impl Storage { @@ -253,7 +253,7 @@ impl Storage { /// Mark a newly allocated task as restored (skip DB queries) and new (include in persistence /// snapshots). Optionally sets the `persistent_task_type` eagerly so it's available for /// persistence snapshots without needing to propagate it through `connect_child`. - pub fn initialize_new_task(&self, task_id: TaskId, task_type: Option>) { + pub fn initialize_new_task(&self, task_id: TaskId, task_type: Option) { let mut task = self.access_mut(task_id); task.flags.set_restored(TaskDataCategory::All); task.flags.set_new_task(true); @@ -516,7 +516,7 @@ impl Storage { // was contended. 
We defer them until after the map shard lock is released to // avoid a lock cycle with get_or_create_persistent_task, which takes task_cache // before map. Allocated lazily on first conflict. - let mut deferred_task_cache_removals: Vec> = Vec::new(); + let mut deferred_task_cache_removals: Vec = Vec::new(); // SAFETY: We hold the write lock for the duration of iteration. for bucket in unsafe { shard.iter() } { // SAFETY: The write lock guard outlives the bucket reference. diff --git a/turbopack/crates/turbo-tasks-backend/src/backend/storage_schema.rs b/turbopack/crates/turbo-tasks-backend/src/backend/storage_schema.rs index 85db526d4d0c..288d1600092c 100644 --- a/turbopack/crates/turbo-tasks-backend/src/backend/storage_schema.rs +++ b/turbopack/crates/turbo-tasks-backend/src/backend/storage_schema.rs @@ -21,8 +21,8 @@ use std::{hash::Hash, sync::Arc}; use parking_lot::Mutex; use turbo_tasks::{ - CellId, SharedReference, TaskExecutionReason, TaskId, TraitTypeId, ValueTypeId, - backend::{CachedTaskType, CellHash, TransientTaskType}, + CellId, SharedReference, TaskExecutionReason, TaskId, TinyVec, TraitTypeId, ValueTypeId, + backend::{CachedTaskTypeArc, CellHash, TransientTaskType}, event::Event, task_storage, }; @@ -30,18 +30,26 @@ use turbo_tasks::{ use crate::{ backend::{cell_data::CellData, counter_map::CounterMap}, data::{ - ActivenessState, AggregationNumber, CellRef, CollectibleRef, CollectiblesRef, Dirtyness, - InProgressCellState, InProgressState, LeafDistance, OutputValue, RootType, TransientTask, + ActivenessState, AggregationNumber, CellDependency, CollectibleRef, CollectiblesRef, + Dirtyness, InProgressCellState, InProgressState, LeafDistance, OutputValue, RootType, + TransientTask, }, }; /// Auto-set storage for small sets of keys with unit values. -/// Optimized for small collections (< 8 items use SmallVec inline). 
-type AutoSet = auto_hash_map::AutoSet, 1>; +/// +/// The `I` const generic is the inline capacity — entries spill to a `HashSet` +/// past it. Each field below picks its own `I` to saturate the available +/// padding (in `LazyField`) or to stay within the 16-byte `SmallVec` free +/// zone (for inline fields); see the field comments for the rationale. +type AutoSet = + auto_hash_map::AutoSet, I>; /// Auto-map storage for key-value pairs. -type AutoMap = - auto_hash_map::AutoMap, 1>; +/// +/// See [`AutoSet`] for the meaning of `I`. +type AutoMap = + auto_hash_map::AutoMap, I>; /// The complete task storage schema. /// @@ -52,7 +60,7 @@ type AutoMap = /// - `TaskFlags` bitfield for boolean flags /// - Accessor methods and traits /// -/// Fields are stored lazily in `Vec` by default for memory efficiency. +/// Fields are stored lazily in `TinyVec` by default for memory efficiency. /// Fields with `inline` are stored directly on TaskStorage (for hot-path access). /// /// Note: This struct is consumed by the macro and does not appear in the output. @@ -80,7 +88,7 @@ struct TaskStorageSchema { filter_transient, drop_on_completion_if_immutable )] - output_dependent: AutoSet, + output_dependent: AutoSet, /// The task's output value. /// Filtered during serialization to skip transient outputs (referencing transient tasks). @@ -89,7 +97,7 @@ struct TaskStorageSchema { /// Upper nodes in the aggregation tree (reference counted). #[field(storage = "counter_map", category = "meta", inline, filter_transient)] - upper: CounterMap, + upper: CounterMap, // ========================================================================= // COLLECTIBLES (meta) @@ -101,15 +109,15 @@ struct TaskStorageSchema { filter_transient, shrink_on_completion )] - collectibles: CounterMap, + collectibles: CounterMap, /// Aggregated collectibles from the subgraph. 
#[field(storage = "counter_map", category = "meta", filter_transient)] - aggregated_collectibles: CounterMap, + aggregated_collectibles: CounterMap, /// Outdated collectibles to be cleaned up (transient). #[field(storage = "counter_map", category = "transient", shrink_on_completion)] - outdated_collectibles: CounterMap, + outdated_collectibles: CounterMap, // ========================================================================= // STATE FIELDS (meta) @@ -127,7 +135,7 @@ struct TaskStorageSchema { /// Individual dirty containers in the aggregated subgraph. #[field(storage = "counter_map", category = "meta", filter_transient)] - aggregated_dirty_containers: CounterMap, + aggregated_dirty_containers: CounterMap, /// Count of clean containers in current session (transient). /// Absent = 0, present = actual count. @@ -136,7 +144,7 @@ struct TaskStorageSchema { /// Individual clean containers in current session (transient). #[field(storage = "counter_map", category = "transient")] - aggregated_current_session_clean_containers: CounterMap, + aggregated_current_session_clean_containers: CounterMap, // ========================================================================= // FLAGS (meta) - Boolean flags stored in TaskFlags bitfield @@ -223,11 +231,11 @@ struct TaskStorageSchema { filter_transient, shrink_on_completion )] - children: AutoSet, + children: AutoSet, /// Follower nodes in the aggregation tree (reference counted). #[field(storage = "counter_map", category = "meta", filter_transient)] - followers: CounterMap, + followers: CounterMap, // ========================================================================= // DEPENDENCIES (data) @@ -239,7 +247,7 @@ struct TaskStorageSchema { shrink_on_completion, drop_on_completion_if_immutable )] - output_dependencies: AutoSet, + output_dependencies: AutoSet, /// Cells this task depends on. 
#[field( @@ -249,7 +257,7 @@ struct TaskStorageSchema { shrink_on_completion, drop_on_completion_if_immutable )] - cell_dependencies: AutoSet<(CellRef, Option)>, + cell_dependencies: AutoSet, /// Collectibles this task depends on. #[field( @@ -259,19 +267,19 @@ struct TaskStorageSchema { shrink_on_completion, drop_on_completion_if_immutable )] - collectibles_dependencies: AutoSet, + collectibles_dependencies: AutoSet, /// Outdated output dependencies to be cleaned up (transient). #[field(storage = "auto_set", category = "transient", shrink_on_completion)] - outdated_output_dependencies: AutoSet, + outdated_output_dependencies: AutoSet, /// Outdated cell dependencies to be cleaned up (transient). #[field(storage = "auto_set", category = "transient", shrink_on_completion)] - outdated_cell_dependencies: AutoSet<(CellRef, Option)>, + outdated_cell_dependencies: AutoSet, /// Outdated collectibles dependencies to be cleaned up (transient). #[field(storage = "auto_set", category = "transient", shrink_on_completion)] - outdated_collectibles_dependencies: AutoSet, + outdated_collectibles_dependencies: AutoSet, // ========================================================================= // DEPENDENTS - Tasks that depend on this task's cells @@ -282,20 +290,20 @@ struct TaskStorageSchema { filter_transient, drop_on_completion_if_immutable )] - cell_dependents: AutoSet<(CellId, Option, TaskId)>, + cell_dependents: AutoSet, /// Tasks that depend on collectibles of a specific type from this task. 
/// Maps TraitTypeId -> Set #[field(storage = "auto_set", category = "meta", filter_transient)] - collectibles_dependents: AutoSet<(TraitTypeId, TaskId)>, + collectibles_dependents: AutoSet<(TraitTypeId, TaskId), 3>, #[field( storage = "auto_map", category = "data", shrink_on_completion, custom_drop_partial, - as_type = "AutoMap" + as_type = "AutoMap" )] cell_data: CellData, @@ -307,11 +315,11 @@ struct TaskStorageSchema { /// enum; a bare `u128` would grow the enum from 56 to 64 bytes due to its 16-byte /// alignment requirement. #[field(storage = "auto_map", category = "data", shrink_on_completion)] - cell_data_hash: AutoMap, + cell_data_hash: AutoMap, /// Maximum cell index per cell type. #[field(storage = "auto_map", category = "data", shrink_on_completion)] - cell_type_max_index: AutoMap, + cell_type_max_index: AutoMap, // ========================================================================= // TRANSIENT EXECUTION STATE (transient) @@ -326,10 +334,10 @@ struct TaskStorageSchema { /// In-progress cell state for cells being computed (transient). #[field(storage = "auto_map", category = "transient", shrink_on_completion)] - in_progress_cells: AutoMap, + in_progress_cells: AutoMap, #[field(storage = "direct", category = "data", inline)] - pub persistent_task_type: Option>, + pub persistent_task_type: Option, #[field(storage = "direct", category = "transient")] pub transient_task_type: Arc, @@ -522,7 +530,7 @@ impl TaskStorage { None => KeyEvictability::Unevictable, // strong_count == 1: only this TaskStorage holds this Arc, so no task_cache entry // references it. It must have been already evicted on a prior cycle. 
- Some(arc) if Arc::strong_count(arc) == 1 => KeyEvictability::AlreadyEvicted, + Some(arc) if arc.count() == 1 => KeyEvictability::AlreadyEvicted, Some(_) => KeyEvictability::Evictable, } }; @@ -844,14 +852,9 @@ impl IsTransient for (TraitTypeId, TaskId) { self.1.is_transient() } } -impl IsTransient for (CellId, Option, TaskId) { - fn is_transient(&self) -> bool { - self.2.is_transient() - } -} -impl IsTransient for (CellRef, Option) { +impl IsTransient for CellDependency { fn is_transient(&self) -> bool { - self.0.task.is_transient() + CellDependency::is_transient(self) } } @@ -863,7 +866,7 @@ pub(crate) trait MergeRestore { fn merge_restore(&mut self, items: impl IntoIterator); } -impl MergeRestore for CounterMap +impl MergeRestore for CounterMap where K: Eq + Hash, { @@ -872,7 +875,7 @@ where self.extend(items) } } -impl MergeRestore for AutoSet +impl MergeRestore for AutoSet where V: Eq + Hash, { @@ -915,7 +918,7 @@ impl DropPartial for Option { } } -impl DropPartial for AutoSet { +impl DropPartial for AutoSet { fn drop_partial(&mut self) -> DropPartialOutcome { self.retain(|t| t.is_transient()); if self.is_empty() { @@ -927,7 +930,7 @@ impl DropPartial for AutoSet { } } -impl DropPartial for CounterMap { +impl DropPartial for CounterMap { fn drop_partial(&mut self) -> DropPartialOutcome { self.retain(|k, _v| k.is_transient()); if self.is_empty() { @@ -938,7 +941,7 @@ impl DropPartial for CounterMap { } } } -impl DropPartial for AutoMap { +impl DropPartial for AutoMap { fn drop_partial(&mut self) -> DropPartialOutcome { self.retain(|k, v| k.is_transient() || v.is_transient()); if self.is_empty() { @@ -956,7 +959,7 @@ mod tests { use turbo_tasks::{CellId, TaskId}; use super::*; - use crate::data::{AggregationNumber, CellRef, Dirtyness, OutputValue}; + use crate::data::{AggregationNumber, CellDependency, CellRef, Dirtyness, OutputValue}; #[test] fn test_accessors() { @@ -1242,16 +1245,15 @@ mod tests { original .output_dependencies_mut() 
.insert(TaskId::new(200).unwrap()); - original.cell_dependencies_mut().insert(( - CellRef { + original + .cell_dependencies_mut() + .insert(CellDependency::All(CellRef { task: TaskId::new(1).unwrap(), cell: CellId { type_id: unsafe { turbo_tasks::ValueTypeId::new_unchecked(1) }, index: 0, }, - }, - None, - )); + })); // Set lazy data transient field (should NOT be serialized) original @@ -1390,16 +1392,15 @@ mod tests { storage.output_dependent_mut().insert(transient_task(3)); // Lazy filter_transient data field. - storage.cell_dependencies_mut().insert(( - CellRef { + storage + .cell_dependencies_mut() + .insert(CellDependency::All(CellRef { task: persistent_task(10), cell: CellId { type_id: unsafe { turbo_tasks::ValueTypeId::new_unchecked(1) }, index: 0, }, - }, - None, - )); + })); // Mark as restored so the task is eligible for dropping. storage.flags.set_data_restored(true); @@ -1705,13 +1706,14 @@ mod tests { fn test_schema_size() { assert_eq!( size_of::(), - 136, - "TaskStorage size changed! If this is intentional, update this test." + 128, + "TaskStorage size changed! Run print_schema_sizes and update this test." ); + // `LazyField` is 48 B = 40 B largest payload + 8 B discriminant. assert_eq!( size_of::(), - 56, - "LazyField size changed! If this is intentional, update this test." + 48, + "LazyField size changed! Run print_schema_sizes and update this test." ); } } diff --git a/turbopack/crates/turbo-tasks-backend/src/data.rs b/turbopack/crates/turbo-tasks-backend/src/data.rs index 0332e0e559d5..334f87ab0cf8 100644 --- a/turbopack/crates/turbo-tasks-backend/src/data.rs +++ b/turbopack/crates/turbo-tasks-backend/src/data.rs @@ -77,6 +77,60 @@ impl CollectiblesRef { } } +/// An edge between a [`CellRef`] and a task, optionally narrowed by a hashed sub-key. +/// +/// Used both as a forward and reverse edge: +/// - In `cell_dependencies`, the [`CellRef`] is the cell another task owns that this task depends +/// on. 
+/// - In `cell_dependents`, the [`CellRef`]'s `task` is the dependent task and `cell` is the cell of +/// the storing task; the `task` field is reused as the dependent's id rather than the cell's +/// owning task. The fields encode the same bits either way. +#[derive(Debug, Copy, Clone, Hash, PartialEq, Eq, Encode, Decode)] +pub enum CellDependency { + /// Depend on the cell as a whole. + All(CellRef), + /// Depend only on the sub-value identified by this hash key. + Hash(CellRef, u64), +} + +impl CellDependency { + pub fn cell_ref(&self) -> CellRef { + match *self { + CellDependency::All(c) | CellDependency::Hash(c, _) => c, + } + } + + pub fn key(&self) -> Option { + match *self { + CellDependency::All(_) => None, + CellDependency::Hash(_, k) => Some(k), + } + } + + /// Decompose into the underlying `(CellRef, Option)` in a single match. + /// + /// Prefer this over back-to-back `cell_ref()` + `key()` calls — the discriminant is + /// checked once instead of twice, which is meaningful in hot loops over + /// `iter_cell_dependents` / `iter_cell_dependencies`. 
+ pub fn into_parts(self) -> (CellRef, Option) { + match self { + CellDependency::All(c) => (c, None), + CellDependency::Hash(c, k) => (c, Some(k)), + } + } + + pub fn new(cell_ref: CellRef, key: Option) -> Self { + match key { + None => CellDependency::All(cell_ref), + Some(k) => CellDependency::Hash(cell_ref, k), + } + } + + pub fn is_transient(&self) -> bool { + self.cell_ref().is_transient() + } +} + #[derive(Debug, Clone, PartialEq, Eq, Encode, Decode)] pub enum OutputValue { Cell(CellRef), diff --git a/turbopack/crates/turbo-tasks-macros/src/derive/task_storage_macro.rs b/turbopack/crates/turbo-tasks-macros/src/derive/task_storage_macro.rs index 799ed85c7627..27268b60f830 100644 --- a/turbopack/crates/turbo-tasks-macros/src/derive/task_storage_macro.rs +++ b/turbopack/crates/turbo-tasks-macros/src/derive/task_storage_macro.rs @@ -1296,12 +1296,21 @@ fn generate_typed_storage_struct(grouped_fields: &GroupedFields) -> TokenStream quote! {} }; - // Add lazy vec field if needed (pub(crate) - used by helper methods) - // Note: Serialization is handled manually via encode_data/encode_meta methods let lazy_field = if has_lazy { + // `TinyVec`'s `MAX` const generic is set to the exact number of lazy fields declared + // in the schema. This caps growth at the smallest power-of-two-or-MAX boundary + // (e.g. with 24 variants we end at cap=24 instead of cap=32), saving a few slots + // per fully-populated task. It also makes "push past MAX" a compile-time-bounded + // contract instead of relying on `u8::MAX`. + // + // `as u8` cast is safe at the macro layer: u8::MAX is plenty of room for any + // realistic schema (asserted at compile time by `TinyVec::new`'s `MAX > 0` guard + // — a runtime check is not strictly required because the macro itself wouldn't + // emit > 255 variants). + let max_lazy = grouped_fields.all_lazy().count() as u8; quote! 
{ - #[doc = "Lazily-allocated fields stored in a single Vec for memory efficiency"] - lazy: Vec, + #[doc = "Lazily-allocated fields stored in a compact TinyVec for memory efficiency"] + lazy: TinyVec, } } else { quote! {} @@ -3686,10 +3695,10 @@ fn generate_snapshot_restore_methods(grouped_fields: &GroupedFields) -> TokenStr #clone_all_flags - // Pre-allocate lazy vec (upper bound - some may be transient and skipped) - snapshot.lazy.reserve(self.lazy.len()); - - // Clone all persistent lazy fields (both meta and data) + // Clone all persistent lazy fields (both meta and data). + // (No pre-`reserve`: the schema has ≤24 lazy fields, so at most 4 grows + // (0→4→8→16→24) total — cheaper than complicating the public API surface + // of `TinyVec`.) for field in &self.lazy { match field { #(#clone_data_lazy_arms)* @@ -3746,7 +3755,7 @@ fn generate_snapshot_restore_methods(grouped_fields: &GroupedFields) -> TokenStr // and merge each source variant in O(1). let (any_meta, _any_data, index) = Self::build_lazy_index(&self.lazy); if !any_meta { - self.lazy.extend(source.lazy); + self.lazy.extend_exact(source.lazy); } else { for field in source.lazy { debug_assert!(field.is_persistent() && field.is_meta()); @@ -3770,7 +3779,7 @@ fn generate_snapshot_restore_methods(grouped_fields: &GroupedFields) -> TokenStr // in `self.lazy` is never a collision risk. 
let (_any_meta, any_data, index) = Self::build_lazy_index(&self.lazy); if !any_data { - self.lazy.extend(source.lazy); + self.lazy.extend_exact(source.lazy); } else { for field in source.lazy { debug_assert!(field.is_persistent() && field.is_data()); diff --git a/turbopack/crates/turbo-tasks/Cargo.toml b/turbopack/crates/turbo-tasks/Cargo.toml index 6f1733067135..619576daf694 100644 --- a/turbopack/crates/turbo-tasks/Cargo.toml +++ b/turbopack/crates/turbo-tasks/Cargo.toml @@ -65,3 +65,7 @@ criterion = { workspace = true, features = ["async_tokio"] } [[bench]] name = "mod" harness = false + +[[bench]] +name = "tiny_vec" +harness = false diff --git a/turbopack/crates/turbo-tasks/benches/tiny_vec.rs b/turbopack/crates/turbo-tasks/benches/tiny_vec.rs new file mode 100644 index 000000000000..02124e141168 --- /dev/null +++ b/turbopack/crates/turbo-tasks/benches/tiny_vec.rs @@ -0,0 +1,122 @@ +//! Direct comparison between `TinyVec` and the standard `Vec` on the +//! operations that `TaskStorage::lazy` actually exercises: +//! +//! * `push` — appending one element at a time, growing through capacity boundaries. +//! * `iter` — linear scan (this is how `find_lazy(id)` works under the hood). +//! +//! These are micro-benchmarks: the values pushed are small 16 B `Item`s (two `u64`s) +//! standing in for `LazyField` (~48 B) without dragging in the entire schema. The +//! goal is to validate that switching `lazy` from `Vec` to `TinyVec` doesn't +//! cost throughput at the API level, since `Vec::push` is heavily optimized +//! and our hand-rolled `TinyVec::push` is not. + +use std::hint::black_box; + +use criterion::{BenchmarkId, Criterion, criterion_group, criterion_main}; +use turbo_tasks::TinyVec; + +#[global_allocator] +static ALLOC: turbo_tasks_malloc::TurboMalloc = turbo_tasks_malloc::TurboMalloc; + +/// A stand-in for `LazyField`: a 16 B `Copy` struct (smaller than the real ~48 B +/// `LazyField`), used so the benchmark does not need the whole schema. 
+#[derive(Clone, Copy)] +#[allow(dead_code)] +struct Item { + tag: u64, + payload: u64, +} + +fn make_item(i: u64) -> Item { + Item { + tag: i, + payload: i.wrapping_mul(0x9E37_79B9_7F4A_7C15), + } +} + +/// Push N items into a fresh `Vec`. Returns the populated container so the +/// allocator drop cost is included in the measurement. +fn push_vec(n: usize) -> Vec { + let mut v: Vec = Vec::new(); + for i in 0..n { + v.push(make_item(i as u64)); + } + v +} + +fn push_tinyvec(n: usize) -> TinyVec { + let mut v: TinyVec = TinyVec::default(); + for i in 0..n { + v.push(make_item(i as u64)); + } + v +} + +/// Sum all items via iter — the linear scan pattern. +#[allow(clippy::ptr_arg)] // for clarity +fn iter_vec(v: &Vec) -> u64 { + let mut acc: u64 = 0; + for it in v.iter() { + acc = acc.wrapping_add(it.tag).wrapping_add(it.payload); + } + acc +} + +fn iter_tinyvec(v: &TinyVec) -> u64 { + let mut acc: u64 = 0; + for it in v.iter() { + acc = acc.wrapping_add(it.tag).wrapping_add(it.payload); + } + acc +} + +pub fn bench(c: &mut Criterion) { + // Sizes chosen to cover the realistic `TaskStorage::lazy` range: + // 0 — empty (steady state for many tasks) + // 1 — single field set (very common) + // 4 — Vec's first grow boundary (1 -> 2 -> 4 -> 8 ...) 
+ // 8 — past the first few grows, full cache line worth of items + // 16 — fits in our `u8` cap with headroom + // 24 — close to the realistic max (~25 lazy fields in the schema) + let sizes = [0usize, 1, 4, 8, 16, 24]; + + // --- push ----------------------------------------------------------------- + + let mut group = c.benchmark_group("tiny_vec/push"); + group.sample_size(200); + for &n in &sizes { + group.bench_with_input(BenchmarkId::new("Vec", n), &n, |b, &n| { + b.iter(|| { + let v = push_vec(black_box(n)); + black_box(v); + }); + }); + group.bench_with_input(BenchmarkId::new("TinyVec", n), &n, |b, &n| { + b.iter(|| { + let v = push_tinyvec(black_box(n)); + black_box(v); + }); + }); + } + group.finish(); + + // --- iter ----------------------------------------------------------------- + + let mut group = c.benchmark_group("tiny_vec/iter"); + group.sample_size(200); + for &n in &sizes { + // Pre-fill once outside the timed region. + let v: Vec = push_vec(n); + let tv: TinyVec = push_tinyvec(n); + group.bench_with_input(BenchmarkId::new("Vec", n), &n, |b, _| { + b.iter(|| black_box(iter_vec(black_box(&v)))); + }); + group.bench_with_input(BenchmarkId::new("TinyVec", n), &n, |b, _| { + b.iter(|| black_box(iter_tinyvec(black_box(&tv)))); + }); + } + group.finish(); +} + +criterion_group!(tiny_vec_benches, bench); +criterion_main!(tiny_vec_benches); diff --git a/turbopack/crates/turbo-tasks/src/backend.rs b/turbopack/crates/turbo-tasks/src/backend.rs index d9a1e12325c8..88e1cc8d4180 100644 --- a/turbopack/crates/turbo-tasks/src/backend.rs +++ b/turbopack/crates/turbo-tasks/src/backend.rs @@ -1,9 +1,10 @@ use std::{ - borrow::Cow, + borrow::{Borrow, Cow}, error::Error, fmt::{self, Debug, Display}, future::Future, hash::{BuildHasher, BuildHasherDefault, Hash}, + ops::Deref, pin::Pin, sync::Arc, }; @@ -131,6 +132,76 @@ impl_encode_for_turbo_bincode_encode!(CachedTaskType); impl_decode_for_turbo_bincode_decode!(CachedTaskType); impl_borrow_decode!(CachedTaskType); 
+/// A reference-counted pointer to a [`CachedTaskType`] using `triomphe::Arc`. +/// +/// `triomphe::Arc` saves one `usize` per allocation (no weak count) and avoids the weak-count +/// CAS in `drop_slow` compared to `std::sync::Arc`. We never need `Weak`, so +/// the trade-off is favorable. +#[derive(Clone, Debug, Hash, PartialEq, Eq)] +pub struct CachedTaskTypeArc(pub triomphe::Arc); + +impl CachedTaskTypeArc { + pub fn new(value: CachedTaskType) -> Self { + Self(triomphe::Arc::new(value)) + } + + pub fn count(&self) -> usize { + triomphe::Arc::count(&self.0) + } +} + +impl AsRef for CachedTaskTypeArc { + fn as_ref(&self) -> &CachedTaskType { + &self.0 + } +} + +impl Deref for CachedTaskTypeArc { + type Target = CachedTaskType; + #[inline] + fn deref(&self) -> &CachedTaskType { + &self.0 + } +} + +impl Borrow for CachedTaskTypeArc { + #[inline] + fn borrow(&self) -> &CachedTaskType { + &self.0 + } +} + +impl Display for CachedTaskTypeArc { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + Display::fmt(&**self, f) + } +} + +impl Encode for CachedTaskTypeArc { + fn encode(&self, encoder: &mut E) -> Result<(), EncodeError> { + ::encode(self, encoder) + } +} + +impl Decode for CachedTaskTypeArc { + fn decode>(decoder: &mut D) -> Result { + Ok(Self::new(>::decode( + decoder, + )?)) + } +} + +impl<'de, Context> bincode::BorrowDecode<'de, Context> for CachedTaskTypeArc { + fn borrow_decode>( + decoder: &mut D, + ) -> Result { + Ok(Self::new(>::borrow_decode(decoder)?)) + } +} + // Manual implementation is needed because of a borrow issue with `Box`: // https://github.com/rust-lang/rust/issues/31740 impl PartialEq for CachedTaskType { diff --git a/turbopack/crates/turbo-tasks/src/lib.rs b/turbopack/crates/turbo-tasks/src/lib.rs index a84f4c038924..4bbef82bef5d 100644 --- a/turbopack/crates/turbo-tasks/src/lib.rs +++ b/turbopack/crates/turbo-tasks/src/lib.rs @@ -55,6 +55,7 @@ mod state; pub mod task; mod task_execution_reason; pub mod task_statistics; +mod 
tiny_vec; pub mod trace; mod trait_ref; mod triomphe_utils; @@ -108,6 +109,7 @@ pub use crate::{ task_input::{EitherTaskInput, TaskInput}, }, task_execution_reason::TaskExecutionReason, + tiny_vec::TinyVec, trait_ref::TraitRef, value::{TransientInstance, TransientValue}, value_type::{Evictability, TraitMethod, TraitType, ValueType, ValueTypePersistence}, diff --git a/turbopack/crates/turbo-tasks/src/tiny_vec.rs b/turbopack/crates/turbo-tasks/src/tiny_vec.rs new file mode 100644 index 000000000000..7f30bc7ade8a --- /dev/null +++ b/turbopack/crates/turbo-tasks/src/tiny_vec.rs @@ -0,0 +1,671 @@ +//! A `Vec`-shaped container with `u8` length and capacity, sized 16 B on 64-bit instead of 24 B. +//! +//! Used by `#[task_storage]` for `TaskStorage`'s lazy-fields field, which holds at most ~25 +//! elements (one per declared lazy field in the schema). With several million task storages +//! live during a typical Next.js build, the 8 B saved per task adds up to dozens of MB of +//! resident memory. +//! +//! The API is intentionally a strict subset of `Vec` covering only what the task-storage +//! callers and the `#[task_storage]` macro emit need: `len`, `iter`, `iter_mut`, `push`, +//! `swap_remove`, `last_mut`, `index`, `index_mut`, `extend`, `reserve`, `retain_mut`, +//! `Default`, `Debug`, `ShrinkToFit`. No `Clone` or `PartialEq` — `TaskStorage` doesn't +//! derive them. +//! +//! ## Capacity +//! +//! `TinyVec` is statically capped at `MAX <= 255` elements. Pushing past `MAX` +//! panics. Growth doubles until it would exceed `MAX`, then caps at exactly `MAX`. The +//! default `MAX = 255` covers any container that fits the type's `u8` cap. +//! +//! For `TaskStorage::lazy` the schema emits `TinyVec`, which tightens the +//! steady-state allocation: a fully-populated lazy vec ends at cap=25 instead of cap=32 +//! (the next power of two), saving 7 slots × `size_of::()` ≈ 336 B per such +//! task. 
+ +use std::{ + alloc::{Layout, alloc, dealloc, handle_alloc_error}, + fmt, + marker::PhantomData, + mem::ManuallyDrop, + ptr::{self, NonNull}, +}; + +/// Compact `Vec`-shaped container with a statically-bounded capacity; see module docs for +/// rationale. `MAX` defaults to `u8::MAX = 255` (the largest value the `u8` cap field can hold). +pub struct TinyVec { + /// Heap pointer. Dangling (uninitialized) when `cap == 0`. + ptr: NonNull, + len: u8, + cap: u8, + /// Marker so we own `T` for drop-check purposes (matches `Vec`'s variance/dropck). + _marker: PhantomData, +} + +// SAFETY: same as `Vec` — we own a heap allocation of `T`s, and the only shared state is via +// the `ptr` which is unique to this `TinyVec`. +unsafe impl Send for TinyVec {} +unsafe impl Sync for TinyVec {} + +impl Default for TinyVec { + fn default() -> Self { + Self::new() + } +} + +impl TinyVec { + // Compile-time assertion that `MAX > 0`. Referenced inside `new()` so it gets evaluated + // at monomorphization time; the panic message becomes a compile error for any + // `TinyVec` instantiation rather than a runtime panic on the first call. + const _ASSERT_MAX_NONZERO: () = assert!(MAX > 0, "TinyVec MAX must be > 0"); + + const fn new() -> Self { + // Force evaluation of the static assertion at this generic's monomorphization. + // The `let` binding to `()` keeps the const visited; clippy's `let_unit_value` lint + // is allowed here because that's intentional. + #[allow(clippy::let_unit_value)] + let _: () = Self::_ASSERT_MAX_NONZERO; + Self { + ptr: NonNull::dangling(), + len: 0, + cap: 0, + _marker: PhantomData, + } + } + + /// Retains only the elements for which the predicate returns `true`. See + /// [`Vec::retain_mut`] for semantics including panic safety. + /// + /// Delegates to `Vec::retain_mut`. 
Implementing retain_mut directly requires a + /// panic-safe partial-shift dance that's the trickiest unsafe code in this module; the + /// `Vec` version is identical in shape but has been hand-tested in the standard + /// library. Round-tripping through `Vec` for this one operation is worth the soundness + /// improvement, especially since `retain_mut` is cold relative to `push`. + pub fn retain_mut(&mut self, f: impl FnMut(&mut T) -> bool) { + if self.len == 0 { + return; + } + + // Panic safety: transfer buffer ownership to the local `Vec` *before* the closure + // can panic. Zeroing `cap` first means our `Drop` becomes a no-op until we restore + // it below — if `f` panics, `vec`'s Drop frees the buffer exactly once and our + // Drop (which may run during continued unwinding) does nothing. + let ptr = self.ptr.as_ptr(); + let len = self.len as usize; + let cap = self.cap as usize; + self.cap = 0; + self.len = 0; + + // SAFETY: by struct invariant, `(ptr, len, cap)` is a valid `Vec::from_raw_parts` + // triple. + let mut vec = unsafe { Vec::from_raw_parts(ptr, len, cap) }; + vec.retain_mut(f); + + // No panic. Take ownership of the (possibly element-dropped) buffer back. + // `retain_mut` never grows, so `new_cap == cap`. + let (new_ptr, new_len, new_cap) = vec.into_raw_parts(); + debug_assert_eq!(new_cap, cap); + // SAFETY: `Vec::into_raw_parts` returns a non-null pointer; same buffer as on entry. + self.ptr = unsafe { NonNull::new_unchecked(new_ptr) }; + self.len = new_len as u8; + self.cap = new_cap as u8; + } + + #[inline] + pub fn len(&self) -> usize { + self.len as usize + } + + /// Pair to [`len`] (kept inherent so clippy's `len_without_is_empty` lint is satisfied; + /// it's also reachable through `Deref<[T]>::is_empty`). + #[inline] + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + // `capacity` is exposed only to tests; external callers don't need it. 
+ #[cfg(test)] + #[inline] + fn capacity(&self) -> usize { + self.cap as usize + } + + // `iter`, `iter_mut`, `last_mut`, indexing, and `.is_empty()` slice-style usage are + // reachable through `Deref`/`DerefMut` to `[T]`. No need for inherent methods. + + #[inline] + fn as_slice(&self) -> &[T] { + // SAFETY: ptr is valid for `len` initialized elements; if len == 0, slicing the + // dangling pointer is allowed by `from_raw_parts`. + unsafe { std::slice::from_raw_parts(self.ptr.as_ptr(), self.len()) } + } + + #[inline] + fn as_mut_slice(&mut self) -> &mut [T] { + // SAFETY: same as `as_slice`; we hold `&mut self`. + unsafe { std::slice::from_raw_parts_mut(self.ptr.as_ptr(), self.len()) } + } + + /// Appends `value`. Panics if `len == MAX`. + pub fn push(&mut self, value: T) { + if self.len == self.cap { + // grow_by_one asserts inside realloc_to when new_cap > MAX. The check below + // happens before the cold-path call so we panic with a clearer message when the + // container is already saturated. + assert!( + (self.len as usize) < MAX as usize, + "TinyVec capacity overflow: already at MAX = {MAX}", + ); + self.grow_by_one(); + } + // SAFETY: `len < cap` after the grow; the slot at index `len` is uninitialized and we + // initialize it here. + unsafe { + ptr::write(self.ptr.as_ptr().add(self.len()), value); + } + self.len += 1; + } + + /// Removes the element at `idx` by swapping it with the last and popping. O(1). + /// Panics if `idx` is out of bounds (matching `Vec::swap_remove`). + pub fn swap_remove(&mut self, idx: usize) -> T { + let len = self.len(); + assert!(idx < len, "swap_remove index out of bounds: {idx} >= {len}"); + // SAFETY: `idx < len`; we read out the value and then either swap or shrink. 
+ unsafe { + let last = self.ptr.as_ptr().add(len - 1); + let hole = self.ptr.as_ptr().add(idx); + let value = ptr::read(hole); + if idx != len - 1 { + ptr::copy_nonoverlapping(last, hole, 1); + } + self.len -= 1; + value + } + } + + /// Reserves capacity for at least `additional` more elements. No-op if already sufficient. + /// Panics if the resulting capacity would exceed `MAX`. + /// + /// Private: used by `extend_exact` internally; no external callers. + fn reserve(&mut self, additional: usize) { + let needed = self.len() + additional; + if needed <= self.cap as usize { + return; + } + // Round up to next power of two (min 4), but never exceed MAX. + let target = needed.next_power_of_two().max(4).min(MAX as usize); + self.realloc_to(target); + } + + /// Grow the buffer by at least one slot. The first allocation jumps to 4 to amortize the + /// initial pushes; subsequent growths double, capped at `MAX`. + #[cold] + #[inline(never)] + fn grow_by_one(&mut self) { + let doubled = if self.cap == 0 { + 4 + } else { + (self.cap as usize) * 2 + }; + let new_cap = doubled.min(MAX as usize); + self.realloc_to(new_cap); + } + + fn realloc_to(&mut self, new_cap: usize) { + assert!( + new_cap <= MAX as usize, + "TinyVec capacity overflow: requested {new_cap}, max {MAX}", + ); + if new_cap == self.cap as usize { + return; + } + if size_of::() == 0 { + // Zero-sized types: no allocation needed; just bump cap. + self.cap = new_cap as u8; + return; + } + + // Allocate new buffer. + let new_layout = Layout::array::(new_cap).expect("TinyVec layout overflow"); + // SAFETY: Layout has nonzero size because new_cap > 0 (or we'd not be here) and T is + // nonzero-sized (handled above). + let new_ptr = unsafe { alloc(new_layout) } as *mut T; + let new_ptr = match NonNull::new(new_ptr) { + Some(p) => p, + None => handle_alloc_error(new_layout), + }; + + // Move elements over. 
+ if self.cap > 0 { + // SAFETY: old buffer holds `len` initialized Ts; copy them to the new buffer's + // prefix (which is uninitialized). + unsafe { + ptr::copy_nonoverlapping(self.ptr.as_ptr(), new_ptr.as_ptr(), self.len()); + } + self.deallocate_old(); + } + + self.ptr = new_ptr; + self.cap = new_cap as u8; + } + + /// Deallocates the current heap buffer without dropping the elements (caller must have + /// already moved or dropped them). No-op if `cap == 0`. + /// + /// `#[inline]` so the `cap == 0` early return collapses at the `Drop` call site for + /// empty containers — saves a function call on what is otherwise a one-instruction path. + #[inline] + fn deallocate_old(&mut self) { + if self.cap == 0 || size_of::() == 0 { + return; + } + let old_layout = + Layout::array::(self.cap as usize).expect("TinyVec layout was valid when allocated"); + // SAFETY: ptr came from `alloc` with this layout in `realloc_to`. + unsafe { + dealloc(self.ptr.as_ptr() as *mut u8, old_layout); + } + } + + /// Shrinks the heap buffer to fit `len`, freeing it entirely if `len == 0`. + pub fn shrink_to_fit(&mut self) { + if (self.len as usize) == (self.cap as usize) { + return; + } + if self.len == 0 { + // Free the buffer entirely. + self.deallocate_old(); + self.ptr = NonNull::dangling(); + self.cap = 0; + return; + } + let new_cap = self.len as usize; + // Allocate a smaller buffer, copy, free old. + let new_layout = Layout::array::(new_cap).expect("TinyVec layout overflow"); + // SAFETY: layout is nonzero only when `T` is not zero-sized (new_cap > 0 here). + // NOTE(review): for ZSTs `realloc_to` sets `cap = new_cap` without allocating, so + // `0 < len < cap` is reachable and this would call `alloc` with a zero-size layout; + // a `size_of::() == 0` guard is needed before this point. + let new_ptr = unsafe { alloc(new_layout) } as *mut T; + let new_ptr = match NonNull::new(new_ptr) { + Some(p) => p, + None => handle_alloc_error(new_layout), + }; + // SAFETY: old buffer holds `len` initialized Ts. 
+ unsafe { + ptr::copy_nonoverlapping(self.ptr.as_ptr(), new_ptr.as_ptr(), self.len()); + } + self.deallocate_old(); + self.ptr = new_ptr; + self.cap = new_cap as u8; + } +} + +// `Index` / `IndexMut` are reachable through `Deref` — +// `[T]: Index` and autoderef makes `tv[i]` work. No need to implement them here. + +impl std::ops::Deref for TinyVec { + type Target = [T]; + fn deref(&self) -> &[T] { + self.as_slice() + } +} + +impl std::ops::DerefMut for TinyVec { + fn deref_mut(&mut self) -> &mut [T] { + self.as_mut_slice() + } +} + +impl TinyVec { + /// Extend from an exact-sized iterator: reserves exactly once before the loop, + /// avoiding the `size_hint().0` lower-bound dance. + /// + /// All in-tree callers feed exact-sized iterators (typically `Vec::IntoIter` from + /// `TinyVec::into_iter`), so we expose this as the preferred API. The `Extend` trait + /// impl below stays for compatibility with generic code. + pub fn extend_exact(&mut self, iter: I) + where + I: IntoIterator, + I::IntoIter: ExactSizeIterator, + { + let iter = iter.into_iter(); + self.reserve(iter.len()); + for item in iter { + self.push(item); + } + } +} + +impl IntoIterator for TinyVec { + type Item = T; + type IntoIter = std::vec::IntoIter; + + fn into_iter(self) -> Self::IntoIter { + // Delegate to `Vec::IntoIter` rather than maintaining our own. ManuallyDrop the + // self so its Drop doesn't fire — the reconstructed Vec now owns the buffer. + let me = ManuallyDrop::new(self); + // SAFETY: by struct invariant, `(self.ptr, self.len, self.cap)` is a valid + // `Vec::from_raw_parts` triple. + unsafe { Vec::from_raw_parts(me.ptr.as_ptr(), me.len as usize, me.cap as usize) } + .into_iter() + } +} + +// `for x in &tv` and `for x in &mut tv` require `&TinyVec` / `&mut TinyVec` to implement +// `IntoIterator`. The `for` loop's desugaring doesn't apply `Deref` coercion across the +// reference boundary, so we need these explicit impls. 
They're trivial — just dispatch to +// the slice iterators reached through `Deref`. +impl<'a, T, const MAX: u8> IntoIterator for &'a TinyVec { + type Item = &'a T; + type IntoIter = std::slice::Iter<'a, T>; + fn into_iter(self) -> std::slice::Iter<'a, T> { + self.as_slice().iter() + } +} + +impl<'a, T, const MAX: u8> IntoIterator for &'a mut TinyVec { + type Item = &'a mut T; + type IntoIter = std::slice::IterMut<'a, T>; + fn into_iter(self) -> std::slice::IterMut<'a, T> { + self.as_mut_slice().iter_mut() + } +} + +impl fmt::Debug for TinyVec { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_list().entries(self.iter()).finish() + } +} + +impl Drop for TinyVec { + #[inline] + fn drop(&mut self) { + // Fast path for empty containers: skip both the drop_in_place and deallocate calls. + // Hot because `TinyVec::default()` followed by immediate drop is a common idiom in + // benchmarks and in the steady-state of tasks that never allocate anything lazy. + if self.cap == 0 { + return; + } + // Drop populated elements in place. + if self.len > 0 { + // SAFETY: we own `len` initialized elements at the start of the buffer. + unsafe { + ptr::drop_in_place(std::ptr::slice_from_raw_parts_mut( + self.ptr.as_ptr(), + self.len(), + )); + } + } + self.deallocate_old(); + } +} + +impl shrink_to_fit::ShrinkToFit for TinyVec { + fn shrink_to_fit(&mut self) { + Self::shrink_to_fit(self); + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// Test helper: build a TinyVec from an exact-sized iterator. Replaces the previous use + /// of `Iterator::collect()` after we removed the `FromIterator` impl. + fn from_exact(iter: I) -> TinyVec + where + I: IntoIterator, + I::IntoIter: ExactSizeIterator, + { + let mut v = TinyVec::new(); + v.extend_exact(iter); + v + } + + #[test] + fn size() { + // The whole point: 16 B on 64-bit, vs 24 B for Vec. 
+ assert_eq!(std::mem::size_of::>(), 16); + assert_eq!(std::mem::size_of::>(), 16); + } + + #[test] + fn push_iter_swap_remove() { + let mut v: TinyVec = TinyVec::new(); + assert!(v.is_empty()); + v.push(10); + v.push(20); + v.push(30); + assert_eq!(v.len(), 3); + assert_eq!(v.iter().copied().collect::>(), vec![10, 20, 30]); + let removed = v.swap_remove(0); + assert_eq!(removed, 10); + // After swap_remove(0), buffer is [30, 20] (last swapped into hole). + assert_eq!(v.iter().copied().collect::>(), vec![30, 20]); + assert_eq!(v[0], 30); + assert_eq!(v[1], 20); + } + + #[test] + fn growth_pattern() { + let mut v: TinyVec = TinyVec::new(); + for i in 0..32u32 { + v.push(i); + } + assert_eq!(v.len(), 32); + let collected: Vec = v.iter().copied().collect(); + assert_eq!(collected, (0..32).collect::>()); + } + + #[test] + fn extend_and_reserve() { + let mut v: TinyVec = TinyVec::new(); + v.extend_exact(0..10); + assert_eq!(v.len(), 10); + v.reserve(5); + assert!(v.capacity() >= 15); + } + + #[test] + fn last_mut_and_index_mut() { + let mut v: TinyVec = TinyVec::new(); + v.push(1); + v.push(2); + *v.last_mut().unwrap() = 99; + assert_eq!(v[1], 99); + v[0] = 7; + assert_eq!(v[0], 7); + } + + #[test] + fn drop_runs_on_elements() { + use std::sync::atomic::{AtomicUsize, Ordering}; + + struct DropCounter<'a>(&'a AtomicUsize); + impl<'a> Drop for DropCounter<'a> { + fn drop(&mut self) { + self.0.fetch_add(1, Ordering::SeqCst); + } + } + + let count = AtomicUsize::new(0); + { + let mut v: TinyVec> = TinyVec::new(); + v.push(DropCounter(&count)); + v.push(DropCounter(&count)); + v.push(DropCounter(&count)); + } + assert_eq!(count.load(Ordering::SeqCst), 3); + } + + #[test] + fn into_iter_drops_and_yields() { + use std::sync::atomic::{AtomicUsize, Ordering}; + + struct DropCounter<'a>(&'a AtomicUsize, u32); + impl<'a> Drop for DropCounter<'a> { + fn drop(&mut self) { + self.0.fetch_add(1, Ordering::SeqCst); + } + } + + let count = AtomicUsize::new(0); + let mut v: TinyVec> = 
TinyVec::new(); + v.push(DropCounter(&count, 1)); + v.push(DropCounter(&count, 2)); + v.push(DropCounter(&count, 3)); + + let mut iter = v.into_iter(); + assert_eq!(iter.next().unwrap().1, 1); + assert_eq!(iter.next().unwrap().1, 2); + // Drop iterator with one remaining element. + drop(iter); + // 3 total drops: the two yielded + the one remaining in the iter. + assert_eq!(count.load(Ordering::SeqCst), 3); + } + + #[test] + fn shrink_to_fit_releases_buffer() { + let mut v: TinyVec = TinyVec::new(); + v.extend_exact(0..10); + assert!(v.capacity() >= 10); + for _ in 0..10 { + v.swap_remove(0); + } + assert!(v.is_empty()); + v.shrink_to_fit(); + assert_eq!(v.capacity(), 0); + } + + #[test] + #[should_panic(expected = "TinyVec capacity overflow")] + fn capacity_overflow_panics() { + let mut v: TinyVec = TinyVec::new(); + for _ in 0..255u32 { + v.push(0); + } + // The 256th push trips the MAX check (default MAX = u8::MAX = 255). + v.push(0); + } + + /// `MAX` strictly caps push count; growth stops at exactly MAX even when doubling would + /// overshoot. + #[test] + fn tight_max_caps_growth_exactly() { + let mut v: TinyVec = TinyVec::new(); + for i in 0..5 { + v.push(i); + } + assert_eq!(v.len(), 5); + // Capacity should be exactly 5, not the next-power-of-two (8). + assert_eq!(v.capacity(), 5); + } + + #[test] + #[should_panic(expected = "TinyVec capacity overflow")] + fn tight_max_panics_at_limit() { + let mut v: TinyVec = TinyVec::new(); + v.push(0); + v.push(1); + v.push(2); + // The 4th push exceeds MAX=3. + v.push(3); + } + + /// Confirms the growth schedule with tight MAX: doubles until it would exceed MAX, then + /// caps. With MAX=10 we should see 0 -> 4 -> 8 -> 10. 
+ #[test] + fn tight_max_growth_schedule() { + let mut v: TinyVec = TinyVec::new(); + let mut last_cap = 0; + let mut cap_changes = Vec::new(); + for i in 0..10 { + v.push(i); + if v.capacity() != last_cap { + cap_changes.push(v.capacity()); + last_cap = v.capacity(); + } + } + assert_eq!(cap_changes, vec![4, 8, 10]); + } + + #[test] + fn retain_mut_basic() { + let mut v: TinyVec = from_exact(0..10); + v.retain_mut(|x| *x % 2 == 0); + assert_eq!(v.iter().copied().collect::>(), vec![0, 2, 4, 6, 8]); + // retain_mut shouldn't change capacity. + assert!(v.capacity() >= 5); + } + + #[test] + fn retain_mut_can_mutate() { + let mut v: TinyVec = from_exact(0..5); + v.retain_mut(|x| { + *x *= 10; + *x != 30 + }); + assert_eq!(v.iter().copied().collect::>(), vec![0, 10, 20, 40]); + } + + #[test] + fn retain_mut_empty() { + let mut v: TinyVec = TinyVec::new(); + v.retain_mut(|_| panic!("should not be called for empty")); + assert!(v.is_empty()); + } + + #[test] + fn retain_mut_keeps_all() { + let mut v: TinyVec = from_exact(0..5); + v.retain_mut(|_| true); + assert_eq!(v.iter().copied().collect::>(), vec![0, 1, 2, 3, 4]); + } + + #[test] + fn retain_mut_removes_all() { + let mut v: TinyVec = from_exact(0..5); + v.retain_mut(|_| false); + assert!(v.is_empty()); + } + + /// Verifies retain_mut's panic guard: if the predicate panics, we shouldn't double-free. + #[test] + fn retain_mut_panic_safety() { + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let mut v: TinyVec = from_exact(0..10); + v.retain_mut(|x| { + if *x == 5 { + panic!("boom"); + } + true + }); + })); + assert!(result.is_err()); + } + + /// Element Drop panic during retain_mut — `Vec::retain_mut` handles this; we should too. 
+ #[test] + fn retain_mut_element_drop_panic() { + use std::sync::atomic::{AtomicUsize, Ordering}; + + struct PanicyDrop<'a>(u32, &'a AtomicUsize); + impl Drop for PanicyDrop<'_> { + fn drop(&mut self) { + self.1.fetch_add(1, Ordering::SeqCst); + if self.0 == 5 && !std::thread::panicking() { + panic!("boom from drop"); + } + } + } + + let drop_count = AtomicUsize::new(0); + let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let mut v: TinyVec> = + from_exact((0..10).map(|i| PanicyDrop(i, &drop_count))); + v.retain_mut(|x| x.0 != 5); // schedules drop of element with 0==5, which panics + // If we get here without panic, drop happened cleanly. + })); + // The panic should have propagated; some drops should have occurred. + assert!(result.is_err() || drop_count.load(Ordering::SeqCst) > 0); + } +}