Skip to content

Commit 23ed3c5

Browse files
committed
checkpoint with some instrumentation
1 parent d1fa1e5 commit 23ed3c5

3 files changed

Lines changed: 115 additions & 46 deletions

File tree

turbopack/crates/turbo-tasks-backend/src/backend/operation/mod.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1099,7 +1099,7 @@ impl_operation!(LeafDistanceUpdate leaf_distance_update::LeafDistanceUpdateQueue
10991099
pub use self::invalidate::TaskDirtyCause;
11001100
pub use self::{
11011101
aggregation_update::{
1102-
AggregatedDataUpdate, AggregationUpdateJob, ComputeDirtyAndCleanUpdate,
1102+
AggregatedDataUpdate, AggregationUpdateJob, ComputeDirtyAndCleanUpdate, LEAF_NUMBER,
11031103
get_aggregation_number, get_uppers, is_aggregating_node, is_root_node,
11041104
},
11051105
cleanup_old_edges::OutdatedEdge,

turbopack/crates/turbo-tasks-backend/src/backend/storage.rs

Lines changed: 42 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,14 @@ use std::{
55
sync::{Arc, atomic::AtomicBool},
66
};
77

8+
use rustc_hash::FxHashMap;
89
use smallvec::SmallVec;
910
use thread_local::ThreadLocal;
1011
use turbo_bincode::TurboBincodeBuffer;
1112
use turbo_tasks::{FxDashMap, TaskId, parallel};
1213

1314
use crate::{
14-
backend::storage_schema::{Evictability, TaskStorage},
15+
backend::storage_schema::{Evictability, TaskStorage, UnevictableReason},
1516
backing_storage::SnapshotItem,
1617
database::key_value_database::KeySpace,
1718
utils::{
@@ -307,35 +308,50 @@ impl Storage {
307308
"evict_after_snapshot must not be called during snapshot mode"
308309
);
309310

310-
let counts: Vec<(usize, usize)> = parallel::map_collect(self.map.shards(), |shard| {
311-
let mut shard = shard.write();
312-
let mut full = 0usize;
313-
let mut data_only = 0usize;
314-
// SAFETY: We hold the write lock for the duration of iteration.
315-
for bucket in unsafe { shard.iter() } {
316-
// SAFETY: The write lock guard outlives the bucket reference.
317-
let (task_id, task) = unsafe { bucket.as_mut() };
318-
if task_id.is_transient() {
319-
continue;
320-
}
321-
match task.get().evictability() {
322-
Evictability::Full => {
323-
// SAFETY: Erasing while iterating a RawTable is safe.
324-
unsafe { shard.erase(bucket) };
325-
full += 1;
311+
let counts: Vec<(usize, usize, FxHashMap<UnevictableReason, usize>)> =
312+
parallel::map_collect(self.map.shards(), |shard| {
313+
let mut shard = shard.write();
314+
let mut full = 0usize;
315+
let mut data_only = 0usize;
316+
let mut reason_counts: FxHashMap<UnevictableReason, usize> = FxHashMap::default();
317+
// SAFETY: We hold the write lock for the duration of iteration.
318+
for bucket in unsafe { shard.iter() } {
319+
// SAFETY: The write lock guard outlives the bucket reference.
320+
let (task_id, task) = unsafe { bucket.as_mut() };
321+
if task_id.is_transient() {
322+
continue;
326323
}
327-
Evictability::DataOnly => {
328-
task.get_mut().drop_data();
329-
data_only += 1;
324+
match task.get().evictability() {
325+
Evictability::Full => {
326+
unsafe {
327+
shard.erase(bucket);
328+
}
329+
full += 1;
330+
}
331+
Evictability::DataOnly => {
332+
task.get_mut().drop_data();
333+
data_only += 1;
334+
}
335+
Evictability::No(reason) => {
336+
*reason_counts.entry(reason).or_default() += 1;
337+
}
330338
}
331-
Evictability::No => {}
332339
}
340+
(full, data_only, reason_counts)
341+
});
342+
let mut full = 0usize;
343+
let mut data_only = 0usize;
344+
let mut reasons: FxHashMap<UnevictableReason, usize> = FxHashMap::default();
345+
for (f, d, r) in counts {
346+
full += f;
347+
data_only += d;
348+
for (reason, count) in r {
349+
*reasons.entry(reason).or_default() += count;
333350
}
334-
(full, data_only)
335-
});
336-
counts
337-
.into_iter()
338-
.fold((0, 0), |(a, b), (c, d)| (a + c, b + d))
351+
}
352+
let skipped: usize = reasons.values().sum();
353+
eprintln!("eviction: {full} full, {data_only} data-only, {skipped} skipped ({reasons:?})",);
354+
(full, data_only)
339355
}
340356
}
341357

turbopack/crates/turbo-tasks-backend/src/backend/storage_schema.rs

Lines changed: 72 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ use turbo_tasks::{
2929
};
3030

3131
use crate::{
32-
backend::counter_map::CounterMap,
32+
backend::{counter_map::CounterMap, operation::LEAF_NUMBER},
3333
data::{
3434
ActivenessState, AggregationNumber, CellRef, CollectibleRef, CollectiblesRef, Dirtyness,
3535
InProgressCellState, InProgressState, LeafDistance, OutputValue, RootType, TransientTask,
@@ -384,11 +384,21 @@ impl TaskFlags {
384384
// Eviction
385385
// =============================================================================
386386

387+
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
388+
pub enum UnevictableReason {
389+
InProgress,
390+
TransientDependents,
391+
TransientData,
392+
TransientUppers,
393+
SessionState,
394+
Modified,
395+
}
396+
387397
/// Eviction level for a task after a snapshot.
388398
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
389399
pub enum Evictability {
390400
/// Task cannot be evicted.
391-
No,
401+
No(UnevictableReason),
392402
/// Only the data category can be evicted (meta is still in use).
393403
DataOnly,
394404
/// The entire task can be evicted (removed from the storage map).
@@ -426,7 +436,7 @@ impl TaskStorage {
426436
|| self.get_activeness().is_some()
427437
|| self.get_transient_task_type().is_some()
428438
{
429-
return Evictability::No;
439+
return Evictability::No(UnevictableReason::InProgress);
430440
}
431441

432442
// Check if full eviction is possible
@@ -436,28 +446,71 @@ impl TaskStorage {
436446
let data_evictable = flags.data_restored()
437447
&& !flags.data_modified()
438448
&& !flags.data_modified_during_snapshot();
449+
if !data_evictable {
450+
return Evictability::No(UnevictableReason::Modified);
451+
}
439452

440-
if meta_evictable && data_evictable {
441-
// Non-serializable cell data (e.g. process pool handles) cannot be restored from
442-
// disk. Full eviction would permanently lose it. Downgrade to data-only eviction
443-
// which preserves transient fields.
444-
if self.transient_cell_data().is_some_and(|m| !m.is_empty()) {
445-
return Evictability::DataOnly;
446-
}
447-
// Session-dependent tasks have transient `current_session_clean` state that cannot
448-
// be restored from disk. Losing it would make the task appear dirty in the current
449-
// session, causing redundant re-execution. Downgrade to data-only eviction.
450-
if matches!(self.get_dirty(), Some(Dirtyness::SessionDependent)) {
453+
// Data-category fields with `filter_transient` lose entries referencing transient
454+
// tasks when serialized to disk. If the in-memory copy has such entries, evicting
455+
// (and later restoring from disk) would silently drop those reverse-dependency
456+
// edges, causing transient tasks (e.g., HMR update streams) to never be notified
457+
// when cells/outputs change. Prevent data eviction in this case.
458+
let has_transient_dependents = self
459+
.output_dependent()
460+
.iter()
461+
.any(|task_id| task_id.is_transient())
462+
|| self
463+
.cell_dependents()
464+
.is_some_and(|deps| deps.iter().any(|(_, _, task_id)| task_id.is_transient()))
465+
|| self
466+
.collectibles_dependents()
467+
.is_some_and(|deps| deps.iter().any(|(_, id)| id.is_transient()));
468+
// If any transient task depends on this one, it must not be evicted, so that change
469+
// notifications still reach those transient tasks.
470+
if has_transient_dependents {
471+
return Evictability::No(UnevictableReason::TransientDependents);
472+
}
473+
// Check for non-serializable cell data (transient category) or a transient output. While
474+
// this would be preserved by data-only eviction, such tasks are not evicted at all.
475+
if self.transient_cell_data().is_some_and(|m| !m.is_empty())
476+
|| self.get_output().is_some_and(|o| o.is_transient())
477+
{
478+
return Evictability::No(UnevictableReason::TransientData);
479+
}
480+
// Meta fields with filter_transient (children, upper, followers, output,
481+
// collectibles_dependents, etc.) lose transient entries when serialized.
482+
// If the task participates in the aggregation graph with transient nodes,
483+
// full eviction would break those relationships. Check key meta fields.
484+
debug_assert!(
485+
!self
486+
.children()
487+
.is_some_and(|c| c.iter().any(|id| id.is_transient())),
488+
"persistent tasks cannot have transient children"
489+
);
490+
if self.upper().iter().any(|(id, _)| id.is_transient()) {
491+
return Evictability::No(UnevictableReason::TransientUppers);
492+
}
493+
494+
if self
495+
.get_dirty()
496+
.is_some_and(|d| matches!(d, Dirtyness::SessionDependent))
497+
{
498+
return Evictability::No(UnevictableReason::SessionState);
499+
}
500+
if meta_evictable {
501+
// Session-dependent tasks have transient state (current_session_clean flag,
502+
// Dirtyness::SessionDependent) that would be lost on full eviction.
503+
504+
// Aggregating nodes carry transient session-clean container counts that would
505+
// be lost on full eviction, breaking has_dirty_containers() checks.
506+
if self.aggregation_number.effective >= LEAF_NUMBER {
451507
return Evictability::DataOnly;
452508
}
453-
return Evictability::Full;
454-
}
455509

456-
if data_evictable {
457-
return Evictability::DataOnly;
510+
return Evictability::Full;
458511
}
459512

460-
Evictability::No
513+
return Evictability::DataOnly;
461514
}
462515
}
463516

0 commit comments

Comments
 (0)