Skip to content

Commit 665d553

Browse files
authored
feat: skip caching tasks that modify their inputs (#248)
1 parent 3e8e24d commit 665d553

File tree

20 files changed

+312
-55
lines changed

20 files changed

+312
-55
lines changed

crates/vite_task/src/session/event.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
use std::{process::ExitStatus, time::Duration};
22

3+
use vite_path::RelativePathBuf;
4+
35
use super::cache::CacheMiss;
46

57
/// The cache operation that failed.
@@ -57,6 +59,12 @@ pub enum CacheNotUpdatedReason {
5759
CacheDisabled,
5860
/// Execution exited with non-zero status
5961
NonZeroExitStatus,
62+
/// Task modified files it read during execution (read-write overlap detected by fspy).
63+
/// Caching such tasks is unsound because the prerun input hashes become stale.
64+
InputModified {
65+
/// First path that was both read and written during execution.
66+
path: RelativePathBuf,
67+
},
6068
}
6169

6270
#[derive(Debug)]
@@ -66,13 +74,7 @@ pub enum CacheUpdateStatus {
6674
/// Cache was not updated (with reason).
6775
/// The reason is part of the `LeafExecutionReporter` trait contract — reporters
6876
/// can use it for detailed logging, even if current implementations don't.
69-
NotUpdated(
70-
#[expect(
71-
dead_code,
72-
reason = "part of LeafExecutionReporter trait contract; reporters may use for detailed logging"
73-
)]
74-
CacheNotUpdatedReason,
75-
),
77+
NotUpdated(CacheNotUpdatedReason),
7678
}
7779

7880
#[derive(Debug)]

crates/vite_task/src/session/execute/fingerprint.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,19 +55,24 @@ impl PostRunFingerprint {
5555
/// Creates a new fingerprint from path accesses after task execution.
5656
///
5757
/// Negative glob filtering is done upstream in `spawn_with_tracking`.
58-
/// Paths may contain `..` components from fspy, so this method cleans them
59-
/// before fingerprinting.
58+
/// Paths already present in `globbed_inputs` are skipped — they are
59+
/// already tracked by the prerun glob fingerprint, and the read-write
60+
/// overlap check in `execute_spawn` guarantees the task did not modify
61+
/// them, so the prerun hash is still correct.
6062
///
6163
/// # Arguments
6264
/// * `inferred_path_reads` - Map of paths that were read during execution (from fspy)
6365
/// * `base_dir` - Workspace root for resolving relative paths
66+
/// * `globbed_inputs` - Prerun glob fingerprint; paths here are skipped
6467
#[tracing::instrument(level = "debug", skip_all, name = "create_post_run_fingerprint")]
6568
pub fn create(
6669
inferred_path_reads: &HashMap<RelativePathBuf, PathRead>,
6770
base_dir: &AbsolutePath,
71+
globbed_inputs: &BTreeMap<RelativePathBuf, u64>,
6872
) -> anyhow::Result<Self> {
6973
let inferred_inputs = inferred_path_reads
7074
.par_iter()
75+
.filter(|(path, _)| !globbed_inputs.contains_key(*path))
7176
.map(|(relative_path, path_read)| {
7277
let full_path = Arc::<AbsolutePath>::from(base_dir.join(relative_path));
7378
let fingerprint = fingerprint_path(&full_path, *path_read)?;

crates/vite_task/src/session/execute/mod.rs

Lines changed: 47 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -397,34 +397,55 @@ pub async fn execute_spawn(
397397
cache_metadata_and_inputs
398398
{
399399
if result.exit_status.success() {
400-
// path_reads is empty when inference is disabled (path_accesses is None)
401-
let empty_path_reads = HashMap::default();
402-
let path_reads = path_accesses.as_ref().map_or(&empty_path_reads, |pa| &pa.path_reads);
403-
404-
// Execution succeeded — attempt to create fingerprint and update cache
405-
match PostRunFingerprint::create(path_reads, cache_base_path) {
406-
Ok(post_run_fingerprint) => {
407-
let new_cache_value = CacheEntryValue {
408-
post_run_fingerprint,
409-
std_outputs: std_outputs.unwrap_or_default().into(),
410-
duration: result.duration,
411-
globbed_inputs,
412-
};
413-
match cache.update(cache_metadata, new_cache_value).await {
414-
Ok(()) => (CacheUpdateStatus::Updated, None),
415-
Err(err) => (
416-
CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::CacheDisabled),
417-
Some(ExecutionError::Cache {
418-
kind: CacheErrorKind::Update,
419-
source: err,
420-
}),
421-
),
400+
// Check for read-write overlap: if the task wrote to any file it also
401+
// read, the inputs were modified during execution — don't cache.
402+
// Note: this only checks fspy-inferred reads, not globbed_inputs keys.
403+
// A task that writes to a glob-matched file without reading it causes
404+
// perpetual cache misses (glob detects the hash change) but not a
405+
// correctness bug, so we don't handle that case here.
406+
if let Some(path) = path_accesses
407+
.as_ref()
408+
.and_then(|pa| pa.path_reads.keys().find(|p| pa.path_writes.contains(*p)))
409+
{
410+
(
411+
CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::InputModified {
412+
path: path.clone(),
413+
}),
414+
None,
415+
)
416+
} else {
417+
// path_reads is empty when inference is disabled (path_accesses is None)
418+
let empty_path_reads = HashMap::default();
419+
let path_reads =
420+
path_accesses.as_ref().map_or(&empty_path_reads, |pa| &pa.path_reads);
421+
422+
// Execution succeeded — attempt to create fingerprint and update cache.
423+
// Paths already in globbed_inputs are skipped: Rule 1 (above) guarantees
424+
// no input modification, so the prerun hash is the correct post-exec hash.
425+
match PostRunFingerprint::create(path_reads, cache_base_path, &globbed_inputs) {
426+
Ok(post_run_fingerprint) => {
427+
let new_cache_value = CacheEntryValue {
428+
post_run_fingerprint,
429+
std_outputs: std_outputs.unwrap_or_default().into(),
430+
duration: result.duration,
431+
globbed_inputs,
432+
};
433+
match cache.update(cache_metadata, new_cache_value).await {
434+
Ok(()) => (CacheUpdateStatus::Updated, None),
435+
Err(err) => (
436+
CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::CacheDisabled),
437+
Some(ExecutionError::Cache {
438+
kind: CacheErrorKind::Update,
439+
source: err,
440+
}),
441+
),
442+
}
422443
}
444+
Err(err) => (
445+
CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::CacheDisabled),
446+
Some(ExecutionError::PostRunFingerprint(err)),
447+
),
423448
}
424-
Err(err) => (
425-
CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::CacheDisabled),
426-
Some(ExecutionError::PostRunFingerprint(err)),
427-
),
428449
}
429450
} else {
430451
// Execution failed with non-zero exit status — don't update cache

crates/vite_task/src/session/reporter/labeled.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ impl LeafExecutionReporter for LabeledLeafReporter {
251251
async fn finish(
252252
self: Box<Self>,
253253
status: Option<StdExitStatus>,
254-
_cache_update_status: CacheUpdateStatus,
254+
cache_update_status: CacheUpdateStatus,
255255
error: Option<ExecutionError>,
256256
) {
257257
// Convert error before consuming it (need the original for display formatting).
@@ -276,7 +276,12 @@ impl LeafExecutionReporter for LabeledLeafReporter {
276276
task_name: display.task_display.task_name.clone(),
277277
command: display.command.clone(),
278278
cwd: cwd_relative,
279-
result: TaskResult::from_execution(&cache_status, status, saved_error.as_ref()),
279+
result: TaskResult::from_execution(
280+
&cache_status,
281+
status,
282+
saved_error.as_ref(),
283+
&cache_update_status,
284+
),
280285
};
281286

282287
shared.borrow_mut().tasks.push(task_summary);

0 commit comments

Comments
 (0)