Skip to content

Commit 5619cb0

Browse files
branchseerclaude
andcommitted
feat: skip caching tasks that modify their inputs and deduplicate postrun fingerprint
Two changes to improve cache correctness and performance: 1. If fspy detects a task both read and wrote the same file, skip cache storage (InputModified). Caching such tasks is unsound because the prerun glob hashes become stale. 2. Skip paths already in globbed_inputs when building the postrun fingerprint. This avoids redundant storage and double-validation on cache hit checks. Safe because rule 1 guarantees no input was modified. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent f8cc431 commit 5619cb0

File tree

8 files changed

+86
-28
lines changed

8 files changed

+86
-28
lines changed

crates/vite_task/src/session/event.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,9 @@ pub enum CacheNotUpdatedReason {
5757
CacheDisabled,
5858
/// Execution exited with non-zero status
5959
NonZeroExitStatus,
60+
/// Task modified files it read during execution (read-write overlap detected by fspy).
61+
/// Caching such tasks is unsound because the prerun input hashes become stale.
62+
InputModified,
6063
}
6164

6265
#[derive(Debug)]

crates/vite_task/src/session/execute/fingerprint.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -55,19 +55,24 @@ impl PostRunFingerprint {
5555
/// Creates a new fingerprint from path accesses after task execution.
5656
///
5757
/// Negative glob filtering is done upstream in `spawn_with_tracking`.
58-
/// Paths may contain `..` components from fspy, so this method cleans them
59-
/// before fingerprinting.
58+
/// Paths already present in `globbed_inputs` are skipped — they are
59+
/// already tracked by the prerun glob fingerprint, and the read-write
60+
/// overlap check in `execute_spawn` guarantees the task did not modify
61+
/// them, so the prerun hash is still correct.
6062
///
6163
/// # Arguments
6264
/// * `inferred_path_reads` - Map of paths that were read during execution (from fspy)
6365
/// * `base_dir` - Workspace root for resolving relative paths
66+
/// * `globbed_inputs` - Prerun glob fingerprint; paths here are skipped
6467
#[tracing::instrument(level = "debug", skip_all, name = "create_post_run_fingerprint")]
6568
pub fn create(
6669
inferred_path_reads: &HashMap<RelativePathBuf, PathRead>,
6770
base_dir: &AbsolutePath,
71+
globbed_inputs: &BTreeMap<RelativePathBuf, u64>,
6872
) -> anyhow::Result<Self> {
6973
let inferred_inputs = inferred_path_reads
7074
.par_iter()
75+
.filter(|(path, _)| !globbed_inputs.contains_key(*path))
7176
.map(|(relative_path, path_read)| {
7277
let full_path = Arc::<AbsolutePath>::from(base_dir.join(relative_path));
7378
let fingerprint = fingerprint_path(&full_path, *path_read)?;

crates/vite_task/src/session/execute/mod.rs

Lines changed: 42 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -397,34 +397,50 @@ pub async fn execute_spawn(
397397
cache_metadata_and_inputs
398398
{
399399
if result.exit_status.success() {
400-
// path_reads is empty when inference is disabled (path_accesses is None)
401-
let empty_path_reads = HashMap::default();
402-
let path_reads = path_accesses.as_ref().map_or(&empty_path_reads, |pa| &pa.path_reads);
403-
404-
// Execution succeeded — attempt to create fingerprint and update cache
405-
match PostRunFingerprint::create(path_reads, cache_base_path) {
406-
Ok(post_run_fingerprint) => {
407-
let new_cache_value = CacheEntryValue {
408-
post_run_fingerprint,
409-
std_outputs: std_outputs.unwrap_or_default().into(),
410-
duration: result.duration,
411-
globbed_inputs,
412-
};
413-
match cache.update(cache_metadata, new_cache_value).await {
414-
Ok(()) => (CacheUpdateStatus::Updated, None),
415-
Err(err) => (
416-
CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::CacheDisabled),
417-
Some(ExecutionError::Cache {
418-
kind: CacheErrorKind::Update,
419-
source: err,
420-
}),
421-
),
400+
// Check for read-write overlap: if the task wrote to any file it also
401+
// read, the inputs were modified during execution — don't cache.
402+
// Note: this only checks fspy-inferred reads, not globbed_inputs keys.
403+
// A task that writes to a glob-matched file without reading it causes
404+
// perpetual cache misses (glob detects the hash change) but not a
405+
// correctness bug, so we don't handle that case here.
406+
if path_accesses
407+
.as_ref()
408+
.is_some_and(|pa| pa.path_reads.keys().any(|p| pa.path_writes.contains(p)))
409+
{
410+
(CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::InputModified), None)
411+
} else {
412+
// path_reads is empty when inference is disabled (path_accesses is None)
413+
let empty_path_reads = HashMap::default();
414+
let path_reads =
415+
path_accesses.as_ref().map_or(&empty_path_reads, |pa| &pa.path_reads);
416+
417+
// Execution succeeded — attempt to create fingerprint and update cache.
418+
// Paths already in globbed_inputs are skipped: Rule 1 (above) guarantees
419+
// no input modification, so the prerun hash is the correct post-exec hash.
420+
match PostRunFingerprint::create(path_reads, cache_base_path, &globbed_inputs) {
421+
Ok(post_run_fingerprint) => {
422+
let new_cache_value = CacheEntryValue {
423+
post_run_fingerprint,
424+
std_outputs: std_outputs.unwrap_or_default().into(),
425+
duration: result.duration,
426+
globbed_inputs,
427+
};
428+
match cache.update(cache_metadata, new_cache_value).await {
429+
Ok(()) => (CacheUpdateStatus::Updated, None),
430+
Err(err) => (
431+
CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::CacheDisabled),
432+
Some(ExecutionError::Cache {
433+
kind: CacheErrorKind::Update,
434+
source: err,
435+
}),
436+
),
437+
}
422438
}
439+
Err(err) => (
440+
CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::CacheDisabled),
441+
Some(ExecutionError::PostRunFingerprint(err)),
442+
),
423443
}
424-
Err(err) => (
425-
CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::CacheDisabled),
426-
Some(ExecutionError::PostRunFingerprint(err)),
427-
),
428444
}
429445
} else {
430446
// Execution failed with non-zero exit status — don't update cache
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"name": "input-read-write-not-cached",
3+
"private": true
4+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Test that tasks modifying their own inputs (read-write overlap) are not cached.
2+
# replace-file-content reads then writes the same file — fspy detects both.
3+
# Without this behavior, the second run would be a cache hit.
4+
5+
[[e2e]]
6+
name = "read-write task is not cached"
7+
steps = [
8+
# First run - executes, but cache NOT stored (read-write overlap detected)
9+
"vt run rw-task",
10+
# Second run - still miss (no cache was stored on first run)
11+
"vt run rw-task",
12+
]
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
---
2+
source: crates/vite_task_bin/tests/e2e_snapshots/main.rs
3+
expression: e2e_outputs
4+
---
5+
> vt run rw-task
6+
$ replace-file-content src/data.txt original modified
7+
> vt run rw-task
8+
$ replace-file-content src/data.txt original modified
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
original
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
{
2+
"tasks": {
3+
"rw-task": {
4+
"command": "replace-file-content src/data.txt original modified",
5+
"input": [{ "auto": true }],
6+
"cache": true
7+
}
8+
}
9+
}

0 commit comments

Comments
 (0)