Skip to content

Commit 0b78403

Browse files
branchseerclaude
andcommitted
feat(execute): implement output file collection, archiving, and restoration
- Enable fspy tracking when either input or output uses auto-detection - Add `collect_and_archive_outputs()` to gather output files from fspy writes and/or output globs, then create a tar.zst archive on cache update - Restore output files from archive on cache hit via `extract_output_archive()` - Add `collect_glob_paths()` to glob_inputs.rs for path-only collection - Thread `cache_dir` through `ExecutionContext` and `execute_spawn` Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 7876951 commit 0b78403

File tree

3 files changed

+226
-67
lines changed

3 files changed

+226
-67
lines changed

crates/vite_task/src/session/execute/glob_inputs.rs

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -109,6 +109,58 @@ pub fn compute_globbed_inputs(
109109
Ok(result)
110110
}
111111

112+
/// Collect file paths matching positive globs, filtered by negative globs.
113+
///
114+
/// Like [`compute_globbed_inputs`] but only collects paths (no hashing).
115+
/// Used for determining which output files to archive.
116+
pub fn collect_glob_paths(
117+
workspace_root: &AbsolutePath,
118+
positive_globs: &std::collections::BTreeSet<Str>,
119+
negative_globs: &std::collections::BTreeSet<Str>,
120+
) -> anyhow::Result<Vec<RelativePathBuf>> {
121+
if positive_globs.is_empty() {
122+
return Ok(Vec::new());
123+
}
124+
125+
let negatives: Vec<Glob<'static>> = negative_globs
126+
.iter()
127+
.map(|p| Ok(Glob::new(p.as_str())?.into_owned()))
128+
.collect::<anyhow::Result<_>>()?;
129+
let negation = wax::any(negatives)?;
130+
131+
let mut result = Vec::new();
132+
133+
for pattern in positive_globs {
134+
let glob = Glob::new(pattern.as_str())?.into_owned();
135+
let walk = glob.walk(workspace_root.as_path());
136+
for entry in walk.not(negation.clone())? {
137+
let entry = match entry {
138+
Ok(entry) => entry,
139+
Err(err) => {
140+
let io_err: io::Error = err.into();
141+
if io_err.kind() == io::ErrorKind::NotFound {
142+
continue;
143+
}
144+
return Err(io_err.into());
145+
}
146+
};
147+
if !entry.file_type().is_file() {
148+
continue;
149+
}
150+
let path = entry.path();
151+
let Some(stripped) = path.strip_prefix(workspace_root.as_path()).ok() else {
152+
continue;
153+
};
154+
let relative = RelativePathBuf::new(stripped)?;
155+
result.push(relative);
156+
}
157+
}
158+
159+
result.sort();
160+
result.dedup();
161+
Ok(result)
162+
}
163+
112164
#[expect(clippy::disallowed_types, reason = "receives std::path::Path from wax glob walker")]
113165
fn hash_file_content(path: &std::path::Path) -> io::Result<u64> {
114166
super::hash::hash_content(io::BufReader::new(File::open(path)?))

crates/vite_task/src/session/execute/mod.rs

Lines changed: 173 additions & 67 deletions
Original file line numberDiff line numberDiff line change
@@ -7,22 +7,24 @@ use std::{cell::RefCell, collections::BTreeMap, io::Write as _, process::Stdio,
77

88
use futures_util::{FutureExt, StreamExt, future::LocalBoxFuture, stream::FuturesUnordered};
99
use petgraph::Direction;
10-
use rustc_hash::FxHashMap;
10+
use rustc_hash::{FxHashMap, FxHashSet};
1111
use tokio::sync::Semaphore;
1212
use tokio_util::sync::CancellationToken;
13-
use vite_path::AbsolutePath;
13+
use vite_path::{AbsolutePath, RelativePathBuf};
14+
use vite_str::Str;
1415
use vite_task_plan::{
1516
ExecutionGraph, ExecutionItemDisplay, ExecutionItemKind, LeafExecutionKind, SpawnCommand,
1617
SpawnExecution, execution_graph::ExecutionNodeIndex,
1718
};
19+
use wax::Program as _;
1820

1921
use self::{
2022
fingerprint::PostRunFingerprint,
2123
glob_inputs::compute_globbed_inputs,
2224
spawn::{SpawnResult, TrackedPathAccesses, spawn_with_tracking},
2325
};
2426
use super::{
25-
cache::{CacheEntryValue, ExecutionCache},
27+
cache::{CacheEntryValue, ExecutionCache, archive},
2628
event::{
2729
CacheDisabledReason, CacheErrorKind, CacheNotUpdatedReason, CacheStatus, CacheUpdateStatus,
2830
ExecutionError,
@@ -67,6 +69,8 @@ struct ExecutionContext<'a> {
6769
/// Base path for resolving relative paths in cache entries.
6870
/// Typically the workspace root.
6971
cache_base_path: &'a Arc<AbsolutePath>,
72+
/// Directory where cache files (db, archives) are stored.
73+
cache_dir: &'a AbsolutePath,
7074
/// Token cancelled when a task fails. Kills in-flight child processes
7175
/// (via `start_kill` in spawn.rs), prevents scheduling new tasks, and
7276
/// prevents caching results of concurrently-running tasks.
@@ -228,6 +232,7 @@ impl ExecutionContext<'_> {
228232
spawn_execution,
229233
self.cache,
230234
self.cache_base_path,
235+
self.cache_dir,
231236
self.fast_fail_token.clone(),
232237
self.interrupt_token.clone(),
233238
)
@@ -269,6 +274,7 @@ pub async fn execute_spawn(
269274
spawn_execution: &SpawnExecution,
270275
cache: &ExecutionCache,
271276
cache_base_path: &Arc<AbsolutePath>,
277+
cache_dir: &AbsolutePath,
272278
fast_fail_token: CancellationToken,
273279
interrupt_token: CancellationToken,
274280
) -> SpawnOutcome {
@@ -341,6 +347,18 @@ pub async fn execute_spawn(
341347
let _ = writer.write_all(&output.content);
342348
let _ = writer.flush();
343349
}
350+
// Restore output files from the cached archive
351+
if let Some(ref archive_name) = cached.output_archive {
352+
let archive_path = cache_dir.join(archive_name.as_str());
353+
if let Err(err) = archive::extract_output_archive(cache_base_path, &archive_path) {
354+
leaf_reporter.finish(
355+
None,
356+
CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::CacheHit),
357+
Some(ExecutionError::Cache { kind: CacheErrorKind::Lookup, source: err }),
358+
);
359+
return SpawnOutcome::Failed;
360+
}
361+
}
344362
leaf_reporter.finish(
345363
None,
346364
CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::CacheHit),
@@ -385,15 +403,16 @@ pub async fn execute_spawn(
385403

386404
// 5. Piped mode: execute spawn with tracking, streaming output to writers.
387405
// - std_outputs: always captured when caching is enabled (for cache replay)
388-
// - path_accesses: only tracked when includes_auto is true (fspy inference)
406+
// - path_accesses: tracked when input or output uses auto (fspy inference)
389407
let (mut std_outputs, mut path_accesses, cache_metadata_and_inputs) =
390408
cache_metadata.map_or((None, None, None), |cache_metadata| {
391-
// On musl targets, LD_PRELOAD-based tracking is unavailable but seccomp
392-
// unotify provides equivalent file access tracing.
393-
let path_accesses = if cache_metadata.input_config.includes_auto {
409+
// Enable fspy when either input or output needs auto-detection.
410+
let needs_fspy = cache_metadata.input_config.includes_auto
411+
|| cache_metadata.output_config.includes_auto;
412+
let path_accesses = if needs_fspy {
394413
Some(TrackedPathAccesses::default())
395414
} else {
396-
None // Skip fspy when inference is disabled or unavailable
415+
None // Skip fspy when inference is disabled for both input and output
397416
};
398417
(Some(Vec::new()), path_accesses, Some((cache_metadata, globbed_inputs)))
399418
});
@@ -451,71 +470,95 @@ pub async fn execute_spawn(
451470

452471
// 6. Update cache if successful and determine cache update status.
453472
// Errors during cache update are terminal (reported through finish).
454-
let (cache_update_status, cache_error) = if let Some((cache_metadata, globbed_inputs)) =
455-
cache_metadata_and_inputs
456-
{
457-
let cancelled = fast_fail_token.is_cancelled() || interrupt_token.is_cancelled();
458-
if cancelled {
459-
// Cancelled (Ctrl-C or sibling failure) — result is untrustworthy
460-
(CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::Cancelled), None)
461-
} else if result.exit_status.success() {
462-
// Check for read-write overlap: if the task wrote to any file it also
463-
// read, the inputs were modified during execution — don't cache.
464-
// Note: this only checks fspy-inferred reads, not globbed_inputs keys.
465-
// A task that writes to a glob-matched file without reading it causes
466-
// perpetual cache misses (glob detects the hash change) but not a
467-
// correctness bug, so we don't handle that case here.
468-
if let Some(path) = path_accesses
469-
.as_ref()
470-
.and_then(|pa| pa.path_reads.keys().find(|p| pa.path_writes.contains(*p)))
471-
{
472-
(
473-
CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::InputModified {
474-
path: path.clone(),
475-
}),
476-
None,
477-
)
478-
} else {
479-
// path_reads is empty when inference is disabled (path_accesses is None)
480-
let empty_path_reads = HashMap::default();
481-
let path_reads =
482-
path_accesses.as_ref().map_or(&empty_path_reads, |pa| &pa.path_reads);
483-
484-
// Execution succeeded — attempt to create fingerprint and update cache.
485-
// Paths already in globbed_inputs are skipped: Rule 1 (above) guarantees
486-
// no input modification, so the prerun hash is the correct post-exec hash.
487-
match PostRunFingerprint::create(path_reads, cache_base_path, &globbed_inputs) {
488-
Ok(post_run_fingerprint) => {
489-
let new_cache_value = CacheEntryValue {
490-
post_run_fingerprint,
491-
std_outputs: std_outputs.unwrap_or_default().into(),
492-
duration: result.duration,
493-
globbed_inputs,
494-
};
495-
match cache.update(cache_metadata, new_cache_value).await {
496-
Ok(()) => (CacheUpdateStatus::Updated, None),
497-
Err(err) => (
498-
CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::CacheDisabled),
499-
Some(ExecutionError::Cache {
500-
kind: CacheErrorKind::Update,
501-
source: err,
502-
}),
503-
),
473+
let (cache_update_status, cache_error) = 'cache_update: {
474+
if let Some((cache_metadata, globbed_inputs)) = cache_metadata_and_inputs {
475+
let cancelled = fast_fail_token.is_cancelled() || interrupt_token.is_cancelled();
476+
if cancelled {
477+
// Cancelled (Ctrl-C or sibling failure) — result is untrustworthy
478+
(CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::Cancelled), None)
479+
} else if result.exit_status.success() {
480+
// Check for read-write overlap: if the task wrote to any file it also
481+
// read, the inputs were modified during execution — don't cache.
482+
// Note: this only checks fspy-inferred reads, not globbed_inputs keys.
483+
// A task that writes to a glob-matched file without reading it causes
484+
// perpetual cache misses (glob detects the hash change) but not a
485+
// correctness bug, so we don't handle that case here.
486+
if let Some(path) = path_accesses
487+
.as_ref()
488+
.and_then(|pa| pa.path_reads.keys().find(|p| pa.path_writes.contains(*p)))
489+
{
490+
(
491+
CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::InputModified {
492+
path: path.clone(),
493+
}),
494+
None,
495+
)
496+
} else {
497+
// path_reads is empty when inference is disabled (path_accesses is None)
498+
let empty_path_reads = HashMap::default();
499+
let path_reads =
500+
path_accesses.as_ref().map_or(&empty_path_reads, |pa| &pa.path_reads);
501+
502+
// Execution succeeded — attempt to create fingerprint and update cache.
503+
// Paths already in globbed_inputs are skipped: Rule 1 (above) guarantees
504+
// no input modification, so the prerun hash is the correct post-exec hash.
505+
match PostRunFingerprint::create(path_reads, cache_base_path, &globbed_inputs) {
506+
Ok(post_run_fingerprint) => {
507+
// Collect output files and create archive
508+
let output_archive = match collect_and_archive_outputs(
509+
cache_metadata,
510+
path_accesses.as_ref(),
511+
cache_base_path,
512+
cache_dir,
513+
) {
514+
Ok(archive) => archive,
515+
Err(err) => {
516+
break 'cache_update (
517+
CacheUpdateStatus::NotUpdated(
518+
CacheNotUpdatedReason::CacheDisabled,
519+
),
520+
Some(ExecutionError::Cache {
521+
kind: CacheErrorKind::Update,
522+
source: err,
523+
}),
524+
);
525+
}
526+
};
527+
528+
let new_cache_value = CacheEntryValue {
529+
post_run_fingerprint,
530+
std_outputs: std_outputs.unwrap_or_default().into(),
531+
duration: result.duration,
532+
globbed_inputs,
533+
output_archive,
534+
};
535+
match cache.update(cache_metadata, new_cache_value).await {
536+
Ok(()) => (CacheUpdateStatus::Updated, None),
537+
Err(err) => (
538+
CacheUpdateStatus::NotUpdated(
539+
CacheNotUpdatedReason::CacheDisabled,
540+
),
541+
Some(ExecutionError::Cache {
542+
kind: CacheErrorKind::Update,
543+
source: err,
544+
}),
545+
),
546+
}
504547
}
548+
Err(err) => (
549+
CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::CacheDisabled),
550+
Some(ExecutionError::PostRunFingerprint(err)),
551+
),
505552
}
506-
Err(err) => (
507-
CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::CacheDisabled),
508-
Some(ExecutionError::PostRunFingerprint(err)),
509-
),
510553
}
554+
} else {
555+
// Execution failed with non-zero exit status — don't update cache
556+
(CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::NonZeroExitStatus), None)
511557
}
512558
} else {
513-
// Execution failed with non-zero exit status — don't update cache
514-
(CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::NonZeroExitStatus), None)
559+
// Caching was disabled for this task
560+
(CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::CacheDisabled), None)
515561
}
516-
} else {
517-
// Caching was disabled for this task
518-
(CacheUpdateStatus::NotUpdated(CacheNotUpdatedReason::CacheDisabled), None)
519562
};
520563

521564
// 7. Finish the leaf execution with the result and optional cache error.
@@ -607,6 +650,68 @@ async fn spawn_inherited(
607650
Ok(SpawnResult { exit_status, duration: start.elapsed() })
608651
}
609652

653+
/// Collect output files and create a tar.zst archive in the cache directory.
654+
///
655+
/// Output files are determined by:
656+
/// - fspy-tracked writes (when `output_config.includes_auto` is true)
657+
/// - Positive output globs (always, if configured)
658+
/// - Filtered by negative output globs
659+
///
660+
/// Returns `Some(archive_filename)` if files were archived, `None` if no output files.
661+
fn collect_and_archive_outputs(
662+
cache_metadata: &vite_task_plan::cache_metadata::CacheMetadata,
663+
path_accesses: Option<&TrackedPathAccesses>,
664+
workspace_root: &AbsolutePath,
665+
cache_dir: &AbsolutePath,
666+
) -> anyhow::Result<Option<Str>> {
667+
let output_config = &cache_metadata.output_config;
668+
669+
// Collect output files from auto-detection (fspy writes)
670+
let mut output_files: FxHashSet<RelativePathBuf> = FxHashSet::default();
671+
672+
if output_config.includes_auto
673+
&& let Some(pa) = path_accesses
674+
{
675+
output_files.extend(pa.path_writes.iter().cloned());
676+
}
677+
678+
// Collect output files from positive globs
679+
if !output_config.positive_globs.is_empty() {
680+
let glob_paths = glob_inputs::collect_glob_paths(
681+
workspace_root,
682+
&output_config.positive_globs,
683+
&output_config.negative_globs,
684+
)?;
685+
output_files.extend(glob_paths);
686+
}
687+
688+
// Apply negative globs to auto-detected files
689+
if output_config.includes_auto && !output_config.negative_globs.is_empty() {
690+
let negatives: Vec<wax::Glob<'static>> = output_config
691+
.negative_globs
692+
.iter()
693+
.map(|p| Ok(wax::Glob::new(p.as_str())?.into_owned()))
694+
.collect::<anyhow::Result<_>>()?;
695+
output_files.retain(|path| !negatives.iter().any(|neg| neg.is_match(path.as_str())));
696+
}
697+
698+
if output_files.is_empty() {
699+
return Ok(None);
700+
}
701+
702+
// Sort for deterministic archive content
703+
let mut sorted_files: Vec<RelativePathBuf> = output_files.into_iter().collect();
704+
sorted_files.sort();
705+
706+
// Create archive with UUID filename
707+
let archive_name: Str = vite_str::format!("{}.tar.zst", uuid::Uuid::new_v4());
708+
let archive_path = cache_dir.join(archive_name.as_str());
709+
710+
archive::create_output_archive(workspace_root, &sorted_files, &archive_path)?;
711+
712+
Ok(Some(archive_name))
713+
}
714+
610715
/// Win32 Job Object utilities for process tree management.
611716
///
612717
/// On Windows, `TerminateProcess` only kills the direct child process, not its
@@ -733,6 +838,7 @@ impl Session<'_> {
733838
reporter: &reporter,
734839
cache,
735840
cache_base_path: &self.workspace_path,
841+
cache_dir: &self.cache_path,
736842
fast_fail_token: CancellationToken::new(),
737843
interrupt_token,
738844
};

crates/vite_task/src/session/mod.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,7 @@ impl<'a> Session<'a> {
676676
&spawn_execution,
677677
cache,
678678
&self.workspace_path,
679+
&self.cache_path,
679680
tokio_util::sync::CancellationToken::new(),
680681
tokio_util::sync::CancellationToken::new(),
681682
)

0 commit comments

Comments
 (0)