@@ -7,22 +7,24 @@ use std::{cell::RefCell, collections::BTreeMap, io::Write as _, process::Stdio,
77
88use futures_util:: { FutureExt , StreamExt , future:: LocalBoxFuture , stream:: FuturesUnordered } ;
99use petgraph:: Direction ;
10- use rustc_hash:: FxHashMap ;
10+ use rustc_hash:: { FxHashMap , FxHashSet } ;
1111use tokio:: sync:: Semaphore ;
1212use tokio_util:: sync:: CancellationToken ;
13- use vite_path:: AbsolutePath ;
13+ use vite_path:: { AbsolutePath , RelativePathBuf } ;
14+ use vite_str:: Str ;
1415use vite_task_plan:: {
1516 ExecutionGraph , ExecutionItemDisplay , ExecutionItemKind , LeafExecutionKind , SpawnCommand ,
1617 SpawnExecution , execution_graph:: ExecutionNodeIndex ,
1718} ;
19+ use wax:: Program as _;
1820
1921use self :: {
2022 fingerprint:: PostRunFingerprint ,
2123 glob_inputs:: compute_globbed_inputs,
2224 spawn:: { SpawnResult , TrackedPathAccesses , spawn_with_tracking} ,
2325} ;
2426use super :: {
25- cache:: { CacheEntryValue , ExecutionCache } ,
27+ cache:: { CacheEntryValue , ExecutionCache , archive } ,
2628 event:: {
2729 CacheDisabledReason , CacheErrorKind , CacheNotUpdatedReason , CacheStatus , CacheUpdateStatus ,
2830 ExecutionError ,
@@ -67,6 +69,8 @@ struct ExecutionContext<'a> {
6769 /// Base path for resolving relative paths in cache entries.
6870 /// Typically the workspace root.
6971 cache_base_path : & ' a Arc < AbsolutePath > ,
72+ /// Directory where cache files (db, archives) are stored.
73+ cache_dir : & ' a AbsolutePath ,
7074 /// Token cancelled when a task fails. Kills in-flight child processes
7175 /// (via `start_kill` in spawn.rs), prevents scheduling new tasks, and
7276 /// prevents caching results of concurrently-running tasks.
@@ -228,6 +232,7 @@ impl ExecutionContext<'_> {
228232 spawn_execution,
229233 self . cache ,
230234 self . cache_base_path ,
235+ self . cache_dir ,
231236 self . fast_fail_token . clone ( ) ,
232237 self . interrupt_token . clone ( ) ,
233238 )
@@ -269,6 +274,7 @@ pub async fn execute_spawn(
269274 spawn_execution : & SpawnExecution ,
270275 cache : & ExecutionCache ,
271276 cache_base_path : & Arc < AbsolutePath > ,
277+ cache_dir : & AbsolutePath ,
272278 fast_fail_token : CancellationToken ,
273279 interrupt_token : CancellationToken ,
274280) -> SpawnOutcome {
@@ -341,6 +347,18 @@ pub async fn execute_spawn(
341347 let _ = writer. write_all ( & output. content ) ;
342348 let _ = writer. flush ( ) ;
343349 }
350+ // Restore output files from the cached archive
351+ if let Some ( ref archive_name) = cached. output_archive {
352+ let archive_path = cache_dir. join ( archive_name. as_str ( ) ) ;
353+ if let Err ( err) = archive:: extract_output_archive ( cache_base_path, & archive_path) {
354+ leaf_reporter. finish (
355+ None ,
356+ CacheUpdateStatus :: NotUpdated ( CacheNotUpdatedReason :: CacheHit ) ,
357+ Some ( ExecutionError :: Cache { kind : CacheErrorKind :: Lookup , source : err } ) ,
358+ ) ;
359+ return SpawnOutcome :: Failed ;
360+ }
361+ }
344362 leaf_reporter. finish (
345363 None ,
346364 CacheUpdateStatus :: NotUpdated ( CacheNotUpdatedReason :: CacheHit ) ,
@@ -385,15 +403,16 @@ pub async fn execute_spawn(
385403
386404 // 5. Piped mode: execute spawn with tracking, streaming output to writers.
387405 // - std_outputs: always captured when caching is enabled (for cache replay)
388- // - path_accesses: only tracked when includes_auto is true (fspy inference)
406+ // - path_accesses: tracked when input or output uses auto (fspy inference)
389407 let ( mut std_outputs, mut path_accesses, cache_metadata_and_inputs) =
390408 cache_metadata. map_or ( ( None , None , None ) , |cache_metadata| {
391- // On musl targets, LD_PRELOAD-based tracking is unavailable but seccomp
392- // unotify provides equivalent file access tracing.
393- let path_accesses = if cache_metadata. input_config . includes_auto {
409+ // Enable fspy when either input or output needs auto-detection.
410+ let needs_fspy = cache_metadata. input_config . includes_auto
411+ || cache_metadata. output_config . includes_auto ;
412+ let path_accesses = if needs_fspy {
394413 Some ( TrackedPathAccesses :: default ( ) )
395414 } else {
396- None // Skip fspy when inference is disabled or unavailable
415+ None // Skip fspy when inference is disabled for both input and output
397416 } ;
398417 ( Some ( Vec :: new ( ) ) , path_accesses, Some ( ( cache_metadata, globbed_inputs) ) )
399418 } ) ;
@@ -451,71 +470,95 @@ pub async fn execute_spawn(
451470
452471 // 6. Update cache if successful and determine cache update status.
453472 // Errors during cache update are terminal (reported through finish).
454- let ( cache_update_status, cache_error) = if let Some ( ( cache_metadata, globbed_inputs) ) =
455- cache_metadata_and_inputs
456- {
457- let cancelled = fast_fail_token. is_cancelled ( ) || interrupt_token. is_cancelled ( ) ;
458- if cancelled {
459- // Cancelled (Ctrl-C or sibling failure) — result is untrustworthy
460- ( CacheUpdateStatus :: NotUpdated ( CacheNotUpdatedReason :: Cancelled ) , None )
461- } else if result. exit_status . success ( ) {
462- // Check for read-write overlap: if the task wrote to any file it also
463- // read, the inputs were modified during execution — don't cache.
464- // Note: this only checks fspy-inferred reads, not globbed_inputs keys.
465- // A task that writes to a glob-matched file without reading it causes
466- // perpetual cache misses (glob detects the hash change) but not a
467- // correctness bug, so we don't handle that case here.
468- if let Some ( path) = path_accesses
469- . as_ref ( )
470- . and_then ( |pa| pa. path_reads . keys ( ) . find ( |p| pa. path_writes . contains ( * p) ) )
471- {
472- (
473- CacheUpdateStatus :: NotUpdated ( CacheNotUpdatedReason :: InputModified {
474- path : path. clone ( ) ,
475- } ) ,
476- None ,
477- )
478- } else {
479- // path_reads is empty when inference is disabled (path_accesses is None)
480- let empty_path_reads = HashMap :: default ( ) ;
481- let path_reads =
482- path_accesses. as_ref ( ) . map_or ( & empty_path_reads, |pa| & pa. path_reads ) ;
483-
484- // Execution succeeded — attempt to create fingerprint and update cache.
485- // Paths already in globbed_inputs are skipped: Rule 1 (above) guarantees
486- // no input modification, so the prerun hash is the correct post-exec hash.
487- match PostRunFingerprint :: create ( path_reads, cache_base_path, & globbed_inputs) {
488- Ok ( post_run_fingerprint) => {
489- let new_cache_value = CacheEntryValue {
490- post_run_fingerprint,
491- std_outputs : std_outputs. unwrap_or_default ( ) . into ( ) ,
492- duration : result. duration ,
493- globbed_inputs,
494- } ;
495- match cache. update ( cache_metadata, new_cache_value) . await {
496- Ok ( ( ) ) => ( CacheUpdateStatus :: Updated , None ) ,
497- Err ( err) => (
498- CacheUpdateStatus :: NotUpdated ( CacheNotUpdatedReason :: CacheDisabled ) ,
499- Some ( ExecutionError :: Cache {
500- kind : CacheErrorKind :: Update ,
501- source : err,
502- } ) ,
503- ) ,
473+ let ( cache_update_status, cache_error) = ' cache_update: {
474+ if let Some ( ( cache_metadata, globbed_inputs) ) = cache_metadata_and_inputs {
475+ let cancelled = fast_fail_token. is_cancelled ( ) || interrupt_token. is_cancelled ( ) ;
476+ if cancelled {
477+ // Cancelled (Ctrl-C or sibling failure) — result is untrustworthy
478+ ( CacheUpdateStatus :: NotUpdated ( CacheNotUpdatedReason :: Cancelled ) , None )
479+ } else if result. exit_status . success ( ) {
480+ // Check for read-write overlap: if the task wrote to any file it also
481+ // read, the inputs were modified during execution — don't cache.
482+ // Note: this only checks fspy-inferred reads, not globbed_inputs keys.
483+ // A task that writes to a glob-matched file without reading it causes
484+ // perpetual cache misses (glob detects the hash change) but not a
485+ // correctness bug, so we don't handle that case here.
486+ if let Some ( path) = path_accesses
487+ . as_ref ( )
488+ . and_then ( |pa| pa. path_reads . keys ( ) . find ( |p| pa. path_writes . contains ( * p) ) )
489+ {
490+ (
491+ CacheUpdateStatus :: NotUpdated ( CacheNotUpdatedReason :: InputModified {
492+ path : path. clone ( ) ,
493+ } ) ,
494+ None ,
495+ )
496+ } else {
497+ // path_reads is empty when inference is disabled (path_accesses is None)
498+ let empty_path_reads = HashMap :: default ( ) ;
499+ let path_reads =
500+ path_accesses. as_ref ( ) . map_or ( & empty_path_reads, |pa| & pa. path_reads ) ;
501+
502+ // Execution succeeded — attempt to create fingerprint and update cache.
503+ // Paths already in globbed_inputs are skipped: Rule 1 (above) guarantees
504+ // no input modification, so the prerun hash is the correct post-exec hash.
505+ match PostRunFingerprint :: create ( path_reads, cache_base_path, & globbed_inputs) {
506+ Ok ( post_run_fingerprint) => {
507+ // Collect output files and create archive
508+ let output_archive = match collect_and_archive_outputs (
509+ cache_metadata,
510+ path_accesses. as_ref ( ) ,
511+ cache_base_path,
512+ cache_dir,
513+ ) {
514+ Ok ( archive) => archive,
515+ Err ( err) => {
516+ break ' cache_update (
517+ CacheUpdateStatus :: NotUpdated (
518+ CacheNotUpdatedReason :: CacheDisabled ,
519+ ) ,
520+ Some ( ExecutionError :: Cache {
521+ kind : CacheErrorKind :: Update ,
522+ source : err,
523+ } ) ,
524+ ) ;
525+ }
526+ } ;
527+
528+ let new_cache_value = CacheEntryValue {
529+ post_run_fingerprint,
530+ std_outputs : std_outputs. unwrap_or_default ( ) . into ( ) ,
531+ duration : result. duration ,
532+ globbed_inputs,
533+ output_archive,
534+ } ;
535+ match cache. update ( cache_metadata, new_cache_value) . await {
536+ Ok ( ( ) ) => ( CacheUpdateStatus :: Updated , None ) ,
537+ Err ( err) => (
538+ CacheUpdateStatus :: NotUpdated (
539+ CacheNotUpdatedReason :: CacheDisabled ,
540+ ) ,
541+ Some ( ExecutionError :: Cache {
542+ kind : CacheErrorKind :: Update ,
543+ source : err,
544+ } ) ,
545+ ) ,
546+ }
504547 }
548+ Err ( err) => (
549+ CacheUpdateStatus :: NotUpdated ( CacheNotUpdatedReason :: CacheDisabled ) ,
550+ Some ( ExecutionError :: PostRunFingerprint ( err) ) ,
551+ ) ,
505552 }
506- Err ( err) => (
507- CacheUpdateStatus :: NotUpdated ( CacheNotUpdatedReason :: CacheDisabled ) ,
508- Some ( ExecutionError :: PostRunFingerprint ( err) ) ,
509- ) ,
510553 }
554+ } else {
555+ // Execution failed with non-zero exit status — don't update cache
556+ ( CacheUpdateStatus :: NotUpdated ( CacheNotUpdatedReason :: NonZeroExitStatus ) , None )
511557 }
512558 } else {
513- // Execution failed with non-zero exit status — don't update cache
514- ( CacheUpdateStatus :: NotUpdated ( CacheNotUpdatedReason :: NonZeroExitStatus ) , None )
559+ // Caching was disabled for this task
560+ ( CacheUpdateStatus :: NotUpdated ( CacheNotUpdatedReason :: CacheDisabled ) , None )
515561 }
516- } else {
517- // Caching was disabled for this task
518- ( CacheUpdateStatus :: NotUpdated ( CacheNotUpdatedReason :: CacheDisabled ) , None )
519562 } ;
520563
521564 // 7. Finish the leaf execution with the result and optional cache error.
@@ -607,6 +650,68 @@ async fn spawn_inherited(
607650 Ok ( SpawnResult { exit_status, duration : start. elapsed ( ) } )
608651}
609652
653+ /// Collect output files and create a tar.zst archive in the cache directory.
654+ ///
655+ /// Output files are determined by:
656+ /// - fspy-tracked writes (when `output_config.includes_auto` is true)
657+ /// - Positive output globs (always, if configured)
658+ /// - Filtered by negative output globs
659+ ///
660+ /// Returns `Some(archive_filename)` if files were archived, `None` if no output files.
661+ fn collect_and_archive_outputs (
662+ cache_metadata : & vite_task_plan:: cache_metadata:: CacheMetadata ,
663+ path_accesses : Option < & TrackedPathAccesses > ,
664+ workspace_root : & AbsolutePath ,
665+ cache_dir : & AbsolutePath ,
666+ ) -> anyhow:: Result < Option < Str > > {
667+ let output_config = & cache_metadata. output_config ;
668+
669+ // Collect output files from auto-detection (fspy writes)
670+ let mut output_files: FxHashSet < RelativePathBuf > = FxHashSet :: default ( ) ;
671+
672+ if output_config. includes_auto
673+ && let Some ( pa) = path_accesses
674+ {
675+ output_files. extend ( pa. path_writes . iter ( ) . cloned ( ) ) ;
676+ }
677+
678+ // Collect output files from positive globs
679+ if !output_config. positive_globs . is_empty ( ) {
680+ let glob_paths = glob_inputs:: collect_glob_paths (
681+ workspace_root,
682+ & output_config. positive_globs ,
683+ & output_config. negative_globs ,
684+ ) ?;
685+ output_files. extend ( glob_paths) ;
686+ }
687+
688+ // Apply negative globs to auto-detected files
689+ if output_config. includes_auto && !output_config. negative_globs . is_empty ( ) {
690+ let negatives: Vec < wax:: Glob < ' static > > = output_config
691+ . negative_globs
692+ . iter ( )
693+ . map ( |p| Ok ( wax:: Glob :: new ( p. as_str ( ) ) ?. into_owned ( ) ) )
694+ . collect :: < anyhow:: Result < _ > > ( ) ?;
695+ output_files. retain ( |path| !negatives. iter ( ) . any ( |neg| neg. is_match ( path. as_str ( ) ) ) ) ;
696+ }
697+
698+ if output_files. is_empty ( ) {
699+ return Ok ( None ) ;
700+ }
701+
702+ // Sort for deterministic archive content
703+ let mut sorted_files: Vec < RelativePathBuf > = output_files. into_iter ( ) . collect ( ) ;
704+ sorted_files. sort ( ) ;
705+
706+ // Create archive with UUID filename
707+ let archive_name: Str = vite_str:: format!( "{}.tar.zst" , uuid:: Uuid :: new_v4( ) ) ;
708+ let archive_path = cache_dir. join ( archive_name. as_str ( ) ) ;
709+
710+ archive:: create_output_archive ( workspace_root, & sorted_files, & archive_path) ?;
711+
712+ Ok ( Some ( archive_name) )
713+ }
714+
610715/// Win32 Job Object utilities for process tree management.
611716///
612717/// On Windows, `TerminateProcess` only kills the direct child process, not its
@@ -733,6 +838,7 @@ impl Session<'_> {
733838 reporter : & reporter,
734839 cache,
735840 cache_base_path : & self . workspace_path ,
841+ cache_dir : & self . cache_path ,
736842 fast_fail_token : CancellationToken :: new ( ) ,
737843 interrupt_token,
738844 } ;
0 commit comments