33
44#![ expect( clippy:: unwrap_used) ]
55#![ expect( clippy:: expect_used) ]
6+ #![ expect( clippy:: panic) ]
67#![ expect( clippy:: use_debug) ]
78
89use std:: env;
9- use std:: fs:: File ;
10+ use std:: fs;
1011use std:: io;
1112use std:: path:: Path ;
1213use std:: path:: PathBuf ;
@@ -30,11 +31,10 @@ fn main() {
3031 // Output directory for compiled .ptx files - separate by profile.
3132 let kernels_gen = Path :: new ( & manifest_dir) . join ( "kernels/gen" ) . join ( & profile) ;
3233
33- std :: fs:: create_dir_all ( & kernels_gen) . expect ( "Failed to create kernels/gen directory" ) ;
34+ fs:: create_dir_all ( & kernels_gen) . expect ( "Failed to create kernels/gen directory" ) ;
3435
35- // Always emit the kernels output directory path as a compile-time env var so any binary
36- // linking against vortex-cuda can find the PTX files. This must be set regardless
37- // of CUDA availability since the code using env!() is always compiled.
36+ // Emit the kernels output directory path as a compile-time env var so any binary
37+ // linking against vortex-cuda can find the PTX files.
3838 // At runtime, VORTEX_CUDA_KERNELS_DIR can be set to override this path.
3939 println ! (
4040 "cargo:rustc-env=VORTEX_CUDA_KERNELS_DIR={}" ,
@@ -43,7 +43,7 @@ fn main() {
4343
4444 println ! ( "cargo:rerun-if-env-changed=PROFILE" ) ;
4545
46- // Regenerate bit_unpack kernels only when the generator changes
46+ // Regenerate bit_unpack kernels only when the generator changes.
4747 println ! (
4848 "cargo:rerun-if-changed={}" ,
4949 Path :: new( & manifest_dir)
@@ -63,55 +63,73 @@ fn main() {
6363 return ;
6464 }
6565
66- // Watch and compile .cu and .cuh files from kernels/src to PTX in kernels/gen
67- if let Ok ( entries) = std:: fs:: read_dir ( & kernels_src) {
68- for path in entries. flatten ( ) . map ( |entry| entry. path ( ) ) {
69- let is_generated = path
70- . file_name ( )
71- . and_then ( |n| n. to_str ( ) )
72- . is_some_and ( |n| n. starts_with ( "bit_unpack_" ) ) ;
66+ // Compile .cu files to PTX. We deliberately do NOT register .cu/.cuh files
67+ // with rerun-if-changed so that editing a .cu file does not trigger Rust
68+ // recompilation.
69+ let mut cu_files = Vec :: new ( ) ;
70+ let mut newest_header = std:: time:: SystemTime :: UNIX_EPOCH ;
7371
72+ if let Ok ( entries) = fs:: read_dir ( & kernels_src) {
73+ for path in entries. flatten ( ) . map ( |entry| entry. path ( ) ) {
7474 match path. extension ( ) . and_then ( |e| e. to_str ( ) ) {
7575 Some ( "cuh" ) | Some ( "h" ) => {
76- // Only watch hand-written .cuh/.h files, not generated ones
77- // (generated files are rebuilt when cuda_kernel_generator changes)
78- if !is_generated {
79- println ! ( "cargo:rerun-if-changed={}" , path. display( ) ) ;
76+ if let Ok ( mtime) = fs:: metadata ( & path) . and_then ( |m| m. modified ( ) ) {
77+ newest_header = newest_header. max ( mtime) ;
8078 }
8179 }
8280 Some ( "cu" ) => {
83- // Only watch hand-written .cu files, not generated ones
84- // (generated files are rebuilt when cuda_kernel_generator changes)
85- if !is_generated {
86- println ! ( "cargo:rerun-if-changed={}" , path. display( ) ) ;
87- }
88- // Compile all .cu files to PTX in gen directory
89- nvcc_compile_ptx ( & kernels_src, & kernels_gen, & path, & profile)
90- . map_err ( |e| {
91- format ! ( "Failed to compile CUDA kernel {}: {}" , path. display( ) , e)
92- } )
93- . unwrap ( ) ;
81+ cu_files. push ( path) ;
9482 }
9583 _ => { }
9684 }
9785 }
9886 }
87+
88+ // Only compile .cu files whose PTX is stale (older than the source or any header).
89+ for cu_path in & cu_files {
90+ let ptx_path = kernels_gen
91+ . join ( cu_path. file_name ( ) . unwrap ( ) )
92+ . with_extension ( "ptx" ) ;
93+
94+ let cu_mtime = fs:: metadata ( cu_path)
95+ . and_then ( |m| m. modified ( ) )
96+ . unwrap_or ( std:: time:: SystemTime :: UNIX_EPOCH ) ;
97+ let newest_input = cu_mtime. max ( newest_header) ;
98+
99+ let ptx_mtime = fs:: metadata ( & ptx_path) . and_then ( |m| m. modified ( ) ) . ok ( ) ;
100+ if ptx_mtime. is_some_and ( |t| t >= newest_input) {
101+ continue ;
102+ }
103+
104+ nvcc_compile_ptx ( & kernels_src, & kernels_gen, cu_path, & profile)
105+ . map_err ( |e| format ! ( "Failed to compile CUDA kernel {}: {}" , cu_path. display( ) , e) )
106+ . unwrap ( ) ;
107+ }
99108}
100109
101- fn generate_unpack < T : FastLanes > ( output_dir : & Path , thread_count : usize ) -> io:: Result < PathBuf > {
110+ fn generate_unpack < T : FastLanes > ( output_dir : & Path , thread_count : usize ) -> io:: Result < ( ) > {
102111 // Generate the lanes header (.cuh) — device functions only, no __global__ kernels.
103112 // This is what dynamic_dispatch.cu includes (via bit_unpack.cuh).
104113 let cuh_path = output_dir. join ( format ! ( "bit_unpack_{}_lanes.cuh" , T :: T ) ) ;
105- let mut cuh_file = File :: create ( & cuh_path) ?;
106- generate_cuda_unpack_lanes :: < T > ( & mut cuh_file) ?;
114+ let mut cuh_buf = Vec :: new ( ) ;
115+ generate_cuda_unpack_lanes :: < T > ( & mut cuh_buf) ?;
116+ write_if_changed ( & cuh_path, & cuh_buf) ;
107117
108118 // Generate the standalone kernels (.cu) — includes the lanes header,
109119 // adds _device template + __global__ wrappers. Compiled to its own PTX.
110120 let cu_path = output_dir. join ( format ! ( "bit_unpack_{}.cu" , T :: T ) ) ;
111- let mut cu_file = File :: create ( & cu_path) ?;
112- generate_cuda_unpack_kernels :: < T > ( & mut cu_file, thread_count) ?;
121+ let mut cu_buf = Vec :: new ( ) ;
122+ generate_cuda_unpack_kernels :: < T > ( & mut cu_buf, thread_count) ?;
123+ write_if_changed ( & cu_path, & cu_buf) ;
113124
114- Ok ( cu_path)
125+ Ok ( ( ) )
126+ }
127+
128+ fn write_if_changed ( path : & Path , content : & [ u8 ] ) {
129+ if fs:: read ( path) . is_ok_and ( |existing| existing == content) {
130+ return ;
131+ }
132+ fs:: write ( path, content) . unwrap_or_else ( |e| panic ! ( "Failed to write {}: {e}" , path. display( ) ) ) ;
115133}
116134
117135fn nvcc_compile_ptx (
@@ -223,14 +241,13 @@ fn generate_patches_bindings(kernels_src: &Path, out_dir: &Path) {
223241 . derive_copy ( true )
224242 . derive_debug ( true )
225243 . generate ( )
226- . expect ( "Failed to generate dynamic_dispatch bindings" ) ;
244+ . expect ( "Failed to generate patches bindings" ) ;
227245
228246 bindings
229247 . write_to_file ( out_dir. join ( "patches.rs" ) )
230248 . expect ( "Failed to write patches.rs" ) ;
231249}
232250
233- /// Check if CUDA is available based on nvcc.
234251fn is_cuda_available ( ) -> bool {
235252 Command :: new ( "nvcc" )
236253 . arg ( "--version" )
0 commit comments