66#![ expect( clippy:: use_debug) ]
77
88use std:: env;
9- use std:: fs;
9+ use std:: fs:: File ;
1010use std:: io;
1111use std:: path:: Path ;
1212use std:: path:: PathBuf ;
@@ -27,13 +27,13 @@ fn main() {
2727
2828 // Source directory for kernels (hand-written and generated .cu/.cuh files)
2929 let kernels_src = Path :: new ( & manifest_dir) . join ( "kernels/src" ) ;
30- // Output directory for compiled CUDA module files - separate by profile.
30+ // Output directory for compiled .ptx files - separate by profile.
3131 let kernels_gen = Path :: new ( & manifest_dir) . join ( "kernels/gen" ) . join ( & profile) ;
3232
33- fs:: create_dir_all ( & kernels_gen) . expect ( "Failed to create kernels/gen directory" ) ;
33+ std :: fs:: create_dir_all ( & kernels_gen) . expect ( "Failed to create kernels/gen directory" ) ;
3434
3535 // Always emit the kernels output directory path as a compile-time env var so any binary
36- // linking against vortex-cuda can find the CUDA module files. This must be set regardless
36+ // linking against vortex-cuda can find the PTX files. This must be set regardless
3737 // of CUDA availability since the code using env!() is always compiled.
3838 // At runtime, VORTEX_CUDA_KERNELS_DIR can be set to override this path.
3939 println ! (
@@ -64,8 +64,8 @@ fn main() {
6464 return ;
6565 }
6666
67- // Watch and compile .cu and .cuh files from kernels/src to CUDA modules in kernels/gen
68- if let Ok ( entries) = fs:: read_dir ( & kernels_src) {
67+ // Watch and compile .cu and .cuh files from kernels/src to PTX in kernels/gen
68+ if let Ok ( entries) = std :: fs:: read_dir ( & kernels_src) {
6969 for path in entries. flatten ( ) . map ( |entry| entry. path ( ) ) {
7070 let is_generated = path
7171 . file_name ( )
@@ -86,8 +86,8 @@ fn main() {
8686 if !is_generated {
8787 println ! ( "cargo:rerun-if-changed={}" , path. display( ) ) ;
8888 }
89- // Compile all .cu files to CUDA fatbins in gen directory
90- nvcc_compile_fatbin ( & kernels_src, & kernels_gen, & path, & profile)
89+ // Compile all .cu files to PTX in gen directory
90+ nvcc_compile_ptx ( & kernels_src, & kernels_gen, & path, & profile)
9191 . map_err ( |e| {
9292 format ! ( "Failed to compile CUDA kernel {}: {}" , path. display( ) , e)
9393 } )
@@ -103,19 +103,19 @@ fn generate_unpack<T: FastLanes>(output_dir: &Path, thread_count: usize) -> io::
103103 // Generate the lanes header (.cuh) — device functions only, no __global__ kernels.
104104 // This is what dynamic_dispatch.cu includes (via bit_unpack.cuh).
105105 let cuh_path = output_dir. join ( format ! ( "bit_unpack_{}_lanes.cuh" , T :: T ) ) ;
106- let mut cuh_file = fs :: File :: create ( & cuh_path) ?;
106+ let mut cuh_file = File :: create ( & cuh_path) ?;
107107 generate_cuda_unpack_lanes :: < T > ( & mut cuh_file) ?;
108108
109109 // Generate the standalone kernels (.cu) — includes the lanes header,
110- // adds _device template + __global__ wrappers. Compiled to its own CUDA module .
110+ // adds _device template + __global__ wrappers. Compiled to its own PTX file.
111111 let cu_path = output_dir. join ( format ! ( "bit_unpack_{}.cu" , T :: T ) ) ;
112- let mut cu_file = fs :: File :: create ( & cu_path) ?;
112+ let mut cu_file = File :: create ( & cu_path) ?;
113113 generate_cuda_unpack_kernels :: < T > ( & mut cu_file, thread_count) ?;
114114
115115 Ok ( cu_path)
116116}
117117
118- fn nvcc_compile_fatbin (
118+ fn nvcc_compile_ptx (
119119 include_dir : & Path ,
120120 output_dir : & Path ,
121121 cu_path : & Path ,
@@ -148,24 +148,23 @@ fn nvcc_compile_fatbin(
148148 cmd. arg ( "-O3" ) ;
149149 }
150150
151- // Output CUDA fatbin file goes to output_dir with same base name.
152- let fatbin_path = output_dir
151+ // Output PTX file goes to output_dir with same base name.
152+ let ptx_path = output_dir
153153 . join ( cu_path. file_name ( ) . unwrap ( ) )
154- . with_extension ( "fatbin " ) ;
154+ . with_extension ( "ptx " ) ;
155155
156- // Embed a single PTX image for Ampere and newer GPUs. The driver JIT-compiles
157- // PTX to the target GPU's SASS at runtime.
158156 cmd. arg ( "-std=c++20" )
159- . arg ( "-gencode= arch=compute_80,code=compute_80 " )
157+ . arg ( "-arch=native " )
160158 // Flags forwarded to Clang.
161159 . arg ( "--compiler-options=-Wall -Wextra -Wpedantic -Werror" )
162160 . arg ( "--restrict" )
163- . arg ( "--fatbin " )
161+ . arg ( "--ptx " )
164162 . arg ( "--include-path" )
165163 . arg ( include_dir)
164+ . arg ( "-c" )
166165 . arg ( cu_path)
167166 . arg ( "-o" )
168- . arg ( & fatbin_path ) ;
167+ . arg ( & ptx_path ) ;
169168
170169 let res = cmd. output ( ) ?;
171170
0 commit comments