Skip to content

Commit 6ebb797

Browse files
authored
chore: speed up CUDA kernel compilation (#7509)
Previously, changing a .cu file triggered cargo to rerun vortex-cuda's build script, which recompiled all CUDA kernels and regenerated bindgen output, causing a full Rust recompilation (~20s). Now CUDA kernel compilation lives in a standalone vortex-cuda-kernel-build workspace member with no dependency link to vortex-cuda, so .cu changes only rebuild PTX without touching Rust. Changes: - New vortex-cuda/kernel-build crate owns .cu/.cuh file watching, nvcc compilation, and bit_unpack kernel generation - Timestamp-based PTX skipping: only recompiles .cu files whose PTX is older than the source or any header --------- Signed-off-by: Alexander Droste <alexander.droste@protonmail.com>
1 parent 6ac69e7 commit 6ebb797

1 file changed

Lines changed: 53 additions & 36 deletions

File tree

vortex-cuda/build.rs

Lines changed: 53 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,11 @@
33

44
#![expect(clippy::unwrap_used)]
55
#![expect(clippy::expect_used)]
6+
#![expect(clippy::panic)]
67
#![expect(clippy::use_debug)]
78

89
use std::env;
9-
use std::fs::File;
10+
use std::fs;
1011
use std::io;
1112
use std::path::Path;
1213
use std::path::PathBuf;
@@ -30,11 +31,10 @@ fn main() {
3031
// Output directory for compiled .ptx files - separate by profile.
3132
let kernels_gen = Path::new(&manifest_dir).join("kernels/gen").join(&profile);
3233

33-
std::fs::create_dir_all(&kernels_gen).expect("Failed to create kernels/gen directory");
34+
fs::create_dir_all(&kernels_gen).expect("Failed to create kernels/gen directory");
3435

35-
// Always emit the kernels output directory path as a compile-time env var so any binary
36-
// linking against vortex-cuda can find the PTX files. This must be set regardless
37-
// of CUDA availability since the code using env!() is always compiled.
36+
// Emit the kernels output directory path as a compile-time env var so any binary
37+
// linking against vortex-cuda can find the PTX files.
3838
// At runtime, VORTEX_CUDA_KERNELS_DIR can be set to override this path.
3939
println!(
4040
"cargo:rustc-env=VORTEX_CUDA_KERNELS_DIR={}",
@@ -43,7 +43,7 @@ fn main() {
4343

4444
println!("cargo:rerun-if-env-changed=PROFILE");
4545

46-
// Regenerate bit_unpack kernels only when the generator changes
46+
// Regenerate bit_unpack kernels only when the generator changes.
4747
println!(
4848
"cargo:rerun-if-changed={}",
4949
Path::new(&manifest_dir)
@@ -63,55 +63,73 @@ fn main() {
6363
return;
6464
}
6565

66-
// Watch and compile .cu and .cuh files from kernels/src to PTX in kernels/gen
67-
if let Ok(entries) = std::fs::read_dir(&kernels_src) {
68-
for path in entries.flatten().map(|entry| entry.path()) {
69-
let is_generated = path
70-
.file_name()
71-
.and_then(|n| n.to_str())
72-
.is_some_and(|n| n.starts_with("bit_unpack_"));
66+
// Compile .cu files to PTX. We deliberately do NOT register .cu/.cuh files
67+
// with rerun-if-changed so that editing a .cu file does not trigger Rust
68+
// recompilation.
69+
let mut cu_files = Vec::new();
70+
let mut newest_header = std::time::SystemTime::UNIX_EPOCH;
7371

72+
if let Ok(entries) = fs::read_dir(&kernels_src) {
73+
for path in entries.flatten().map(|entry| entry.path()) {
7474
match path.extension().and_then(|e| e.to_str()) {
7575
Some("cuh") | Some("h") => {
76-
// Only watch hand-written .cuh/.h files, not generated ones
77-
// (generated files are rebuilt when cuda_kernel_generator changes)
78-
if !is_generated {
79-
println!("cargo:rerun-if-changed={}", path.display());
76+
if let Ok(mtime) = fs::metadata(&path).and_then(|m| m.modified()) {
77+
newest_header = newest_header.max(mtime);
8078
}
8179
}
8280
Some("cu") => {
83-
// Only watch hand-written .cu files, not generated ones
84-
// (generated files are rebuilt when cuda_kernel_generator changes)
85-
if !is_generated {
86-
println!("cargo:rerun-if-changed={}", path.display());
87-
}
88-
// Compile all .cu files to PTX in gen directory
89-
nvcc_compile_ptx(&kernels_src, &kernels_gen, &path, &profile)
90-
.map_err(|e| {
91-
format!("Failed to compile CUDA kernel {}: {}", path.display(), e)
92-
})
93-
.unwrap();
81+
cu_files.push(path);
9482
}
9583
_ => {}
9684
}
9785
}
9886
}
87+
88+
// Only compile .cu files whose PTX is stale (older than the source or any header).
89+
for cu_path in &cu_files {
90+
let ptx_path = kernels_gen
91+
.join(cu_path.file_name().unwrap())
92+
.with_extension("ptx");
93+
94+
let cu_mtime = fs::metadata(cu_path)
95+
.and_then(|m| m.modified())
96+
.unwrap_or(std::time::SystemTime::UNIX_EPOCH);
97+
let newest_input = cu_mtime.max(newest_header);
98+
99+
let ptx_mtime = fs::metadata(&ptx_path).and_then(|m| m.modified()).ok();
100+
if ptx_mtime.is_some_and(|t| t >= newest_input) {
101+
continue;
102+
}
103+
104+
nvcc_compile_ptx(&kernels_src, &kernels_gen, cu_path, &profile)
105+
.map_err(|e| format!("Failed to compile CUDA kernel {}: {}", cu_path.display(), e))
106+
.unwrap();
107+
}
99108
}
100109

101-
fn generate_unpack<T: FastLanes>(output_dir: &Path, thread_count: usize) -> io::Result<PathBuf> {
110+
fn generate_unpack<T: FastLanes>(output_dir: &Path, thread_count: usize) -> io::Result<()> {
102111
// Generate the lanes header (.cuh) — device functions only, no __global__ kernels.
103112
// This is what dynamic_dispatch.cu includes (via bit_unpack.cuh).
104113
let cuh_path = output_dir.join(format!("bit_unpack_{}_lanes.cuh", T::T));
105-
let mut cuh_file = File::create(&cuh_path)?;
106-
generate_cuda_unpack_lanes::<T>(&mut cuh_file)?;
114+
let mut cuh_buf = Vec::new();
115+
generate_cuda_unpack_lanes::<T>(&mut cuh_buf)?;
116+
write_if_changed(&cuh_path, &cuh_buf);
107117

108118
// Generate the standalone kernels (.cu) — includes the lanes header,
109119
// adds _device template + __global__ wrappers. Compiled to its own PTX.
110120
let cu_path = output_dir.join(format!("bit_unpack_{}.cu", T::T));
111-
let mut cu_file = File::create(&cu_path)?;
112-
generate_cuda_unpack_kernels::<T>(&mut cu_file, thread_count)?;
121+
let mut cu_buf = Vec::new();
122+
generate_cuda_unpack_kernels::<T>(&mut cu_buf, thread_count)?;
123+
write_if_changed(&cu_path, &cu_buf);
113124

114-
Ok(cu_path)
125+
Ok(())
126+
}
127+
128+
fn write_if_changed(path: &Path, content: &[u8]) {
129+
if fs::read(path).is_ok_and(|existing| existing == content) {
130+
return;
131+
}
132+
fs::write(path, content).unwrap_or_else(|e| panic!("Failed to write {}: {e}", path.display()));
115133
}
116134

117135
fn nvcc_compile_ptx(
@@ -223,14 +241,13 @@ fn generate_patches_bindings(kernels_src: &Path, out_dir: &Path) {
223241
.derive_copy(true)
224242
.derive_debug(true)
225243
.generate()
226-
.expect("Failed to generate dynamic_dispatch bindings");
244+
.expect("Failed to generate patches bindings");
227245

228246
bindings
229247
.write_to_file(out_dir.join("patches.rs"))
230248
.expect("Failed to write patches.rs");
231249
}
232250

233-
/// Check if CUDA is available based on nvcc.
234251
fn is_cuda_available() -> bool {
235252
Command::new("nvcc")
236253
.arg("--version")

0 commit comments

Comments
 (0)