diff --git a/.cargo/config.toml b/.cargo/config.toml index cdd03a35..b41f6ee1 100644 --- a/.cargo/config.toml +++ b/.cargo/config.toml @@ -1,2 +1,3 @@ [alias] xtask = "run -p xtask --bin xtask --" +compiletest = "run --release -p compiletests --" diff --git a/.github/workflows/ci_linux.yml b/.github/workflows/ci_linux.yml index 18c05def..fe12895a 100644 --- a/.github/workflows/ci_linux.yml +++ b/.github/workflows/ci_linux.yml @@ -153,3 +153,18 @@ jobs: MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }} run: | echo "Stubbed out" + compiletest: + name: Compile tests + runs-on: ubuntu-latest + container: + image: "ghcr.io/rust-gpu/rust-cuda-ubuntu24-cuda12:latest" + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Run cargo version + run: cargo --version + - name: Rustfmt compiletests + shell: bash + run: shopt -s globstar && rustfmt --check tests/compiletests/ui/**/*.rs + - name: Compiletest + run: cargo run -p compiletests --release --no-default-features -- --target-arch compute_61,compute_70,compute_90 diff --git a/.github/workflows/ci_windows.yml b/.github/workflows/ci_windows.yml index a5e1055a..654e24ec 100644 --- a/.github/workflows/ci_windows.yml +++ b/.github/workflows/ci_windows.yml @@ -26,7 +26,18 @@ jobs: target: x86_64-pc-windows-msvc cuda: "12.8.1" linux-local-args: [] - sub-packages: ["nvcc", "nvrtc", "nvrtc_dev", "cuda_profiler_api", "cudart", "cublas", "cublas_dev", "curand", "curand_dev"] + sub-packages: + [ + "nvcc", + "nvrtc", + "nvrtc_dev", + "cuda_profiler_api", + "cudart", + "cublas", + "cublas_dev", + "curand", + "curand_dev", + ] steps: - name: Checkout repository @@ -41,7 +52,7 @@ jobs: linux-local-args: ${{ toJson(matrix.linux-local-args) }} use-local-cache: false sub-packages: ${{ toJson(matrix.sub-packages) }} - log-file-suffix: '${{matrix.os}}-${{matrix.cuda}}' + log-file-suffix: "${{matrix.os}}-${{matrix.cuda}}" - name: Verify CUDA installation run: nvcc --version @@ -76,3 +87,6 @@ jobs: env: RUSTDOCFLAGS: 
-Dwarnings run: cargo doc --workspace --all-features --document-private-items --no-deps --exclude "optix*" --exclude "path-tracer" --exclude "denoiser" --exclude "vecadd*" --exclude "gemm*" --exclude "ex*" --exclude "cudnn*" --exclude "cust_raw" + # Disabled due to dll issues, someone with Windows knowledge needed + # - name: Compiletest + # run: cargo run -p compiletests --release --no-default-features -- --target-arch compute_61,compute_70,compute_90 diff --git a/Cargo.toml b/Cargo.toml index 4c495879..c10feff4 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,6 +16,8 @@ members = [ "examples/cuda/path_tracer/kernels", "examples/optix/*", + "tests/compiletests", + "tests/compiletests/deps-helper", ] exclude = [ @@ -24,3 +26,7 @@ exclude = [ [profile.dev.package.rustc_codegen_nvvm] opt-level = 3 + +[workspace.dependencies] +cuda_std = { path = "crates/cuda_std" } +cuda_builder = { path = "crates/cuda_builder" } diff --git a/crates/rustc_codegen_nvvm/src/context.rs b/crates/rustc_codegen_nvvm/src/context.rs index 6392705e..253bb457 100644 --- a/crates/rustc_codegen_nvvm/src/context.rs +++ b/crates/rustc_codegen_nvvm/src/context.rs @@ -551,25 +551,40 @@ impl<'ll, 'tcx> CodegenCx<'ll, 'tcx> { } } +#[derive(Clone)] +pub enum DisassembleMode { + All, + Function(String), + Entry(String), + Globals, +} + #[derive(Default, Clone)] pub struct CodegenArgs { pub nvvm_options: Vec, pub override_libm: bool, pub use_constant_memory_space: bool, pub final_module_path: Option, + pub disassemble: Option, } impl CodegenArgs { pub fn from_session(sess: &Session) -> Self { - Self::parse(&sess.opts.cg.llvm_args) + Self::parse(&sess.opts.cg.llvm_args, sess) } // we may want to use rustc's own option parsing facilities to have better errors in the future. - pub fn parse(args: &[String]) -> Self { + pub fn parse(args: &[String], sess: &Session) -> Self { // TODO: replace this with a "proper" arg parser. 
let mut cg_args = Self::default(); + let mut skip_next = false; for (idx, arg) in args.iter().enumerate() { + if skip_next { + skip_next = false; + continue; + } + if let Ok(flag) = NvvmOption::from_str(arg) { cg_args.nvvm_options.push(flag); } else if arg == "--override-libm" { @@ -577,9 +592,40 @@ impl CodegenArgs { } else if arg == "--use-constant-memory-space" { cg_args.use_constant_memory_space = true; } else if arg == "--final-module-path" { - cg_args.final_module_path = Some(PathBuf::from( - args.get(idx + 1).expect("No path for --final-module-path"), - )); + let path = match args.get(idx + 1) { + Some(p) => p, + None => sess + .dcx() + .fatal("--final-module-path requires a path argument"), + }; + cg_args.final_module_path = Some(PathBuf::from(path)); + skip_next = true; + } else if arg == "--disassemble" { + cg_args.disassemble = Some(DisassembleMode::All); + } else if arg == "--disassemble-globals" { + cg_args.disassemble = Some(DisassembleMode::Globals); + } else if arg == "--disassemble-fn" { + let func_name = match args.get(idx + 1) { + Some(name) => name.clone(), + None => sess + .dcx() + .fatal("--disassemble-fn requires a function name argument"), + }; + cg_args.disassemble = Some(DisassembleMode::Function(func_name)); + skip_next = true; + } else if let Some(func) = arg.strip_prefix("--disassemble-fn=") { + cg_args.disassemble = Some(DisassembleMode::Function(func.to_string())); + } else if arg == "--disassemble-entry" { + let entry_name = match args.get(idx + 1) { + Some(name) => name.clone(), + None => sess + .dcx() + .fatal("--disassemble-entry requires an entry name argument"), + }; + cg_args.disassemble = Some(DisassembleMode::Entry(entry_name)); + skip_next = true; + } else if let Some(entry) = arg.strip_prefix("--disassemble-entry=") { + cg_args.disassemble = Some(DisassembleMode::Entry(entry.to_string())); } } diff --git a/crates/rustc_codegen_nvvm/src/lib.rs b/crates/rustc_codegen_nvvm/src/lib.rs index a0656a69..825748ca 100644 --- 
a/crates/rustc_codegen_nvvm/src/lib.rs +++ b/crates/rustc_codegen_nvvm/src/lib.rs @@ -52,6 +52,7 @@ mod lto; mod mono_item; mod nvvm; mod override_fns; +mod ptx_filter; mod target; mod ty; @@ -216,7 +217,7 @@ impl CodegenBackend for NvvmCodegenBackend { let cmdline = sess.opts.cg.target_feature.split(','); let cfg = sess.target.options.features.split(','); - let target_features: Vec<_> = cfg + let mut target_features: Vec<_> = cfg .chain(cmdline) .filter(|l| l.starts_with('+')) .map(|l| &l[1..]) @@ -224,6 +225,22 @@ impl CodegenBackend for NvvmCodegenBackend { .map(rustc_span::Symbol::intern) .collect(); + // Add backend-synthesized features (e.g., hierarchical compute capabilities) + // Parse CodegenArgs to get the architecture from llvm-args + let args = context::CodegenArgs::from_session(sess); + for opt in &args.nvvm_options { + if let ::nvvm::NvvmOption::Arch(arch) = opt { + // Add all features up to and including the current architecture + let backend_features = arch.all_target_features(); + target_features.extend( + backend_features + .iter() + .map(|f| rustc_span::Symbol::intern(f)), + ); + break; + } + } + // For NVPTX, all target features are stable let unstable_target_features = target_features.clone(); diff --git a/crates/rustc_codegen_nvvm/src/link.rs b/crates/rustc_codegen_nvvm/src/link.rs index 9cfe1e5a..b84812c2 100644 --- a/crates/rustc_codegen_nvvm/src/link.rs +++ b/crates/rustc_codegen_nvvm/src/link.rs @@ -30,6 +30,7 @@ use tracing::{debug, trace}; use crate::LlvmMod; use crate::context::CodegenArgs; +use crate::ptx_filter::{PtxFilter, PtxFilterConfig}; pub(crate) struct NvvmMetadataLoader; @@ -305,6 +306,31 @@ fn codegen_into_ptx_file( } }; + // If disassembly is requested, print PTX to stderr + if args.disassemble.is_some() + && let Ok(ptx_str) = std::str::from_utf8(&ptx_bytes) + { + let config = PtxFilterConfig::from_codegen_args(&args); + let filter = PtxFilter::new(config); + let output = filter.filter(ptx_str); + if !output.is_empty() { + 
// Check if we're in JSON mode by checking the error format + use rustc_session::config::ErrorOutputType; + match sess.opts.error_format { + ErrorOutputType::Json { .. } => { + sess.dcx() + .err("PTX disassembly output in JSON mode is not supported"); + } + _ => { + // In normal mode, just print to stderr + // Replace tabs with spaces for cleaner output + let output = output.replace('\t', " "); + eprintln!("{output}"); + } + } + } + } + std::fs::write(out_filename, ptx_bytes) } diff --git a/crates/rustc_codegen_nvvm/src/ptx_filter.rs b/crates/rustc_codegen_nvvm/src/ptx_filter.rs new file mode 100644 index 00000000..4b695600 --- /dev/null +++ b/crates/rustc_codegen_nvvm/src/ptx_filter.rs @@ -0,0 +1,454 @@ +/// What to include when filtering PTX output +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +pub enum PtxOutputMode { + /// Include everything + All, + /// Include only function declarations (no bodies) + DeclarationsOnly, + /// Include specific functions based on filter + Filtered, +} + +/// Filter for selecting specific functions +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum FunctionFilter { + /// Include all functions + All, + /// Include functions with names containing this string + ByName(String), + /// Include only entry points with names containing this string + EntryPoint(String), +} + +/// Configuration for filtering PTX output +#[derive(Debug, Clone)] +pub struct PtxFilterConfig { + /// What content to include + pub mode: PtxOutputMode, + + /// Filter for selecting functions (only used when mode is Filtered) + pub function_filter: FunctionFilter, + + /// What additional content to include + pub include_header: bool, + pub include_globals: bool, +} + +impl Default for PtxFilterConfig { + fn default() -> Self { + Self { + mode: PtxOutputMode::Filtered, + function_filter: FunctionFilter::All, + include_header: true, + include_globals: true, + } + } +} + +impl PtxFilterConfig { + /// Create a config that includes everything + pub fn all() -> Self { + 
Self { + mode: PtxOutputMode::All, + function_filter: FunctionFilter::All, + include_header: true, + include_globals: true, + } + } + + /// Create a config for declarations only + pub fn declarations_only() -> Self { + Self { + mode: PtxOutputMode::DeclarationsOnly, + function_filter: FunctionFilter::All, + include_header: true, + include_globals: true, + } + } + + /// Create a config that filters by function name + pub fn by_function_name(name: impl Into) -> Self { + Self { + mode: PtxOutputMode::Filtered, + function_filter: FunctionFilter::ByName(name.into()), + include_header: false, + include_globals: false, + } + } + + /// Create a config that filters by entry point name + pub fn by_entry_point(name: impl Into) -> Self { + Self { + mode: PtxOutputMode::Filtered, + function_filter: FunctionFilter::EntryPoint(name.into()), + include_header: false, + include_globals: false, + } + } + + /// Create a config from CodegenArgs + pub fn from_codegen_args(args: &crate::context::CodegenArgs) -> Self { + use crate::context::DisassembleMode; + match &args.disassemble { + Some(DisassembleMode::All) => Self::all(), + Some(DisassembleMode::Globals) => Self::declarations_only(), + Some(DisassembleMode::Function(func_name)) => Self::by_function_name(func_name), + Some(DisassembleMode::Entry(entry_name)) => Self::by_entry_point(entry_name), + None => Self::default(), + } + } +} + +/// PTX output filter that processes PTX assembly based on configuration +pub struct PtxFilter { + config: PtxFilterConfig, +} + +impl PtxFilter { + pub fn new(config: PtxFilterConfig) -> Self { + Self { config } + } + + /// Filter PTX content based on the configuration + pub fn filter(&self, ptx: &str) -> String { + // If mode is All, return everything + if self.config.mode == PtxOutputMode::All { + return ptx.to_string(); + } + + let parsed = PtxContent::parse(ptx); + parsed.format(&self.config) + } +} + +/// Parsed PTX content +#[derive(Debug, Default)] +struct PtxContent { + header_lines: Vec, + 
globals: Vec, + functions: Vec, +} + +/// A parsed PTX function +#[derive(Debug)] +struct PtxFunction { + name: String, + is_entry: bool, + declaration_line: String, + body_lines: Vec, +} + +impl PtxContent { + /// Parse PTX text into structured content + fn parse(ptx: &str) -> Self { + let mut content = Self::default(); + let mut in_function = false; + let mut current_function: Option = None; + + for line in ptx.lines() { + if Self::is_header_line(line) { + content.header_lines.push(line.to_string()); + } else if Self::is_global_line(line) && !in_function { + content.globals.push(line.to_string()); + } else if let Some(func) = Self::parse_function_start(line) { + // Save previous function if any + if let Some(f) = current_function.take() { + content.functions.push(f); + } + current_function = Some(func); + in_function = true; + } else if in_function && let Some(ref mut func) = current_function { + func.body_lines.push(line.to_string()); + if line.trim() == "}" { + content.functions.push(current_function.take().unwrap()); + in_function = false; + } + } + } + + // Handle case where file ends while in function + if let Some(func) = current_function { + content.functions.push(func); + } + + content + } + + fn is_header_line(line: &str) -> bool { + line.starts_with(".version") + || line.starts_with(".target") + || line.starts_with(".address_size") + } + + fn is_global_line(line: &str) -> bool { + line.contains(".global") || line.contains(".const") || line.contains(".shared") + } + + fn parse_function_start(line: &str) -> Option { + if line.contains(".func") || line.contains(".entry") { + Some(PtxFunction { + name: Self::extract_function_name(line), + is_entry: line.contains(".entry"), + declaration_line: line.to_string(), + body_lines: vec![], + }) + } else { + None + } + } + + fn extract_function_name(line: &str) -> String { + // Look for patterns like: + // .visible .entry kernel_main( + // .func (.reg .u32 %ret) helper_func() + // .entry simple_kernel ( + + // 
Strategy: Find all potential function names (valid identifiers) + // The last one before the final '(' is usually the function name + let mut potential_names = Vec::new(); + let mut current_word = String::new(); + let mut paren_depth = 0; + + for ch in line.chars() { + match ch { + '(' => { + // Save any current word before we see a paren + if !current_word.is_empty() + && current_word + .chars() + .all(|c| c.is_alphanumeric() || c == '_') + && paren_depth == 0 + { + potential_names.push(current_word.clone()); + } + current_word.clear(); + paren_depth += 1; + } + ')' => { + current_word.clear(); + if paren_depth > 0 { + paren_depth -= 1; + } + } + ' ' | '\t' | ',' | '.' => { + if !current_word.is_empty() + && current_word + .chars() + .all(|c| c.is_alphanumeric() || c == '_') + && paren_depth == 0 + { + // This is a word at depth 0 (not inside parentheses) + potential_names.push(current_word.clone()); + } + current_word.clear(); + } + _ => { + if ch.is_alphanumeric() || ch == '_' { + current_word.push(ch); + } else { + current_word.clear(); + } + } + } + } + + // Handle case where line ends with the function name + if !current_word.is_empty() + && current_word + .chars() + .all(|c| c.is_alphanumeric() || c == '_') + { + potential_names.push(current_word); + } + + // Return the last potential name found, or empty string + potential_names.into_iter().last().unwrap_or_default() + } + + /// Format the parsed content according to the configuration + fn format(&self, config: &PtxFilterConfig) -> String { + let mut output = String::new(); + + // Add header if requested + if config.include_header { + for line in &self.header_lines { + output.push_str(line); + output.push('\n'); + } + } + + // Add globals if requested + if config.include_globals { + for line in &self.globals { + output.push_str(line); + output.push('\n'); + } + } + + // Add functions based on mode + match config.mode { + PtxOutputMode::All => { + // Already handled above + unreachable!() + } + 
PtxOutputMode::DeclarationsOnly => { + for func in &self.functions { + output.push_str(&func.declaration_line); + output.push_str(" { ... }\n\n"); + } + } + PtxOutputMode::Filtered => { + for func in &self.functions { + if self.should_include_function(func, &config.function_filter) { + output.push_str(&func.declaration_line); + output.push('\n'); + for line in &func.body_lines { + output.push_str(line); + output.push('\n'); + } + output.push('\n'); + } + } + } + } + + output + } + + fn should_include_function(&self, func: &PtxFunction, filter: &FunctionFilter) -> bool { + match filter { + FunctionFilter::All => true, + FunctionFilter::ByName(name) => func.name.contains(name), + FunctionFilter::EntryPoint(name) => func.is_entry && func.name.contains(name), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + const SAMPLE_PTX: &str = r#".version 8.7 +.target sm_61, debug +.address_size 64 + +.global .align 4 .u32 global_var = 42; + +.visible .entry kernel_main( + .param .u64 kernel_main_param_0 +) +{ + .reg .u64 %r1; + ld.param.u64 %r1, [kernel_main_param_0]; + ret; +} + +.func (.reg .u32 %ret) helper_func() +{ + .reg .u32 %r1; + mov.u32 %r1, 10; + mov.u32 %ret, %r1; + ret; +} + +.visible .entry another_kernel() +{ + ret; +} +"#; + + #[test] + fn test_filter_all() { + let config = PtxFilterConfig::all(); + let filter = PtxFilter::new(config); + let result = filter.filter(SAMPLE_PTX); + assert_eq!(result, SAMPLE_PTX); + } + + #[test] + fn test_filter_by_entry_point() { + let config = PtxFilterConfig::by_entry_point("kernel_main"); + let filter = PtxFilter::new(config); + let result = filter.filter(SAMPLE_PTX); + + // Should NOT include header or globals with new config + assert!(!result.contains(".version 8.7")); + assert!(!result.contains(".target sm_61")); + assert!(!result.contains(".address_size 64")); + assert!(!result.contains(".global .align 4 .u32 global_var")); + + // Should include kernel_main + assert!(result.contains(".visible .entry kernel_main")); 
+ assert!(result.contains("ld.param.u64 %r1")); + + // Should NOT include helper_func or another_kernel + assert!(!result.contains("helper_func")); + assert!(!result.contains("another_kernel")); + } + + #[test] + fn test_filter_by_function_name() { + let config = PtxFilterConfig::by_function_name("helper_func"); + let filter = PtxFilter::new(config); + let result = filter.filter(SAMPLE_PTX); + + // Should NOT include header with new config + assert!(!result.contains(".version 8.7")); + + // Should include helper_func + assert!(result.contains(".func (.reg .u32 %ret) helper_func")); + assert!(result.contains("mov.u32 %r1, 10")); + + // Should NOT include kernels + assert!(!result.contains("kernel_main")); + assert!(!result.contains("another_kernel")); + } + + #[test] + fn test_declarations_only() { + let config = PtxFilterConfig::declarations_only(); + let filter = PtxFilter::new(config); + let result = filter.filter(SAMPLE_PTX); + + // Should include header + assert!(result.contains(".version 8.7")); + + // Should include globals + assert!(result.contains(".global .align 4 .u32 global_var")); + + // Should include function declarations but not bodies + assert!(result.contains(".visible .entry kernel_main")); + assert!(result.contains(" { ... 
}")); + assert!(!result.contains("ld.param.u64")); + } + + #[test] + fn test_partial_name_match() { + let config = PtxFilterConfig::by_entry_point("kernel"); + let filter = PtxFilter::new(config); + let result = filter.filter(SAMPLE_PTX); + + // Should include both kernels that contain "kernel" + assert!(result.contains("kernel_main")); + assert!(result.contains("another_kernel")); + + // Should NOT include helper_func + assert!(!result.contains("helper_func")); + } + + #[test] + fn test_extract_function_name() { + assert_eq!( + PtxContent::extract_function_name(".visible .entry kernel_main("), + "kernel_main" + ); + assert_eq!( + PtxContent::extract_function_name(".func (.reg .u32 %ret) helper_func()"), + "helper_func" + ); + assert_eq!( + PtxContent::extract_function_name(".entry simple_kernel ("), + "simple_kernel" + ); + } +} diff --git a/tests/compiletests/Cargo.toml b/tests/compiletests/Cargo.toml new file mode 100644 index 00000000..f8470e58 --- /dev/null +++ b/tests/compiletests/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "compiletests" +version = "0.1.0" +edition = "2021" + +[[bin]] +name = "compiletests" +path = "src/main.rs" + +[dependencies] +compiletest_rs = "0.11" +clap = { version = "4.5", features = ["derive"] } +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["env-filter"] } +cuda_builder = { workspace = true } \ No newline at end of file diff --git a/tests/compiletests/README.md b/tests/compiletests/README.md new file mode 100644 index 00000000..f07f9db0 --- /dev/null +++ b/tests/compiletests/README.md @@ -0,0 +1,54 @@ +# Compiletests for Rust-CUDA + +This directory contains compile tests for the Rust-CUDA project using the `compiletest` framework. + +The code in these tests is not executed. Tests check that the compiler compiles +correctly. Tests in `dis/` verify correct PTX output. 
+ +## Running Tests + +You can run the tests using the cargo alias: + +```bash +cargo compiletest +``` + +Or run directly from this directory: + +```bash +cargo run --release +``` + +### Options + +- `--bless` - Update expected output files +- `--target-arch=compute_61,compute_70,compute_90` - Test multiple CUDA compute capabilities (comma-separated) +- Filter by test name: `cargo compiletest simple` +- `RUST_LOG=info` - Enable progress logging +- `RUST_LOG=debug` - Enable detailed debug logging + +### Architecture-Specific Tests + +Tests can target specific architectures using stage IDs: + +```rust +// only-compute_70 - Only run on compute_70 +// only-compute_90 - Only run on compute_90 +// ignore-compute_61 - Skip on compute_61 +``` + +## Multi-Architecture Testing + +Test against multiple CUDA architectures: + +```bash +cargo compiletest --target-arch=compute_61,compute_70,compute_90 +``` + +Each test runs for all specified architectures. + +## Debugging + +- Use `RUST_LOG=debug` for detailed test execution +- Check generated PTX in `target/compiletest-results/` +- Filter specific tests: `cargo compiletest simple` diff --git a/tests/compiletests/deps-helper/Cargo.toml b/tests/compiletests/deps-helper/Cargo.toml new file mode 100644 index 00000000..b5167c77 --- /dev/null +++ b/tests/compiletests/deps-helper/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "compiletests-deps-helper" +description = "Shared dependencies of all the compiletest tests" +version = "0.1.0" +edition = "2024" +publish = false + +[lib] +crate-type = ["cdylib", "rlib"] + +[dependencies] +cuda_std = { workspace = true } diff --git a/tests/compiletests/deps-helper/src/lib.rs b/tests/compiletests/deps-helper/src/lib.rs new file mode 100644 index 00000000..d93d0978 --- /dev/null +++ b/tests/compiletests/deps-helper/src/lib.rs @@ -0,0 +1,6 @@ +#[cfg_attr(target_os = "cuda", panic_handler)] +#[allow(dead_code)] +fn panic(_: &core::panic::PanicInfo) -> ! 
{ + #[allow(clippy::empty_loop)] + loop {} +} diff --git a/tests/compiletests/src/main.rs b/tests/compiletests/src/main.rs new file mode 100644 index 00000000..9fb2ed4f --- /dev/null +++ b/tests/compiletests/src/main.rs @@ -0,0 +1,621 @@ +use clap::Parser; +use std::env; +use std::io; +use std::path::{Path, PathBuf}; +use std::process::Command; + +#[derive(Parser)] +#[command(bin_name = "cargo compiletest")] +struct Opt { + /// Automatically update stderr/stdout files. + #[arg(long)] + bless: bool, + + /// The CUDA compute capability to target (e.g., compute_70, compute_80, compute_90). + /// Can specify multiple architectures comma-separated. + #[arg(long, default_value = "compute_70", value_delimiter = ',')] + target_arch: Vec, + + /// Only run tests that match these filters. + #[arg(name = "FILTER")] + filters: Vec, +} + +impl Opt { + pub fn architectures(&self) -> impl Iterator { + self.target_arch.iter().map(|s| s.as_str()) + } +} + +const CUDA_TARGET: &str = "nvptx64-nvidia-cuda"; + +#[derive(Copy, Clone)] +enum DepKind { + CudaLib, + ProcMacro, +} + +impl DepKind { + fn prefix_and_extension(self) -> (&'static str, &'static str) { + match self { + Self::CudaLib => ("lib", "rlib"), + Self::ProcMacro => (env::consts::DLL_PREFIX, env::consts::DLL_EXTENSION), + } + } + + fn target_dir_suffix(self, target: &str) -> String { + match self { + Self::CudaLib => format!("{target}/release/deps"), + Self::ProcMacro => "release/deps".into(), + } + } +} + +fn main() { + let opt = Opt::parse(); + + let tests_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")); + let workspace_root = tests_dir.parent().unwrap().parent().unwrap().to_path_buf(); + let original_target_dir = workspace_root.join("target"); + let deps_target_dir = original_target_dir.join("compiletest-deps"); + let compiletest_build_dir = original_target_dir.join("compiletest-results"); + + // Find the rustc_codegen_nvvm backend before changing directory + let codegen_backend_path = 
find_rustc_codegen_nvvm(&workspace_root); + + // HACK(eddyb) force `compiletest` to pass `ui/...` relative paths to `rustc`, + // which should always end up being the same regardless of the path that the + // Rust-CUDA repo is checked out at (among other things, this avoids hardcoded + // `compiletest` limits being hit by e.g. users with slightly longer paths). + std::env::set_current_dir(tests_dir).unwrap(); + let tests_dir = PathBuf::from(""); + + let runner = Runner { + opt, + tests_dir, + compiletest_build_dir, + deps_target_dir, + codegen_backend_path, + }; + + runner.run_mode("ui"); +} + +struct Runner { + opt: Opt, + tests_dir: PathBuf, + compiletest_build_dir: PathBuf, + deps_target_dir: PathBuf, + codegen_backend_path: PathBuf, +} + +impl Runner { + /// Runs the given `mode` on the directory that matches that name, using the + /// backend provided by `codegen_backend_path`. + #[allow(clippy::string_add)] + fn run_mode(&self, mode: &'static str) { + /// RUSTFLAGS passed to all test files. 
+ fn test_rustc_flags( + codegen_backend_path: &Path, + deps: &TestDeps, + indirect_deps_dirs: &[&Path], + target_arch: &str, + ) -> String { + [ + &*rust_flags(codegen_backend_path, target_arch), + &*indirect_deps_dirs + .iter() + .map(|dir| format!("-L dependency={}", dir.display())) + .fold(String::new(), |a, b| b + " " + &a), + "--edition 2021", + &*format!("--extern noprelude:core={}", deps.core.display()), + &*format!( + "--extern noprelude:compiler_builtins={}", + deps.compiler_builtins.display() + ), + &*format!( + "--extern cuda_std_macros={}", + deps.cuda_std_macros.display() + ), + &*format!("--extern cuda_std={}", deps.cuda_std.display()), + "--crate-type cdylib", + "-Zunstable-options", + "-Zcrate-attr=no_std", + "-Zcrate-attr=feature(abi_ptx)", + ] + .join(" ") + } + + struct Variation { + name: &'static str, + extra_flags: &'static str, + } + const VARIATIONS: &[Variation] = &[Variation { + name: "default", + extra_flags: "", + }]; + + for (arch, variation) in self + .opt + .architectures() + .flat_map(|arch| VARIATIONS.iter().map(move |variation| (arch, variation))) + { + // HACK(eddyb) in order to allow *some* tests to have separate output + // in different testing variations (i.e. experimental features), while + // keeping *most* of the tests unchanged, we make use of "stage IDs", + // which offer `// only-S` and `// ignore-S` for any stage ID `S`. + let stage_id = if variation.name == "default" { + // Use the architecture name as the stage ID. + arch.to_string() + } else { + // Include the variation name in the stage ID. 
+ format!("{}-{}", arch, variation.name) + }; + + println!("Testing arch: {stage_id}\n"); + + let libs = build_deps( + &self.deps_target_dir, + &self.codegen_backend_path, + CUDA_TARGET, + arch, + ); + let mut flags = test_rustc_flags( + &self.codegen_backend_path, + &libs, + &[ + &self + .deps_target_dir + .join(DepKind::CudaLib.target_dir_suffix(CUDA_TARGET)), + &self + .deps_target_dir + .join(DepKind::ProcMacro.target_dir_suffix(CUDA_TARGET)), + ], + arch, + ); + flags += variation.extra_flags; + + let config = compiletest_rs::Config { + stage_id, + target_rustcflags: Some(flags), + mode: mode.parse().expect("Invalid mode"), + target: CUDA_TARGET.to_string(), + src_base: self.tests_dir.join(mode), + build_base: self.compiletest_build_dir.clone(), + bless: self.opt.bless, + filters: self.opt.filters.clone(), + ..compiletest_rs::Config::default() + }; + // FIXME(eddyb) do we need this? shouldn't `compiletest` be independent? + config.clean_rmeta(); + + // Set up CUDA environment + setup_cuda_environment(); + + compiletest_rs::run_tests(&config); + } + } +} + +/// Runs the processes needed to build `cuda-std` & other deps. 
+fn build_deps( + deps_target_dir: &Path, + codegen_backend_path: &Path, + target: &str, + arch: &str, +) -> TestDeps { + // Build compiletests-deps-helper using the same approach as cuda_builder + let rustflags = vec![ + format!("-Zcodegen-backend={}", codegen_backend_path.display()), + "-Zcrate-attr=feature(register_tool)".into(), + "-Zcrate-attr=register_tool(nvvm_internal)".into(), + "-Zcrate-attr=no_std".into(), + "-Zcrate-attr=feature(abi_ptx)".into(), + "-Zsaturating_float_casts=false".into(), + "-Cembed-bitcode=no".into(), + "-Cdebuginfo=0".into(), + "-Coverflow-checks=off".into(), + "-Copt-level=3".into(), + "-Cpanic=abort".into(), + "-Cno-redzone=yes".into(), + format!("-Cllvm-args=-arch={} --override-libm", arch), + format!("-Ctarget-feature=+{}", arch), + ]; + + let cargo_encoded_rustflags = rustflags.join("\x1f"); + + std::process::Command::new("cargo") + .args([ + "build", + "--lib", + "-p", + "compiletests-deps-helper", + "--release", + "-Zbuild-std=core,alloc", + "-Zbuild-std-features=panic_immediate_abort", + &*format!("--target={target}"), + ]) + .arg("--target-dir") + .arg(deps_target_dir) + .env("CARGO_ENCODED_RUSTFLAGS", cargo_encoded_rustflags) + .env("CUDA_ARCH", "70") + .stderr(std::process::Stdio::inherit()) + .stdout(std::process::Stdio::inherit()) + .status() + .and_then(map_status_to_result) + .unwrap(); + + let compiler_builtins = find_lib( + deps_target_dir, + "compiler_builtins", + DepKind::CudaLib, + target, + ); + let core = find_lib(deps_target_dir, "core", DepKind::CudaLib, target); + let cuda_std = find_lib(deps_target_dir, "cuda_std", DepKind::CudaLib, target); + let cuda_std_macros = find_lib( + deps_target_dir, + "cuda_std_macros", + DepKind::ProcMacro, + target, + ); + + let all_libs = [&compiler_builtins, &core, &cuda_std, &cuda_std_macros]; + if all_libs.iter().any(|r| r.is_err()) { + // FIXME(eddyb) `missing_count` should always be `0` anyway. 
+ // FIXME(eddyb) use `--message-format=json-render-diagnostics` to + // avoid caring about duplicates (or search within files at all). + let missing_count = all_libs + .iter() + .filter(|r| matches!(r, Err(FindLibError::Missing))) + .count(); + let duplicate_count = all_libs + .iter() + .filter(|r| matches!(r, Err(FindLibError::Duplicate))) + .count(); + eprintln!( + "warning: cleaning deps ({missing_count} missing libs, {duplicate_count} duplicated libs)" + ); + clean_deps(deps_target_dir); + build_deps(deps_target_dir, codegen_backend_path, target, arch) + } else { + TestDeps { + core: core.ok().unwrap(), + compiler_builtins: compiler_builtins.ok().unwrap(), + cuda_std: cuda_std.ok().unwrap(), + cuda_std_macros: cuda_std_macros.ok().unwrap(), + } + } +} + +fn clean_deps(deps_target_dir: &Path) { + std::process::Command::new("cargo") + .arg("clean") + .arg("--target-dir") + .arg(deps_target_dir) + .stderr(std::process::Stdio::inherit()) + .stdout(std::process::Stdio::inherit()) + .status() + .and_then(map_status_to_result) + .unwrap(); +} + +enum FindLibError { + Missing, + Duplicate, +} + +/// Attempt find the rlib that matches `base`, if multiple rlibs are found then +/// a clean build is required and `Err(FindLibError::Duplicate)` is returned. 
+fn find_lib( + deps_target_dir: &Path, + base: impl AsRef, + dep_kind: DepKind, + target: &str, +) -> Result { + let base = base.as_ref(); + let (expected_prefix, expected_extension) = dep_kind.prefix_and_extension(); + let expected_name = format!("{}{}", expected_prefix, base.display()); + + let dir = deps_target_dir.join(dep_kind.target_dir_suffix(target)); + + let matching_paths: Vec = std::fs::read_dir(dir) + .unwrap() + .map(|entry| entry.unwrap().path()) + .filter(|path| { + let name = { + let name = path.file_stem(); + if name.is_none() { + return false; + } + name.unwrap() + }; + + let name_matches = name.to_str().unwrap().starts_with(&expected_name) + && name.len() == expected_name.len() + 17 // we expect our name, '-', and then 16 hexadecimal digits + && ends_with_dash_hash(name.to_str().unwrap()); + let extension_matches = path + .extension() + .is_some_and(|ext| ext == expected_extension); + + name_matches && extension_matches + }) + .collect(); + + match matching_paths.len() { + 0 => Err(FindLibError::Missing), + 1 => Ok(matching_paths.into_iter().next().unwrap()), + _ => Err(FindLibError::Duplicate), + } +} + +/// Returns whether this string ends with a dash ('-'), followed by 16 lowercase hexadecimal characters +fn ends_with_dash_hash(s: &str) -> bool { + let n = s.len(); + if n < 17 { + return false; + } + let mut bytes = s.bytes().skip(n - 17); + if bytes.next() != Some(b'-') { + return false; + } + + bytes.all(|b| b.is_ascii_hexdigit()) +} + +/// Paths to all of the library artifacts of dependencies needed to compile tests. +struct TestDeps { + core: PathBuf, + compiler_builtins: PathBuf, + cuda_std: PathBuf, + cuda_std_macros: PathBuf, +} + +/// The RUSTFLAGS passed to all CUDA builds. +// FIXME(eddyb) expose most of these from `cuda-builder`. 
+fn rust_flags(codegen_backend_path: &Path, target_arch: &str) -> String {
+    // The three flags that depend on the arguments are rendered up front, so
+    // the lists below can hold plain string slices.
+    let backend_flag = format!("-Zcodegen-backend={}", codegen_backend_path.display());
+    let target_feature_flag = format!("-Ctarget-feature=+{target_arch}");
+    let arch_flag = format!("-Cllvm-args=-arch={target_arch}");
+
+    // NOTE(eddyb) the flags copied from `cuda-builder` are all in this group.
+    let copied_from_cuda_builder = [
+        backend_flag.as_str(),
+        // Ensure the codegen backend is emitted in `.d` files to force Cargo
+        // to rebuild crates compiled with it when it changes (this used to be
+        // the default until https://github.com/rust-lang/rust/pull/93969).
+        "-Zbinary-dep-depinfo",
+        "-Csymbol-mangling-version=v0",
+        "-Zcrate-attr=feature(register_tool)",
+        "-Zcrate-attr=register_tool(nvvm_internal)",
+        // HACK(eddyb) this is the same configuration that we test with, and
+        // ensures no unwanted surprises from e.g. `core` debug assertions.
+        "-Coverflow-checks=off",
+        "-Cdebug-assertions=off",
+        // HACK(eddyb) we need this for `core::fmt::rt::Argument::new_*` calls
+        // to *never* be inlined, so we can pattern-match the calls themselves.
+        "-Zinline-mir=off",
+        // HACK(eddyb) avoid ever reusing instantiations from `compiler_builtins`
+        // which is special-cased to turn calls to functions that never return,
+        // into aborts, and this applies to the panics of UB-checking helpers
+        // (https://github.com/rust-lang/rust/pull/122580#issuecomment-3033026194)
+        // but while upstream that only loses the panic message, for us it's even
+        // worse, as we lose the chance to remove otherwise-dead `fmt::Arguments`.
+        "-Zshare-generics=off",
+    ];
+    // The remaining flags follow the copied ones, in this order.
+    let remaining = [
+        "-Cdebuginfo=2",
+        "-Cembed-bitcode=no",
+        target_feature_flag.as_str(),
+        "-Cpanic=abort",
+        "-Cno-redzone=yes",
+        arch_flag.as_str(),
+        "-Cllvm-args=--override-libm",
+    ];
+
+    // Flags are separated by single spaces, exactly as RUSTFLAGS expects.
+    [&copied_from_cuda_builder[..], &remaining[..]]
+        .concat()
+        .join(" ")
+}
+
+/// Convenience function to map process failure to results in Rust.
+fn map_status_to_result(status: std::process::ExitStatus) -> io::Result<()> {
+    if status.success() {
+        return Ok(());
+    }
+    // `code()` is `None` when the process was terminated by a signal; printing
+    // that as "code: 0" would be misleading, so show the status itself instead.
+    Err(io::Error::other(match status.code() {
+        Some(code) => format!("process terminated with non-zero code: {code}"),
+        None => format!("process terminated abnormally: {status}"),
+    }))
+}
+
+/// Name of the environment variable holding the dynamic-library search path
+/// on the platform this binary was compiled for.
+// https://github.com/rust-lang/cargo/blob/1857880b5124580c4aeb4e8bc5f1198f491d61b1/src/cargo/util/paths.rs#L29-L52
+fn dylib_path_envvar() -> &'static str {
+    if cfg!(windows) {
+        "PATH"
+    } else if cfg!(target_os = "macos") {
+        "DYLD_FALLBACK_LIBRARY_PATH"
+    } else {
+        "LD_LIBRARY_PATH"
+    }
+}
+
+/// Entries of the dynamic-library search path, empty when the variable is unset.
+fn dylib_path() -> Vec<PathBuf> {
+    env::var_os(dylib_path_envvar())
+        .map(|var| env::split_paths(&var).collect())
+        .unwrap_or_default()
+}
+
+/// Puts `dirs`, in order, in front of the current dynamic-library search path.
+///
+/// The existing value is read with `var_os` and split/joined with the standard
+/// library helpers, so a non-Unicode value is kept instead of being discarded.
+/// If the entries cannot be joined, the variable is left untouched and a
+/// warning is printed.
+fn prepend_to_dylib_path(dirs: Vec<PathBuf>) {
+    let var = dylib_path_envvar();
+    // An empty value is treated like an unset one, so no empty entry is added.
+    let existing = env::var_os(var).filter(|value| !value.is_empty());
+    let entries = dirs
+        .into_iter()
+        .chain(existing.iter().flat_map(|value| env::split_paths(value)));
+    match env::join_paths(entries) {
+        Ok(joined) => env::set_var(var, joined),
+        Err(err) => eprintln!("warning: not updating {var}: {err}"),
+    }
+}
+
+/// Makes the codegen backend's directory and any LLVM directories that can be
+/// found visible to the Windows DLL loader by adding them to `PATH`.
+#[cfg(windows)]
+fn setup_windows_dll_path(codegen_backend_path: &Path) {
+    // Prepends `dir` unless the search path already contains it.
+    fn add_to_dylib_path(dir: &Path) {
+        if !dylib_path().iter().any(|entry| entry.as_path() == dir) {
+            prepend_to_dylib_path(vec![dir.to_path_buf()]);
+        }
+    }
+
+    // Add the directory containing the codegen backend
+    if let Some(dir) = codegen_backend_path.parent() {
+        add_to_dylib_path(dir);
+    }
+
+    // Try to find LLVM directories and add them to PATH
+    // Look for llvm-config to find LLVM installation
+    for llvm_config in [
+        "llvm-config",
+        "llvm-config-7",
+        "llvm-config.exe",
+        "llvm-config-7.exe",
+    ] {
+        let Ok(output) = Command::new(llvm_config).arg("--bindir").output() else {
+            continue;
+        };
+        if !output.status.success() {
+            continue;
+        }
+        if let Ok(bindir) = String::from_utf8(output.stdout) {
+            let bindir_path = Path::new(bindir.trim());
+            if bindir_path.exists() {
+                add_to_dylib_path(bindir_path);
+                // Also add the lib directory which might contain DLLs
+                if let Some(parent) = bindir_path.parent() {
+                    let libdir = parent.join("lib");
+                    if libdir.exists() {
+                        add_to_dylib_path(&libdir);
+                    }
+                }
+            }
+        }
+        // Only the first `llvm-config` that runs successfully is consulted.
+        break;
+    }
+
+    // Also check common LLVM installation directories on Windows
+    for path in [
+        "C:\\Program Files\\LLVM\\bin",
+        "C:\\Program Files (x86)\\LLVM\\bin",
+        "C:\\Tools\\LLVM\\bin",
+        "C:\\llvm\\bin",
+    ] {
+        let path = Path::new(path);
+        if path.exists() {
+            add_to_dylib_path(path);
+        }
+    }
+}
+
+/// Locates the `rustc_codegen_nvvm` dynamic library, building it if necessary.
+///
+/// Looks in `<workspace_root>/target/debug` and then `.../target/release`; if
+/// neither has it, runs `cargo build -p rustc_codegen_nvvm` in the workspace
+/// root and looks again, and finally falls back to the directories of the
+/// dynamic-library search path.
+///
+/// # Panics
+///
+/// Panics if `cargo` cannot be executed, if the build fails, or if the library
+/// cannot be found anywhere.
+fn find_rustc_codegen_nvvm(workspace_root: &Path) -> PathBuf {
+    let filename = format!(
+        "{}rustc_codegen_nvvm{}",
+        env::consts::DLL_PREFIX,
+        env::consts::DLL_SUFFIX
+    );
+
+    let target_dir = workspace_root.join("target");
+    let search_paths = [
+        target_dir.join("debug").join(&filename),
+        target_dir.join("release").join(&filename),
+    ];
+    // Shared by the lookups before and after the build.
+    let find_built = || -> Option<PathBuf> {
+        let found = search_paths.iter().find(|path| path.is_file())?;
+        // On Windows, ensure the directory containing the DLL is in PATH
+        // so that its dependencies can be found
+        #[cfg(windows)]
+        setup_windows_dll_path(found);
+        Some(found.clone())
+    };
+
+    // First check if it's already built
+    if let Some(path) = find_built() {
+        return path;
+    }
+
+    // If not found, try to build it
+    println!("Building rustc_codegen_nvvm...");
+    let status = Command::new("cargo")
+        .args(["build", "-p", "rustc_codegen_nvvm"])
+        .current_dir(workspace_root)
+        .status()
+        .expect("Failed to execute cargo build");
+
+    if !status.success() {
+        panic!("Failed to build rustc_codegen_nvvm");
+    }
+
+    // Try to find it again after building
+    if let Some(path) = find_built() {
+        return path;
+    }
+
+    // Last resort: check library path
+    dylib_path()
+        .into_iter()
+        .map(|dir| dir.join(&filename))
+        .find(|candidate| candidate.is_file())
+        .unwrap_or_else(|| panic!("Could not find {filename} in library path or target directory"))
+}
+
+/// Adds every CUDA NVVM library directory that exists on this machine to the
+/// front of the dynamic-library search path.
+///
+/// Directories derived from `CUDA_PATH` come first, followed by a list of
+/// well-known install locations. Nothing is changed when none of them exist.
+fn setup_cuda_environment() {
+    // Well-known NVVM library locations, checked after `CUDA_PATH`
+    const KNOWN_NVVM_DIRS: [&str; 6] = [
+        "/usr/local/cuda/nvvm/lib64",
+        "/usr/local/cuda-12/nvvm/lib64",
+        "/usr/local/cuda-11/nvvm/lib64",
+        "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.8\\nvvm\\lib\\x64",
+        "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.0\\nvvm\\lib\\x64",
+        "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.8\\nvvm\\lib\\x64",
+    ];
+
+    let mut found_cuda_paths: Vec<PathBuf> = Vec::new();
+
+    // Check CUDA_PATH environment variable
+    if let Some(cuda_path) = env::var_os("CUDA_PATH") {
+        let nvvm = Path::new(&cuda_path).join("nvvm");
+        // Both the `lib64` and the `lib\x64` layouts are probed.
+        for candidate in [nvvm.join("lib64"), nvvm.join("lib").join("x64")] {
+            if candidate.exists() {
+                found_cuda_paths.push(candidate);
+            }
+        }
+    }
+
+    // Check standard paths
+    found_cuda_paths.extend(
+        KNOWN_NVVM_DIRS
+            .iter()
+            .copied()
+            .map(PathBuf::from)
+            .filter(|dir| dir.exists()),
+    );
+
+    // Set library path to include CUDA NVVM libraries
+    if !found_cuda_paths.is_empty() {
+        prepend_to_dylib_path(found_cuda_paths);
+    }
+}
diff --git a/tests/compiletests/ui/core/ops/logical_and.rs b/tests/compiletests/ui/core/ops/logical_and.rs
new file mode 100644
index 00000000..9fe0f052
--- /dev/null
+++ b/tests/compiletests/ui/core/ops/logical_and.rs
@@ -0,0 +1,13 @@
+// Test using `&&` operator.
+// build-pass + +use cuda_std::kernel; + +fn f(x: bool, y: bool) -> bool { + x && y +} + +#[kernel] +pub unsafe fn main() { + f(false, true); +} diff --git a/tests/compiletests/ui/core/ops/logical_or.rs b/tests/compiletests/ui/core/ops/logical_or.rs new file mode 100644 index 00000000..f0515254 --- /dev/null +++ b/tests/compiletests/ui/core/ops/logical_or.rs @@ -0,0 +1,13 @@ +// Test using `||` operator. +// build-pass + +use cuda_std::kernel; + +fn f(x: bool, y: bool) -> bool { + x || y +} + +#[kernel] +pub unsafe fn main() { + f(false, true); +} diff --git a/tests/compiletests/ui/core/ops/range_contains.rs b/tests/compiletests/ui/core/ops/range_contains.rs new file mode 100644 index 00000000..f7f733b7 --- /dev/null +++ b/tests/compiletests/ui/core/ops/range_contains.rs @@ -0,0 +1,12 @@ +// build-pass + +use cuda_std::kernel; + +fn has_two_decimal_digits(x: u32) -> bool { + (10..100).contains(&x) +} + +#[kernel] +pub unsafe fn main(i: u32, o: *mut u32) { + *o = has_two_decimal_digits(i) as u32; +} diff --git a/tests/compiletests/ui/dis/simple_add.rs b/tests/compiletests/ui/dis/simple_add.rs new file mode 100644 index 00000000..15b6b179 --- /dev/null +++ b/tests/compiletests/ui/dis/simple_add.rs @@ -0,0 +1,12 @@ +// build-pass +// compile-flags: -Cllvm-args=--disassemble-entry=simple_add_kernel --error-format=human + +// This test verifies PTX generation for a simple kernel + +use cuda_std::kernel; + +#[kernel] +pub unsafe fn simple_add_kernel(a: *const f32, b: *const f32, c: *mut f32) { + let sum = *a + *b; + *c = sum; +} diff --git a/tests/compiletests/ui/dis/simple_add.stderr b/tests/compiletests/ui/dis/simple_add.stderr new file mode 100644 index 00000000..0386db78 --- /dev/null +++ b/tests/compiletests/ui/dis/simple_add.stderr @@ -0,0 +1,39 @@ +.visible .entry simple_add_kernel( + .param .u64 simple_add_kernel_param_0, + .param .u64 simple_add_kernel_param_1, + .param .u64 simple_add_kernel_param_2 +) +{ + .reg .f32 %f<4>; + .reg .b64 %rd<7>; + .loc 1 9 0 
+$L__func_begin0: + .loc 1 9 0 + + + ld.param.u64 %rd1, [simple_add_kernel_param_0]; + ld.param.u64 %rd2, [simple_add_kernel_param_1]; + ld.param.u64 %rd3, [simple_add_kernel_param_2]; +$L__tmp0: + .loc 1 9 33 + cvta.to.global.u64 %rd4, %rd3; + cvta.to.global.u64 %rd5, %rd2; + cvta.to.global.u64 %rd6, %rd1; + .loc 1 10 15 + ld.global.f32 %f1, [%rd6]; + .loc 1 10 20 + ld.global.f32 %f2, [%rd5]; + .loc 1 10 15 + add.f32 %f3, %f1, %f2; +$L__tmp1: + .loc 1 11 5 + st.global.f32 [%rd4], %f3; +$L__tmp2: + .loc 1 12 2 + ret; +$L__tmp3: +$L__func_end0: + +} + + diff --git a/tests/compiletests/ui/dis/target_feature_arch_cc.rs b/tests/compiletests/ui/dis/target_feature_arch_cc.rs new file mode 100644 index 00000000..7e1cf88c --- /dev/null +++ b/tests/compiletests/ui/dis/target_feature_arch_cc.rs @@ -0,0 +1,55 @@ +// only-compute_120a +// build-fail +// compile-flags: -Cllvm-args=--disassemble-entry=test_arch_cc_120a --error-format=human + +// This test verifies feature inheritance for compute_120a (architecture capability) +// FIXME: This currently fails because NVVM doesn't support architecture suffixes like compute_120a +// This test is ignored until we use a later NVVM that supports architecture suffixes + +use cuda_std::kernel; + +#[kernel] +pub unsafe fn test_arch_cc_120a(result: *mut f32) { + let mut val = 0.0f32; + + // compute_120a should have compute_120 enabled (base of architecture) + #[cfg(target_feature = "compute_120")] + { + val += 120.0; + } + + // compute_120a should also have all lower capabilities enabled + #[cfg(target_feature = "compute_100")] + { + val += 100.0; + } + + #[cfg(target_feature = "compute_90")] + { + val += 90.0; + } + + #[cfg(target_feature = "compute_80")] + { + val += 80.0; + } + + #[cfg(target_feature = "compute_70")] + { + val += 70.0; + } + + // compute_120a should NOT have family features from lower versions + #[cfg(target_feature = "compute_100f")] + { + val += 100.5; + } + + #[cfg(target_feature = "compute_101f")] + { + val += 
101.5; + } + + // Prevent DCE - expected value should be 460.0 (120 + 100 + 90 + 80 + 70) + core::ptr::write_volatile(result, val); +} diff --git a/tests/compiletests/ui/dis/target_feature_arch_cc.stderr b/tests/compiletests/ui/dis/target_feature_arch_cc.stderr new file mode 100644 index 00000000..8db06a35 --- /dev/null +++ b/tests/compiletests/ui/dis/target_feature_arch_cc.stderr @@ -0,0 +1 @@ +error: libnvvm returned an error that was not previously caught by the verifier: InvalidOption \ No newline at end of file diff --git a/tests/compiletests/ui/dis/target_feature_base_cc.rs b/tests/compiletests/ui/dis/target_feature_base_cc.rs new file mode 100644 index 00000000..7a30cd5e --- /dev/null +++ b/tests/compiletests/ui/dis/target_feature_base_cc.rs @@ -0,0 +1,39 @@ +// build-pass +// only-compute_70 +// compile-flags: -Cllvm-args=--disassemble-entry=test_base_cc_70 --error-format=human + +// This test verifies feature inheritance for compute_70 (base capability) + +use cuda_std::kernel; + +#[kernel] +pub unsafe fn test_base_cc_70(result: *mut f32) { + let mut val = 0.0f32; + + // arch=compute_70 should have target_feature=compute_70 enabled + #[cfg(target_feature = "compute_70")] + { + val += 70.0; + } + + // arch=compute_70 should also have target_feature=compute_60 enabled (lower capability) + #[cfg(target_feature = "compute_60")] + { + val += 60.0; + } + + // arch=compute_70 should NOT have target_feature=compute_80 enabled (higher capability) + #[cfg(target_feature = "compute_80")] + { + val += 80.0; + } + + // arch=compute_70 should NOT have target_feature=compute_90 enabled + #[cfg(target_feature = "compute_90")] + { + val += 90.0; + } + + // Prevent DCE - expected value should be 130.0 (70 + 60) + core::ptr::write_volatile(result, val); +} diff --git a/tests/compiletests/ui/dis/target_feature_base_cc.stderr b/tests/compiletests/ui/dis/target_feature_base_cc.stderr new file mode 100644 index 00000000..8d6ad01b --- /dev/null +++ 
b/tests/compiletests/ui/dis/target_feature_base_cc.stderr @@ -0,0 +1,31 @@ +.visible .entry test_base_cc_70( + .param .u64 test_base_cc_70_param_0 +) +{ + .reg .b32 %r<2>; + .reg .b64 %rd<3>; + .loc 1 10 0 +$L__func_begin0: + .loc 1 10 0 + + + ld.param.u64 %rd1, [test_base_cc_70_param_0]; +$L__tmp0: + .loc 1 10 31 + cvta.to.global.u64 %rd2, %rd1; +$L__tmp1: + .loc 1 38 5 + bra.uni $L__tmp2; +$L__tmp2: + .loc 2 2180 9 + mov.u32 %r1, 1124204544; + st.volatile.global.u32 [%rd2], %r1; +$L__tmp3: + .loc 1 39 2 + ret; +$L__tmp4: +$L__func_end0: + +} + + diff --git a/tests/compiletests/ui/dis/target_feature_family_cc.rs b/tests/compiletests/ui/dis/target_feature_family_cc.rs new file mode 100644 index 00000000..14745810 --- /dev/null +++ b/tests/compiletests/ui/dis/target_feature_family_cc.rs @@ -0,0 +1,57 @@ +// only-compute_101f +// build-fail +// compile-flags: -Cllvm-args=--disassemble-entry=test_family_cc_101f --error-format=human + +// This test verifies feature inheritance for compute_101f (family capability) +// FIXME: This currently fails because NVVM doesn't support family suffixes like compute_101f +// This test is ignored until we use a later NVVM that supports family suffixes + +use cuda_std::kernel; + +#[kernel] +pub unsafe fn test_family_cc_101f(result: *mut f32) { + let mut val = 0.0f32; + + // compute_101f should have compute_101 enabled + #[cfg(target_feature = "compute_101")] + { + val += 101.0; + } + + // compute_101f should have compute_100 enabled (lower family version) + #[cfg(target_feature = "compute_100")] + { + val += 100.0; + } + + // compute_101f should NOT have compute_100f enabled (same family, lower minor) + #[cfg(target_feature = "compute_100f")] + { + val += 100.5; + } + + // compute_101f should also have lower capabilities enabled + #[cfg(target_feature = "compute_90")] + { + val += 90.0; + } + + #[cfg(target_feature = "compute_80")] + { + val += 80.0; + } + + #[cfg(target_feature = "compute_70")] + { + val += 70.0; + } + + // 
compute_101f should NOT have architecture-specific features + #[cfg(target_feature = "compute_120a")] + { + val += 120.0; + } + + // Prevent DCE - expected value should be 441.0 (101 + 100 + 90 + 80 + 70) + core::ptr::write_volatile(result, val); +} diff --git a/tests/compiletests/ui/dis/target_feature_family_cc.stderr b/tests/compiletests/ui/dis/target_feature_family_cc.stderr new file mode 100644 index 00000000..8db06a35 --- /dev/null +++ b/tests/compiletests/ui/dis/target_feature_family_cc.stderr @@ -0,0 +1 @@ +error: libnvvm returned an error that was not previously caught by the verifier: InvalidOption \ No newline at end of file diff --git a/tests/compiletests/ui/glam/mat4_operations.rs b/tests/compiletests/ui/glam/mat4_operations.rs new file mode 100644 index 00000000..e54d2d10 --- /dev/null +++ b/tests/compiletests/ui/glam/mat4_operations.rs @@ -0,0 +1,51 @@ +// build-pass + +// This test verifies glam Mat4 operations work correctly in CUDA kernels + +use cuda_std::glam::{Mat4, Vec3, Vec4}; +use cuda_std::kernel; + +#[kernel] +pub unsafe fn mat4_transform_operations( + matrix: Mat4, + point: Vec3, + vector: Vec4, + result_point: *mut Vec3, + result_vector: *mut Vec4, + result_determinant: *mut f32, +) { + // Transform a 3D point (w=1 implied) + let transformed_point = matrix.transform_point3(point); + *result_point = transformed_point; + + // Transform a 4D vector + let transformed_vector = matrix * vector; + *result_vector = transformed_vector; + + // Calculate determinant + let det = matrix.determinant(); + *result_determinant = det; +} + +#[kernel] +pub unsafe fn mat4_construction( + translation: Vec3, + scale: Vec3, + angle_radians: f32, + axis: Vec3, + result_translation: *mut Mat4, + result_scale: *mut Mat4, + result_rotation: *mut Mat4, +) { + // Create translation matrix + let trans_mat = Mat4::from_translation(translation); + *result_translation = trans_mat; + + // Create scale matrix + let scale_mat = Mat4::from_scale(scale); + *result_scale = 
scale_mat; + + // Create rotation matrix + let rot_mat = Mat4::from_axis_angle(axis, angle_radians); + *result_rotation = rot_mat; +} diff --git a/tests/compiletests/ui/glam/vec3_operations.rs b/tests/compiletests/ui/glam/vec3_operations.rs new file mode 100644 index 00000000..9ab43513 --- /dev/null +++ b/tests/compiletests/ui/glam/vec3_operations.rs @@ -0,0 +1,42 @@ +// build-pass + +// This test verifies glam Vec3 operations work correctly in CUDA kernels + +use cuda_std::glam::Vec3; +use cuda_std::kernel; + +#[kernel] +pub unsafe fn vec3_basic_ops( + a: Vec3, + b: Vec3, + result_add: *mut Vec3, + result_dot: *mut f32, + result_cross: *mut Vec3, +) { + // Vector addition + let sum = a + b; + *result_add = sum; + + // Dot product + let dot = a.dot(b); + *result_dot = dot; + + // Cross product + let cross = a.cross(b); + *result_cross = cross; +} + +#[kernel] +pub unsafe fn vec3_normalization( + input: Vec3, + result_normalized: *mut Vec3, + result_length: *mut f32, +) { + // Get length + let len = input.length(); + *result_length = len; + + // Normalize + let normalized = input.normalize(); + *result_normalized = normalized; +} diff --git a/tests/compiletests/ui/hello_world.rs b/tests/compiletests/ui/hello_world.rs new file mode 100644 index 00000000..82174c88 --- /dev/null +++ b/tests/compiletests/ui/hello_world.rs @@ -0,0 +1,8 @@ +// build-pass + +use cuda_std::kernel; + +#[kernel] +pub unsafe fn add_one(x: *mut f32) { + *x = *x + 1.0; +} diff --git a/tests/compiletests/ui/lang/compile_fail.rs b/tests/compiletests/ui/lang/compile_fail.rs new file mode 100644 index 00000000..eb0d1d36 --- /dev/null +++ b/tests/compiletests/ui/lang/compile_fail.rs @@ -0,0 +1,6 @@ +use cuda_std::kernel; + +#[kernel] +pub unsafe fn bad_kernel() { + let _s = std::fs::File::create("foo.txt"); +} diff --git a/tests/compiletests/ui/lang/compile_fail.stderr b/tests/compiletests/ui/lang/compile_fail.stderr new file mode 100644 index 00000000..16555a74 --- /dev/null +++ 
b/tests/compiletests/ui/lang/compile_fail.stderr @@ -0,0 +1,11 @@ +error[E0433]: failed to resolve: use of unresolved module or unlinked crate `std` + --> $DIR/compile_fail.rs:5:14 + | +5 | let _s = std::fs::File::create("foo.txt"); + | ^^^ use of unresolved module or unlinked crate `std` + | + = help: you might be missing a crate named `std` + +error: aborting due to 1 previous error + +For more information about this error, try `rustc --explain E0433`. diff --git a/tests/compiletests/ui/thread/thread_functions.rs b/tests/compiletests/ui/thread/thread_functions.rs new file mode 100644 index 00000000..5f732a4b --- /dev/null +++ b/tests/compiletests/ui/thread/thread_functions.rs @@ -0,0 +1,17 @@ +// build-pass + +// This test verifies CUDA thread functions are available and working + +use cuda_std::kernel; +use cuda_std::thread; + +#[kernel] +pub unsafe fn thread_functions_test() { + // Thread identification functions + let _tid = thread::thread_idx_x(); + let _bid = thread::block_idx_x(); + let _bdim = thread::block_dim_x(); + + // Synchronization function + thread::sync_threads(); +}