diff --git a/.github/workflows/ci_linux.yml b/.github/workflows/ci_linux.yml index 2e548ec9..1e74d754 100644 --- a/.github/workflows/ci_linux.yml +++ b/.github/workflows/ci_linux.yml @@ -163,7 +163,6 @@ jobs: --exclude cust ' - # Exclude cust_raw because it triggers hundreds of warnings. - name: Check documentation run: | docker exec "$CONTAINER_NAME" bash -lc 'set -euo pipefail diff --git a/.github/workflows/ci_windows.yml b/.github/workflows/ci_windows.yml index 63082087..7923fb66 100644 --- a/.github/workflows/ci_windows.yml +++ b/.github/workflows/ci_windows.yml @@ -106,7 +106,6 @@ jobs: run: cargo test --workspace --exclude blastoff --exclude cudnn --exclude cudnn-sys --exclude cust # Exclude crates that require cuDNN, not available on Windows CI: cudnn, cudnn-sys. - # Exclude cust_raw because it triggers hundreds of warnings. - name: Check documentation env: RUSTDOCFLAGS: -Dwarnings diff --git a/.github/workflows/container_images.yml b/.github/workflows/container_images.yml index 7485812e..4fe650bd 100644 --- a/.github/workflows/container_images.yml +++ b/.github/workflows/container_images.yml @@ -33,9 +33,15 @@ jobs: - name: Ubuntu-24.04/CUDA-12.8.1 image: "rust-gpu/rust-cuda-ubuntu24-cuda12" dockerfile: ./container/ubuntu24-cuda12/Dockerfile + - name: Ubuntu-24.04/CUDA-13.0.2 + image: "rust-gpu/rust-cuda-ubuntu24-cuda13" + dockerfile: ./container/ubuntu24-cuda13/Dockerfile - name: RockyLinux-9/CUDA-12.8.1 image: "rust-gpu/rust-cuda-rockylinux9-cuda12" dockerfile: ./container/rockylinux9-cuda12/Dockerfile + - name: RockyLinux-9/CUDA-13.0.2 + image: "rust-gpu/rust-cuda-rockylinux9-cuda13" + dockerfile: ./container/rockylinux9-cuda13/Dockerfile steps: - name: Free up space # Without this the job will likely run out of disk space. 
@@ -153,8 +159,12 @@ jobs: variance: - name: Ubuntu-24.04/CUDA-12.8.1 image: "rust-gpu/rust-cuda-ubuntu24-cuda12" + - name: Ubuntu-24.04/CUDA-13.0.2 + image: "rust-gpu/rust-cuda-ubuntu24-cuda13" - name: RockyLinux-9/CUDA-12.8.1 image: "rust-gpu/rust-cuda-rockylinux9-cuda12" + - name: RockyLinux-9/CUDA-13.0.2 + image: "rust-gpu/rust-cuda-rockylinux9-cuda13" steps: - name: Set artifact name run: | diff --git a/README.md b/README.md index c069a024..1e410303 100644 --- a/README.md +++ b/README.md @@ -2,116 +2,36 @@

The Rust CUDA Project

- An ecosystem of libraries and tools for writing and executing extremely fast GPU code fully in - Rust + An ecosystem of libraries and tools for writing and executing extremely fast GPU code + fully in Rust

- -

- Guide - | - Getting Started - | - Features -

-⚠️ The project is still in early development, expect bugs, safety issues, and things that don't work ⚠️ -
- > [!IMPORTANT] > This project is no longer dormant and is [being > rebooted](https://rust-gpu.github.io/blog/2025/01/27/rust-cuda-reboot). Read the [latest status update](https://rust-gpu.github.io/blog/2025/08/11/rust-cuda-update). > Please contribute! +> +> The project is still in early development, however. Expect bugs, safety issues, and things that +> don't work. -## Goal - -The Rust CUDA Project is a project aimed at making Rust a tier-1 language for extremely fast GPU computing -using the CUDA Toolkit. It provides tools for compiling Rust to extremely fast PTX code as well as libraries -for using existing CUDA libraries with it. - -## Background - -Historically, general purpose high performance GPU computing has been done using the CUDA toolkit. The CUDA toolkit primarily -provides a way to use Fortran/C/C++ code for GPU computing in tandem with CPU code with a single source. It also provides -many libraries, tools, forums, and documentation to supplement the single-source CPU/GPU code. - -CUDA is exclusively an NVIDIA-only toolkit. Many tools have been proposed for cross-platform GPU computing such as -OpenCL, Vulkan Computing, and HIP. However, CUDA remains the most used toolkit for such tasks by far. This is why it is -imperative to make Rust a viable option for use with the CUDA toolkit. - -However, CUDA with Rust has been a historically very rocky road. The only viable option until now has been to use the LLVM PTX -backend, however, the LLVM PTX backend does not always work and would generate invalid PTX for many common Rust operations, and -in recent years it has been shown time and time again that a specialized solution is needed for Rust on the GPU with the advent -of projects such as rust-gpu (for Rust -> SPIR-V). - -Our hope is that with this project we can push the Rust GPU computing industry forward and make Rust an excellent language -for such tasks. 
Rust offers plenty of benefits such as `__restrict__` performance benefits for every kernel, An excellent module/crate system, -delimiting of unsafe areas of CPU/GPU code with `unsafe`, high level wrappers to low level CUDA libraries, etc. - -## Structure - -The scope of the Rust CUDA Project is quite broad, it spans the entirety of the CUDA ecosystem, with libraries and tools to make it -usable using Rust. Therefore, the project contains many crates for all corners of the CUDA ecosystem. - -The current line-up of libraries is the following: - -- `rustc_codegen_nvvm` Which is a rustc backend that targets NVVM IR (a subset of LLVM IR) for the [libnvvm](https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html) library. - - Generates highly optimized PTX code which can be loaded by the CUDA Driver API to execute on the GPU. - - For the near future it will be CUDA-only, but it may be used to target amdgpu in the future. -- `cuda_std` for GPU-side functions and utilities, such as thread index queries, memory allocation, warp intrinsics, etc. - - _Not_ a low level library, provides many utility functions to make it easier to write cleaner and more reliable GPU kernels. - - Closely tied to `rustc_codegen_nvvm` which exposes GPU features through it internally. -- [`cudnn`](https://github.com/Rust-GPU/rust-cuda/tree/master/crates/cudnn) for a collection of GPU-accelerated primitives for deep neural networks. -- `cust` for CPU-side CUDA features such as launching GPU kernels, GPU memory allocation, device queries, etc. - - High level with features such as RAII and Rust Results that make it easier and cleaner to manage the interface to the GPU. - - A high level wrapper for the CUDA Driver API, the lower level version of the more common CUDA Runtime API used from C++. - - Provides much more fine grained control over things like kernel concurrency and module loading than the C++ Runtime API. 
-- `gpu_rand` for GPU-friendly random number generation, currently only implements xoroshiro RNGs from `rand_xoshiro`. -- `optix` for CPU-side hardware raytracing and denoising using the CUDA OptiX library. - -In addition to many "glue" crates for things such as high level wrappers for certain smaller CUDA libraries. - -## Related Projects - -Other projects related to using Rust on the GPU: - -- 2016: [glassful](https://github.com/kmcallister/glassful) Subset of Rust that compiles to GLSL. -- 2017: [inspirv-rust](https://github.com/msiglreith/inspirv-rust) Experimental Rust MIR -> SPIR-V Compiler. -- 2018: [nvptx](https://github.com/japaric-archived/nvptx) Rust to PTX compiler using the `nvptx` target for rustc (using the LLVM PTX backend). -- 2020: [accel](https://github.com/termoshtt/accel) Higher-level library that relied on the same mechanism that `nvptx` does. -- 2020: [rlsl](https://github.com/MaikKlein/rlsl) Experimental Rust -> SPIR-V compiler (predecessor to rust-gpu) -- 2020: [rust-gpu](https://github.com/Rust-GPU/rust-gpu) `rustc` compiler backend to compile Rust to SPIR-V for use in shaders, similar mechanism as our project. - -## Usage -```bash -## setup your environment like: -### export OPTIX_ROOT=/opt/NVIDIA-OptiX-SDK-9.0.0-linux64-x86_64 -### export OPTIX_ROOT_DIR=/opt/NVIDIA-OptiX-SDK-9.0.0-linux64-x86_64 - -## build proj -cargo build -``` - -## Use Rust CUDA in Container Environments - -The distribution related Dockerfile are located in `container` folder. -Taking ubuntu 24.04 as an example, run the following command in repository root: -```bash -docker build -f ./container/ubuntu24-cuda12/Dockerfile -t rust-cuda-ubuntu24 . -docker run --rm --runtime=nvidia --gpus all -it rust-cuda-ubuntu24 -``` +## Documentation -A sample `.devcontainer.json` file is also included, configured for Ubuntu 24.02. Copy this to `.devcontainer/devcontainer.json` to make additional customizations. 
+Please see [The Rust CUDA Guide](https://rust-gpu.github.io/rust-cuda/) for documentation on Rust +CUDA. ## License Licensed under either of -- Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) +- Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or + http://www.apache.org/licenses/LICENSE-2.0) - MIT license ([LICENSE-MIT](LICENSE-MIT) or http://opensource.org/licenses/MIT) at your discretion. ### Contribution -Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any additional terms or conditions. +Unless you explicitly state otherwise, any contribution intentionally submitted for inclusion in +the work by you, as defined in the Apache-2.0 license, shall be dual licensed as above, without any +additional terms or conditions. diff --git a/container/rockylinux9-cuda13/Dockerfile b/container/rockylinux9-cuda13/Dockerfile new file mode 100644 index 00000000..dc428186 --- /dev/null +++ b/container/rockylinux9-cuda13/Dockerfile @@ -0,0 +1,92 @@ +FROM nvcr.io/nvidia/cuda:13.0.2-cudnn-devel-rockylinux9 AS llvm-builder + +RUN dnf -y install \ + --nobest \ + --allowerasing \ + --setopt=install_weak_deps=False \ + openssl-devel \ + pkgconfig \ + which \ + xz \ + zlib-devel \ + libffi-devel \ + ncurses-devel \ + libxml2-devel \ + libedit-devel \ + python3 \ + make \ + cmake && \ + dnf clean all + +WORKDIR /data/llvm7 + +# Download and build LLVM 7.1.0 for all architectures. 
+RUN curl -sSf -L -O https://github.com/llvm/llvm-project/releases/download/llvmorg-7.1.0/llvm-7.1.0.src.tar.xz && \ + tar -xf llvm-7.1.0.src.tar.xz && \ + cd llvm-7.1.0.src && \ + mkdir build && cd build && \ + ARCH=$(uname -m) && \ + if [ "$ARCH" = "x86_64" ]; then \ + TARGETS="X86;NVPTX"; \ + else \ + TARGETS="AArch64;NVPTX"; \ + fi && \ + cmake \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLVM_TARGETS_TO_BUILD="$TARGETS" \ + -DLLVM_BUILD_LLVM_DYLIB=ON \ + -DLLVM_LINK_LLVM_DYLIB=ON \ + -DLLVM_ENABLE_ASSERTIONS=OFF \ + -DLLVM_ENABLE_BINDINGS=OFF \ + -DLLVM_INCLUDE_EXAMPLES=OFF \ + -DLLVM_INCLUDE_TESTS=OFF \ + -DLLVM_INCLUDE_BENCHMARKS=OFF \ + -DLLVM_ENABLE_ZLIB=ON \ + -DLLVM_ENABLE_TERMINFO=ON \ + -DCMAKE_INSTALL_PREFIX=/opt/llvm-7 \ + .. && \ + make -j$(nproc) && \ + make install && \ + cd ../.. && \ + rm -rf llvm-7.1.0.src* && \ + dnf clean all + +FROM nvcr.io/nvidia/cuda:13.0.2-cudnn-devel-rockylinux9 + +RUN dnf -y install \ + --nobest \ + --allowerasing \ + --setopt=install_weak_deps=False \ + clang \ + openssl-devel \ + fontconfig-devel \ + libX11-devel \ + libXcursor-devel \ + libXi-devel \ + libXrandr-devel \ + libxml2-devel \ + ncurses-devel \ + pkgconfig \ + which \ + xz \ + zlib-devel \ + cmake && \ + dnf clean all + +COPY --from=llvm-builder /opt/llvm-7 /opt/llvm-7 +RUN ln -s /opt/llvm-7/bin/llvm-config /usr/bin/llvm-config && \ + ln -s /opt/llvm-7/bin/llvm-config /usr/bin/llvm-config-7 + +# Get Rust (install rustup; toolchain installed from rust-toolchain.toml below) +RUN curl -sSf -L https://sh.rustup.rs | bash -s -- -y --profile minimal --default-toolchain none +ENV PATH="/root/.cargo/bin:${PATH}" + +# Setup the workspace +WORKDIR /data/rust-cuda +RUN --mount=type=bind,source=rust-toolchain.toml,target=/data/rust-cuda/rust-toolchain.toml \ + rustup show + +# Add nvvm to LD_LIBRARY_PATH. 
+ENV LD_LIBRARY_PATH="/usr/local/cuda/nvvm/lib64:${LD_LIBRARY_PATH}" +ENV LLVM_LINK_STATIC=1 +ENV RUST_LOG=info diff --git a/container/ubuntu24-cuda13/Dockerfile b/container/ubuntu24-cuda13/Dockerfile new file mode 100644 index 00000000..be2a2f73 --- /dev/null +++ b/container/ubuntu24-cuda13/Dockerfile @@ -0,0 +1,89 @@ +FROM nvcr.io/nvidia/cuda:13.0.2-cudnn-devel-ubuntu24.04 AS llvm-builder + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y install \ + build-essential \ + clang \ + curl \ + libffi-dev \ + libedit-dev \ + libncurses5-dev \ + libssl-dev \ + libtinfo-dev \ + libxml2-dev \ + cmake \ + ninja-build \ + pkg-config \ + python3 \ + xz-utils \ + zlib1g-dev && \ + rm -rf /var/lib/apt/lists/* + +WORKDIR /data/llvm7 + +# Download and build LLVM 7.1.0 for all architectures. +RUN curl -sSf -L -O https://github.com/llvm/llvm-project/releases/download/llvmorg-7.1.0/llvm-7.1.0.src.tar.xz && \ + tar -xf llvm-7.1.0.src.tar.xz && \ + cd llvm-7.1.0.src && \ + mkdir build && cd build && \ + ARCH=$(dpkg --print-architecture) && \ + if [ "$ARCH" = "amd64" ]; then \ + TARGETS="X86;NVPTX"; \ + else \ + TARGETS="AArch64;NVPTX"; \ + fi && \ + cmake -G Ninja \ + -DCMAKE_BUILD_TYPE=Release \ + -DLLVM_TARGETS_TO_BUILD="$TARGETS" \ + -DLLVM_BUILD_LLVM_DYLIB=ON \ + -DLLVM_LINK_LLVM_DYLIB=ON \ + -DLLVM_ENABLE_ASSERTIONS=OFF \ + -DLLVM_ENABLE_BINDINGS=OFF \ + -DLLVM_INCLUDE_EXAMPLES=OFF \ + -DLLVM_INCLUDE_TESTS=OFF \ + -DLLVM_INCLUDE_BENCHMARKS=OFF \ + -DLLVM_ENABLE_ZLIB=ON \ + -DLLVM_ENABLE_TERMINFO=ON \ + -DCMAKE_INSTALL_PREFIX=/opt/llvm-7 \ + .. && \ + ninja -j$(nproc) && \ + ninja install && \ + cd ../.. 
&& \ + rm -rf llvm-7.1.0.src* + +FROM nvcr.io/nvidia/cuda:13.0.2-cudnn-devel-ubuntu24.04 + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y install \ + build-essential \ + clang \ + curl \ + libssl-dev \ + libtinfo-dev \ + pkg-config \ + xz-utils \ + zlib1g-dev \ + cmake \ + libfontconfig-dev \ + libx11-xcb-dev \ + libxcursor-dev \ + libxi-dev \ + libxinerama-dev \ + libxrandr-dev && \ + rm -rf /var/lib/apt/lists/* + +COPY --from=llvm-builder /opt/llvm-7 /opt/llvm-7 +RUN ln -s /opt/llvm-7/bin/llvm-config /usr/bin/llvm-config && \ + ln -s /opt/llvm-7/bin/llvm-config /usr/bin/llvm-config-7 + +# Get Rust (install rustup; toolchain installed from rust-toolchain.toml below) +RUN curl -sSf -L https://sh.rustup.rs | bash -s -- -y --profile minimal --default-toolchain none +ENV PATH="/root/.cargo/bin:${PATH}" + +# Setup the workspace +WORKDIR /data/rust-cuda +RUN --mount=type=bind,source=rust-toolchain.toml,target=/data/rust-cuda/rust-toolchain.toml \ + rustup show + +# Add nvvm to LD_LIBRARY_PATH. +ENV LD_LIBRARY_PATH="/usr/local/cuda/nvvm/lib64:${LD_LIBRARY_PATH}" +ENV LLVM_LINK_STATIC=1 +ENV RUST_LOG=info diff --git a/crates/nvvm/src/lib.rs b/crates/nvvm/src/lib.rs index cb19c6e7..c6706ea0 100644 --- a/crates/nvvm/src/lib.rs +++ b/crates/nvvm/src/lib.rs @@ -312,8 +312,10 @@ pub enum NvvmArch { /// This default value of 7.5 corresponds to Turing and later devices. We default to this /// because it is the minimum supported by CUDA 13.0 while being in the middle of the range /// supported by CUDA 12.x. - // WARNING: If you change the default, consider updating the `--target-arch` values used for - // compiletests in `ci_linux.yml` and `.github/workflows/ci_{linux,windows}.yml`. + // WARNING: If you change the default, consider updating: + // - The `--target-arch` values used for compiletests in `ci_linux.yml` and + // `.github/workflows/ci_{linux,windows}.yml`. + // - The CUDA versions used in `setup_cuda_environment` in `compiletests`. 
#[default] Compute75, Compute80, diff --git a/guide/src/README.md b/guide/src/README.md deleted file mode 100644 index c2bce261..00000000 --- a/guide/src/README.md +++ /dev/null @@ -1,3 +0,0 @@ -# Introduction - -Welcome to the Rust CUDA guide! Let's dive right in. diff --git a/guide/src/SUMMARY.md b/guide/src/SUMMARY.md index 21cd8d26..4c5fe3d8 100644 --- a/guide/src/SUMMARY.md +++ b/guide/src/SUMMARY.md @@ -1,8 +1,6 @@ # Summary -- [Introduction](README.md) -- [Supported Features](features.md) -- [Frequently Asked Questions](faq.md) +- [Introduction](introduction.md) - [Guide](guide/README.md) - [Getting Started](guide/getting_started.md) - [Compute Capability Gating](guide/compute_capabilities.md) @@ -18,3 +16,9 @@ - [Types](nvvm/types.md) - [PTX Generation](nvvm/ptxgen.md) - [Debugging](nvvm/debugging.md) + +---- + +[Supported Features](features.md) +[Frequently Asked Questions](faq.md) + diff --git a/guide/assets/nsight.png b/guide/src/assets/nsight.png similarity index 100% rename from guide/assets/nsight.png rename to guide/src/assets/nsight.png diff --git a/guide/assets/streams.svg b/guide/src/assets/streams.svg similarity index 100% rename from guide/assets/streams.svg rename to guide/src/assets/streams.svg diff --git a/guide/src/guide/getting_started.md b/guide/src/guide/getting_started.md index 07b872d5..762e17cb 100644 --- a/guide/src/guide/getting_started.md +++ b/guide/src/guide/getting_started.md @@ -276,12 +276,24 @@ There are two ways to build and run this example: natively, and with docker. ### Native -If you have all the required libraries installed, try building with `cargo build`. If you get an -error "libnvvm.so.4: cannot open shared object file", you will need to adjust `LD_LIBRARY_PATH`, -something like this: +If you have all the required libraries installed, try building with `cargo build`. 
+ +If you get an error "libnvvm.so.4: cannot open shared object file", you will need to adjust +`LD_LIBRARY_PATH`, something like this: ``` export LD_LIBRARY_PATH="/usr/local/cuda/nvvm/lib64:${LD_LIBRARY_PATH}" ``` + +If you get an error "error: couldn't load codegen backend" on Windows, you will need to adjust +`PATH`, something like this with CUDA 12: +``` +$env:PATH += ";C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.9\nvvm\bin" +``` +or this with CUDA 13: +``` +$env:PATH += ";C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v13.0\nvvm\bin\x64" +``` + You should then be able to `cargo run`, and see the expected output: ``` c = [3.0, 5.0, 7.0, 9.0] @@ -328,6 +340,9 @@ is recognized. `make`ing and running the [`deviceQuery`] sample. If all is well it will print various details about your GPU. +A sample `.devcontainer.json` file is also included, configured for Ubuntu 24.04. Copy this to +`.devcontainer/devcontainer.json` to make additional customizations. + [`deviceQuery`]: https://github.com/NVIDIA/cuda-samples/tree/ba04faaf7328dbcc87bfc9acaf17f951ee5ddcf3/Samples/deviceQuery ## More examples diff --git a/guide/src/guide/kernel_abi.md b/guide/src/guide/kernel_abi.md index 5330d207..1ab5cd04 100644 --- a/guide/src/guide/kernel_abi.md +++ b/guide/src/guide/kernel_abi.md @@ -52,15 +52,15 @@ by reference (by allocating a device box): ```rs let foo = Foo { - a: 5, - b: 6, - c: 7 + a: 5, + b: 6, + c: 7 }; unsafe { - launch!( - module.kernel<<<1, 1, 0, stream>>>(foo) - )?; + launch!( + module.kernel<<<1, 1, 0, stream>>>(foo) + )?; } ``` @@ -68,15 +68,15 @@ And not ```rs let foo = DeviceBox::new(Foo { - a: 5, - b: 6, - c: 7 + a: 5, + b: 6, + c: 7 }); unsafe { - launch!( - module.kernel<<<1, 1, 0, stream>>>(foo.as_device_ptr()) - )?; + launch!( + module.kernel<<<1, 1, 0, stream>>>(foo.as_device_ptr()) + )?; } ``` diff --git a/guide/src/introduction.md b/guide/src/introduction.md new file mode 100644 index 00000000..8fc1eeae --- /dev/null +++
b/guide/src/introduction.md @@ -0,0 +1,81 @@ +# Introduction + +Welcome to the Rust CUDA Guide! + +## Goal + +The Rust CUDA Project is a project aimed at making Rust a tier-1 language for GPU computing using +the CUDA Toolkit. It provides tools for compiling Rust to fast PTX code as well as libraries for +using existing CUDA libraries with it. + +## Background + +Historically, general-purpose high-performance GPU computing has been done using the CUDA toolkit. +The CUDA toolkit primarily provides a way to use Fortran/C/C++ code for GPU computing in tandem +with CPU code with a single source. It also provides many libraries, tools, forums, and +documentation to supplement the single-source CPU/GPU code. + +CUDA is exclusively an NVIDIA-only toolkit. Many tools have been proposed for cross-platform GPU +computing such as OpenCL, Vulkan Computing, and HIP. However, CUDA remains the most used toolkit +for such tasks by far. This is why it is imperative to make Rust a viable option for use with the +CUDA toolkit. + +However, CUDA with Rust has been a historically very rocky road. The only viable option until now +has been to use the LLVM PTX backend. However, the LLVM PTX backend does not always work and would +generate invalid PTX for many common Rust operations. In recent years it has been shown time and +time again that a specialized solution is needed for Rust on the GPU with the advent of projects +such as rust-gpu (for translating Rust to SPIR-V). + +Our hope is that with this project we can push Rust on GPUs forward and make Rust an excellent +language for such tasks. Rust offers plenty of benefits such as `__restrict__` performance benefits +for every kernel, an excellent module/crate system, delimiting of unsafe areas of CPU/GPU code with +`unsafe`, high-level wrappers to low-level CUDA libraries, etc.
+ +## Structure + +The scope of the Rust CUDA Project is broad, spanning the entirety of the CUDA ecosystem, with +libraries and tools to make it usable from Rust. Therefore, the project contains many crates for +all corners of the CUDA ecosystem. + +- `rustc_codegen_nvvm` is a rustc backend that targets NVVM IR (a subset of LLVM IR) for the + [libnvvm](https://docs.nvidia.com/cuda/nvvm-ir-spec/index.html) library. + - Generates highly optimized PTX code which can be loaded by the CUDA Driver API to execute on + the GPU. + - For now it is CUDA-only, but it may be used to target AMD GPUs in the future. +- `cuda_std` contains GPU-side functions and utilities, such as thread index queries, memory + allocation, warp intrinsics, etc. + - It is _not_ a low level library. It provides many utility functions to make it easier to write + cleaner and more reliable GPU kernels. + - It is closely tied to `rustc_codegen_nvvm` which exposes GPU features through it internally. +- `cust` contains CPU-side CUDA features such as launching GPU kernels, GPU memory allocation, + device queries, etc. + - It is a high-level wrapper for the CUDA Driver API, the lower level alternative to the more + common CUDA Runtime API used from C++. It provides more fine-grained control over things like + kernel concurrency and module loading than the Runtime API. + - High-level Rust features such as RAII and `Result` make it easier and cleaner to manage + the interface to the GPU. +- `cudnn` is a collection of GPU-accelerated primitives for deep neural networks. +- `gpu_rand` does GPU-friendly random number generation. It currently only implements xoroshiro + RNGs from `rand_xoshiro`. +- `optix` provides CPU-side hardware raytracing and denoising using the CUDA OptiX library. + (This library is currently commented out because the OptiX SDK is difficult to install.) + +There are also several "glue" crates for things such as high level wrappers for certain smaller +CUDA libraries.
+ +## Related Projects + +Other projects related to using Rust on the GPU: + +- 2016: [glassful](https://github.com/kmcallister/glassful) translates a subset of Rust to GLSL. +- 2017: [inspirv-rust](https://github.com/msiglreith/inspirv-rust) is an experimental + Rust-MIR-to-SPIR-V compiler. +- 2018: [nvptx](https://github.com/japaric-archived/nvptx) is a Rust-to-PTX compiler using the + `nvptx` target for rustc (using the LLVM PTX backend). +- 2020: [accel](https://github.com/termoshtt/accel) is a higher-level library that relied on the + same mechanism that `nvptx` does. +- 2020: [rlsl](https://github.com/MaikKlein/rlsl) is an experimental Rust-to-SPIR-V compiler + (and a predecessor to rust-gpu). +- 2020: [rust-gpu](https://github.com/Rust-GPU/rust-gpu) is a `rustc` compiler backend to compile + Rust to SPIR-V for use in shaders. Like Rust CUDA, it is part of the broader [Rust + GPU](https://rust-gpu.github.io/) project. diff --git a/guide/src/nvvm/debugging.md b/guide/src/nvvm/debugging.md index 6c0491a9..23b3578d 100644 --- a/guide/src/nvvm/debugging.md +++ b/guide/src/nvvm/debugging.md @@ -33,8 +33,10 @@ which I will add to the project soon. ## Miscompilations Miscompilations are rare but annoying. They usually result in one of two things happening: -- CUDA rejecting the PTX as a whole (throwing an InvalidPtx error). This is rare but the most common cause is declaring invalid -extern functions (just grep for `extern` in the PTX file and check if it's odd functions that aren't CUDA syscalls like vprintf, malloc, free, etc). +- CUDA rejecting the PTX as a whole (throwing an `InvalidPtx` error). Run `ptxas` on the `.ptx` + file to get a more informative error message. This is rare but the most common cause is declaring + invalid extern functions (just grep for `extern` in the PTX file and check if it's odd functions + that aren't CUDA syscalls like vprintf, malloc, free, etc). - The PTX containing invalid behavior. 
This is very specific and rare but if you find this, the best way to debug it is: - Try to get a minimal working example so we don't have to search through megabytes of LLVM IR/PTX. - Use `RUSTFLAGS="--emit=llvm-ir"` and find `crate_name.ll` in `target/nvptx64-nvidia-cuda//deps/` and attach it in any bug report. @@ -51,4 +53,4 @@ If you set up the codegen backend for debug, it should give you a mapping from R Here is an example of the screen you should see: -![](../../../assets/nsight.png) +![](../../assets/nsight.png) diff --git a/tests/compiletests/src/main.rs b/tests/compiletests/src/main.rs index 0e345813..b6888e95 100644 --- a/tests/compiletests/src/main.rs +++ b/tests/compiletests/src/main.rs @@ -576,30 +576,41 @@ fn setup_cuda_environment() { // Set library path to include CUDA NVVM libraries let lib_path_var = dylib_path_envvar(); - // Try to find CUDA installation - let cuda_paths = vec![ - "/usr/local/cuda/nvvm/lib64", - "/usr/local/cuda-12/nvvm/lib64", - "/usr/local/cuda-11/nvvm/lib64", - "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.8\\nvvm\\lib\\x64", - "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.0\\nvvm\\lib\\x64", - "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v11.8\\nvvm\\lib\\x64", - ]; - let mut found_cuda_paths = Vec::new(); // Check CUDA_PATH environment variable if let Ok(cuda_path) = env::var("CUDA_PATH") { - let nvvm_path = Path::new(&cuda_path).join("nvvm").join("lib64"); - if nvvm_path.exists() { - found_cuda_paths.push(nvvm_path.to_string_lossy().to_string()); + #[cfg(unix)] + { + let nvvm_path = Path::new(&cuda_path).join("nvvm").join("lib64"); + if nvvm_path.exists() { + found_cuda_paths.push(nvvm_path.to_string_lossy().to_string()); + } } - let nvvm_path_win = Path::new(&cuda_path).join("nvvm").join("lib").join("x64"); - if nvvm_path_win.exists() { - found_cuda_paths.push(nvvm_path_win.to_string_lossy().to_string()); + #[cfg(windows)] + { + let nvvm_path = 
Path::new(&cuda_path).join("nvvm").join("lib").join("x64"); + if nvvm_path.exists() { + found_cuda_paths.push(nvvm_path.to_string_lossy().to_string()); + } } } + // Try to find CUDA installation + #[cfg(unix)] + let cuda_paths = vec![ + "/usr/local/cuda/nvvm/lib64", + "/usr/local/cuda-13/nvvm/lib64", + "/usr/local/cuda-12/nvvm/lib64", + ]; + #[cfg(windows)] + let cuda_paths = vec![ + "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v13.0\\nvvm\\lib\\x64", + "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.9\\nvvm\\lib\\x64", + "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.8\\nvvm\\lib\\x64", + "C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v12.0\\nvvm\\lib\\x64", + ]; + // Check standard paths for path in &cuda_paths { if Path::new(path).exists() {