Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/ci_linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,10 @@ jobs:
- name: RockyLinux-9 / CUDA-13.0.2 / x86_64
image: "ghcr.io/rust-gpu/rust-cuda-rockylinux9-cuda13:latest"
runner: ubuntu-latest
# CUDA 13.2 entries require container images from container_images.yml
# to be published first. Add these back once the images exist on ghcr.io:
# ghcr.io/rust-gpu/rust-cuda-ubuntu24-cuda132:latest
# ghcr.io/rust-gpu/rust-cuda-rockylinux9-cuda132:latest

steps:
- name: Free up space
Expand Down
21 changes: 20 additions & 1 deletion .github/workflows/ci_windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -59,13 +59,32 @@ jobs:
"nvrtc_dev",
"nvvm", # new subpackage in CUDA 13
]
- os: windows-latest
target: x86_64-pc-windows-msvc
cuda: "13.2.0"
nvvm-dll-dir: "nvvm\\bin\\x64"
sub-packages:
[
"crt",
"cublas",
"cublas_dev",
"cuda_profiler_api",
"cudart",
"curand",
"curand_dev",
"nvcc",
"nvptxcompiler",
"nvrtc",
"nvrtc_dev",
"nvvm",
]

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Install CUDA
uses: Jimver/cuda-toolkit@v0.2.29
uses: Jimver/cuda-toolkit@v0.2.35
id: cuda-toolkit
with:
cuda: ${{ matrix.cuda }}
Expand Down
10 changes: 10 additions & 0 deletions .github/workflows/container_images.yml
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ jobs:
- name: RockyLinux-9/CUDA-13.0.2
image: "rust-gpu/rust-cuda-rockylinux9-cuda13"
dockerfile: ./container/rockylinux9-cuda13/Dockerfile
- name: Ubuntu-24.04/CUDA-13.2.0
image: "rust-gpu/rust-cuda-ubuntu24-cuda132"
dockerfile: ./container/ubuntu24-cuda132/Dockerfile
- name: RockyLinux-9/CUDA-13.2.0
image: "rust-gpu/rust-cuda-rockylinux9-cuda132"
dockerfile: ./container/rockylinux9-cuda132/Dockerfile
steps:
- name: Free up space
# Without this the job will likely run out of disk space.
Expand Down Expand Up @@ -165,6 +171,10 @@ jobs:
image: "rust-gpu/rust-cuda-rockylinux9-cuda12"
- name: RockyLinux-9/CUDA-13.0.2
image: "rust-gpu/rust-cuda-rockylinux9-cuda13"
- name: Ubuntu-24.04/CUDA-13.2.0
image: "rust-gpu/rust-cuda-ubuntu24-cuda132"
- name: RockyLinux-9/CUDA-13.2.0
image: "rust-gpu/rust-cuda-rockylinux9-cuda132"
steps:
- name: Set artifact name
run: |
Expand Down
92 changes: 92 additions & 0 deletions container/rockylinux9-cuda132/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
# Stage 1 (llvm-builder): compile LLVM 7.1.0 from source on the CUDA 13.2
# Rocky Linux 9 development image. The install tree under /opt/llvm-7 is the
# only artifact the final stage copies out of this stage.
FROM nvcr.io/nvidia/cuda:13.2.0-cudnn-devel-rockylinux9 AS llvm-builder

# Tools and headers needed to configure and build LLVM. Weak dependencies are
# disabled and the dnf cache is dropped in the same layer that created it.
RUN dnf -y install \
        --nobest \
        --allowerasing \
        --setopt=install_weak_deps=False \
        cmake \
        libedit-devel \
        libffi-devel \
        libxml2-devel \
        make \
        ncurses-devel \
        openssl-devel \
        pkgconfig \
        python3 \
        which \
        xz \
        zlib-devel && \
    dnf clean all

WORKDIR /data/llvm7

# Download, build and install LLVM 7.1.0 with the host backend plus NVPTX:
# x86_64 hosts get the X86 backend, any other `uname -m` value falls through
# to AArch64. The source tree is removed in this same layer after installing.
# No dnf transaction runs in this layer, so there is no dnf cache to clean.
RUN curl -sSf -L -O https://github.com/llvm/llvm-project/releases/download/llvmorg-7.1.0/llvm-7.1.0.src.tar.xz && \
    tar -xf llvm-7.1.0.src.tar.xz && \
    cd llvm-7.1.0.src && \
    mkdir build && cd build && \
    ARCH=$(uname -m) && \
    if [ "$ARCH" = "x86_64" ]; then \
        TARGETS="X86;NVPTX"; \
    else \
        TARGETS="AArch64;NVPTX"; \
    fi && \
    cmake \
        -DCMAKE_BUILD_TYPE=Release \
        -DLLVM_TARGETS_TO_BUILD="$TARGETS" \
        -DLLVM_BUILD_LLVM_DYLIB=ON \
        -DLLVM_LINK_LLVM_DYLIB=ON \
        -DLLVM_ENABLE_ASSERTIONS=OFF \
        -DLLVM_ENABLE_BINDINGS=OFF \
        -DLLVM_INCLUDE_EXAMPLES=OFF \
        -DLLVM_INCLUDE_TESTS=OFF \
        -DLLVM_INCLUDE_BENCHMARKS=OFF \
        -DLLVM_ENABLE_ZLIB=ON \
        -DLLVM_ENABLE_TERMINFO=ON \
        -DCMAKE_INSTALL_PREFIX=/opt/llvm-7 \
        .. && \
    make -j"$(nproc)" && \
    make install && \
    cd ../.. && \
    rm -rf llvm-7.1.0.src*

# Final stage: CUDA 13.2 Rocky Linux 9 development image with the prebuilt
# LLVM 7 tree from the llvm-builder stage and rustup installed for root.
FROM nvcr.io/nvidia/cuda:13.2.0-cudnn-devel-rockylinux9

# Development packages for the image. The dnf cache is dropped in the same
# layer that created it.
RUN dnf -y install \
        --nobest \
        --allowerasing \
        --setopt=install_weak_deps=False \
        clang \
        cmake \
        fontconfig-devel \
        libX11-devel \
        libXcursor-devel \
        libXi-devel \
        libXrandr-devel \
        libxml2-devel \
        ncurses-devel \
        openssl-devel \
        pkgconfig \
        which \
        xz \
        zlib-devel && \
    dnf clean all

# Bring in the LLVM 7 install tree and expose llvm-config under both the
# unversioned and the `-7` suffixed name.
COPY --from=llvm-builder /opt/llvm-7 /opt/llvm-7
RUN ln -s /opt/llvm-7/bin/llvm-config /usr/bin/llvm-config && \
    ln -s /opt/llvm-7/bin/llvm-config /usr/bin/llvm-config-7

# Get Rust (install rustup; toolchain installed from rust-toolchain.toml below).
# The installer is saved to a file and then executed instead of being piped
# into bash: with a pipe, a failed download is masked because bash exits 0 on
# empty input, and the build would only break later at `rustup show`.
RUN curl --proto '=https' --tlsv1.2 -sSf -L https://sh.rustup.rs -o /tmp/rustup-init.sh && \
    bash /tmp/rustup-init.sh -y --profile minimal --default-toolchain none && \
    rm -f /tmp/rustup-init.sh
ENV PATH="/root/.cargo/bin:${PATH}"

# Setup the workspace. rust-toolchain.toml is bind-mounted from the build
# context for this step only, so `rustup show` runs next to it without the
# file being copied into the image.
WORKDIR /data/rust-cuda
RUN --mount=type=bind,source=rust-toolchain.toml,target=/data/rust-cuda/rust-toolchain.toml \
    rustup show

# Add nvvm to LD_LIBRARY_PATH.
ENV LD_LIBRARY_PATH="/usr/local/cuda/nvvm/lib64:${LD_LIBRARY_PATH}"
# NOTE(review): presumably read by the rust-cuda build scripts and logging
# setup respectively — confirm against the consumers of this image.
ENV LLVM_LINK_STATIC=1
ENV RUST_LOG=info
89 changes: 89 additions & 0 deletions container/ubuntu24-cuda132/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
# Stage 1 (llvm-builder): compile LLVM 7.1.0 from source on the CUDA 13.2
# Ubuntu 24.04 development image. The install tree under /opt/llvm-7 is the
# only artifact the final stage copies out of this stage.
FROM nvcr.io/nvidia/cuda:13.2.0-cudnn-devel-ubuntu24.04 AS llvm-builder

# Tools and headers needed to configure and build LLVM with Ninja. The apt
# package lists are removed in the same layer that fetched them.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y install \
        build-essential \
        clang \
        cmake \
        curl \
        libedit-dev \
        libffi-dev \
        libncurses5-dev \
        libssl-dev \
        libtinfo-dev \
        libxml2-dev \
        ninja-build \
        pkg-config \
        python3 \
        xz-utils \
        zlib1g-dev && \
    rm -rf /var/lib/apt/lists/*

WORKDIR /data/llvm7

# Download, build and install LLVM 7.1.0 with the host backend plus NVPTX:
# amd64 hosts get the X86 backend, any other dpkg architecture falls through
# to AArch64. The source tree is removed in this same layer after installing.
RUN curl -sSf -L -O https://github.com/llvm/llvm-project/releases/download/llvmorg-7.1.0/llvm-7.1.0.src.tar.xz && \
    tar -xf llvm-7.1.0.src.tar.xz && \
    mkdir llvm-7.1.0.src/build && \
    cd llvm-7.1.0.src/build && \
    case "$(dpkg --print-architecture)" in \
        amd64) LLVM_TARGETS="X86;NVPTX" ;; \
        *)     LLVM_TARGETS="AArch64;NVPTX" ;; \
    esac && \
    cmake -G Ninja \
        -DCMAKE_BUILD_TYPE=Release \
        -DLLVM_TARGETS_TO_BUILD="$LLVM_TARGETS" \
        -DLLVM_BUILD_LLVM_DYLIB=ON \
        -DLLVM_LINK_LLVM_DYLIB=ON \
        -DLLVM_ENABLE_ASSERTIONS=OFF \
        -DLLVM_ENABLE_BINDINGS=OFF \
        -DLLVM_INCLUDE_EXAMPLES=OFF \
        -DLLVM_INCLUDE_TESTS=OFF \
        -DLLVM_INCLUDE_BENCHMARKS=OFF \
        -DLLVM_ENABLE_ZLIB=ON \
        -DLLVM_ENABLE_TERMINFO=ON \
        -DCMAKE_INSTALL_PREFIX=/opt/llvm-7 \
        .. && \
    ninja -j"$(nproc)" && \
    ninja install && \
    cd /data/llvm7 && \
    rm -rf llvm-7.1.0.src*

# Final stage: CUDA 13.2 Ubuntu 24.04 development image with the prebuilt
# LLVM 7 tree from the llvm-builder stage and rustup installed for root.
FROM nvcr.io/nvidia/cuda:13.2.0-cudnn-devel-ubuntu24.04

# Development packages for the image. The apt package lists are removed in
# the same layer that fetched them.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get -qq -y install \
        build-essential \
        clang \
        cmake \
        curl \
        libfontconfig-dev \
        libssl-dev \
        libtinfo-dev \
        libx11-xcb-dev \
        libxcursor-dev \
        libxi-dev \
        libxinerama-dev \
        libxrandr-dev \
        pkg-config \
        xz-utils \
        zlib1g-dev && \
    rm -rf /var/lib/apt/lists/*

# Bring in the LLVM 7 install tree and expose llvm-config under both the
# unversioned and the `-7` suffixed name.
COPY --from=llvm-builder /opt/llvm-7 /opt/llvm-7
RUN ln -s /opt/llvm-7/bin/llvm-config /usr/bin/llvm-config && \
    ln -s /opt/llvm-7/bin/llvm-config /usr/bin/llvm-config-7

# Get Rust (install rustup; toolchain installed from rust-toolchain.toml below).
# The installer is saved to a file and then executed instead of being piped
# into bash: the default RUN shell has no pipefail, so with a pipe a failed
# download is masked (bash exits 0 on empty input) and the build would only
# break later at `rustup show`.
RUN curl --proto '=https' --tlsv1.2 -sSf -L https://sh.rustup.rs -o /tmp/rustup-init.sh && \
    bash /tmp/rustup-init.sh -y --profile minimal --default-toolchain none && \
    rm -f /tmp/rustup-init.sh
ENV PATH="/root/.cargo/bin:${PATH}"

# Setup the workspace. rust-toolchain.toml is bind-mounted from the build
# context for this step only, so `rustup show` runs next to it without the
# file being copied into the image.
WORKDIR /data/rust-cuda
RUN --mount=type=bind,source=rust-toolchain.toml,target=/data/rust-cuda/rust-toolchain.toml \
    rustup show

# Add nvvm to LD_LIBRARY_PATH.
ENV LD_LIBRARY_PATH="/usr/local/cuda/nvvm/lib64:${LD_LIBRARY_PATH}"
# NOTE(review): presumably read by the rust-cuda build scripts and logging
# setup respectively — confirm against the consumers of this image.
ENV LLVM_LINK_STATIC=1
ENV RUST_LOG=info
8 changes: 8 additions & 0 deletions crates/cust/build.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,4 +40,12 @@ fn main() {
println!("cargo::rustc-cfg=cuGraphGetEdges_v2");
println!("cargo::rustc-cfg=cuCtxCreate_v4");
}

// In CUDA 13.2 the `id` field in `CUmemLocation_st` was placed inside an anonymous union.
// Bindgen renders this as `__bindgen_anon_1: CUmemLocation_st__bindgen_ty_1` instead of a
// direct `id` field. This cfg gates the struct initialization syntax accordingly.
println!("cargo::rustc-check-cfg=cfg(cuMemLocation_anon_id)");
if driver_version >= 13020 {
println!("cargo::rustc-cfg=cuMemLocation_anon_id");
}
}
15 changes: 15 additions & 0 deletions crates/cust/src/memory/unified.rs
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemPrefetchAsync_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemPrefetchAsync_v2))]
Expand Down Expand Up @@ -693,6 +696,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemPrefetchAsync_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemPrefetchAsync_v2))]
Expand Down Expand Up @@ -735,6 +741,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemAdvise_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemAdvise_v2))]
Expand Down Expand Up @@ -777,6 +786,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemAdvise_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemAdvise_v2))]
Expand All @@ -801,6 +813,9 @@ pub trait MemoryAdvise<T: DeviceCopy>: private::Sealed {
#[cfg(cuMemAdvise_v2)]
driver_sys::CUmemLocation {
type_: driver_sys::CUmemLocationType::CU_MEM_LOCATION_TYPE_DEVICE,
#[cfg(cuMemLocation_anon_id)]
__bindgen_anon_1: driver_sys::CUmemLocation_st__bindgen_ty_1 { id },
#[cfg(not(cuMemLocation_anon_id))]
id,
},
#[cfg(not(cuMemAdvise_v2))]
Expand Down
Loading