Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions .github/workflows/ubuntu24_04_cuda13_1.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# CI workflow: installs the CUDA 13.1 toolkit on an Ubuntu 24.04 runner and
# builds/installs perftest against it (autogen -> configure -> make -> install).
name: Build and Test perftest on Ubuntu 24.04 with CUDA 13.1

# Run on pushes to master and on pull requests that target master.
on:
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

jobs:
  build:
    runs-on: ubuntu-24.04

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      # The keyring must come from the repository that matches the runner's
      # distro release (ubuntu2404), otherwise apt resolves packages that were
      # built for a different Ubuntu release.
      - name: Install CUDA repository
        run: |
          wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2404/x86_64/cuda-keyring_1.1-1_all.deb
          sudo dpkg -i cuda-keyring_1.1-1_all.deb

      - name: Install dependencies
        run: |
          sudo apt-get update
          sudo apt-get install -y \
            build-essential \
            autoconf \
            automake \
            libtool \
            pkg-config \
            libibverbs-dev \
            librdmacm-dev \
            libibumad-dev \
            libpci-dev \
            cuda-toolkit-13-1 \
            cuda-drivers

      # GITHUB_ENV expects plain NAME=value lines (no `export`), and PATH
      # additions go through GITHUB_PATH. Double quotes let the shell expand
      # the current LD_LIBRARY_PATH; the `:+` form avoids a trailing ':' (which
      # would add the current directory to the search path) when it is unset.
      - name: Set up CUDA environment
        run: |
          echo "/usr/local/cuda-13.1/bin" >> "$GITHUB_PATH"
          echo "LD_LIBRARY_PATH=/usr/local/cuda-13.1/lib64${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}" >> "$GITHUB_ENV"

      - name: Run autogen.sh
        run: ./autogen.sh

      # CUDA_H_PATH points configure at the toolkit's cuda.h header.
      - name: Configure the build
        run: ./configure CUDA_H_PATH=/usr/local/cuda/include/cuda.h

      - name: Build perftest
        run: make

      - name: Install perftest
        run: sudo make install
1 change: 0 additions & 1 deletion configure.ac
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,6 @@ if test "$cuda_found" = "yes"; then
AC_DEFINE_UNQUOTED([CUDA_PATH], "$cuda_h_path" , [Enable CUDA feature])
AC_CHECK_LIB([cuda], [cuMemGetHandleForAddressRange], [HAVE_CUDA_CUMEMGETHANDLEFORADDRESSRANGE=yes], [HAVE_CUDA_CUMEMGETHANDLEFORADDRESSRANGE=no])
cuda_toolkit_version=`grep "define CUDA_VERSION" $cuda_h_path | cut -d' ' -f3`
AC_DEFINE_UNQUOTED([CUDA_VER], [$cuda_toolkit_version], [Define CUDA_VER])
AC_TRY_LINK([
#include <$cuda_h_path>],
[int x = CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD|CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED;],
Expand Down
9 changes: 5 additions & 4 deletions src/cuda_loader.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ CUresult (*p_cuDeviceGetCount)(int *) = NULL;
CUresult (*p_cuDeviceGet)(CUdevice *, int) = NULL;
CUresult (*p_cuDeviceGetAttribute)(int *, CUdevice_attribute, CUdevice) = NULL;
CUresult (*p_cuDeviceGetName)(char *, int, CUdevice) = NULL;
CUresult (*p_cuCtxCreate)(CUcontext *, unsigned int, CUdevice) = NULL;
CUresult (*p_cuCtxCreate_v2)(CUcontext *, unsigned int, CUdevice) = NULL;
CUresult (*p_cuDevicePrimaryCtxRetain)(CUcontext *, CUdevice) = NULL;
CUresult (*p_cuCtxSetCurrent)(CUcontext) = NULL;
CUresult (*p_cuCtxDestroy)(CUcontext) = NULL;
Expand All @@ -25,7 +25,7 @@ CUresult (*p_cuMemcpyDtoD)(CUdeviceptr, CUdeviceptr, size_t) = NULL;
CUresult (*p_cuMemGetHandleForAddressRange)(void *, void *, size_t, CUmemRangeHandleType, unsigned int) = NULL;
#endif
CUresult (*p_cuDriverGetVersion)(int* driverVersion) = NULL;
#if CUDA_VER >= 12000
#if CUDA_VERSION >= 12000
CUresult (*p_cuGetProcAddress)(const char* symbol, void** pfn, int cudaVersion, uint64_t flags, CUdriverProcAddressQueryResult* symbolStatus) = NULL;
#else
CUresult (*p_cuGetProcAddress)(const char* symbol, void** pfn, int cudaVersion, uint64_t flags) = NULL;
Expand All @@ -34,7 +34,7 @@ CUresult (*p_cuMemAllocManaged)(CUdeviceptr* dptr, size_t bytesize, unsigned int
CUresult (*p_cuCtxSynchronize) (void) = NULL;

int load_cuda_function(void **func_ptr, const char *func_name, int version) {
#if CUDA_VER >= 12000
#if CUDA_VERSION >= 12000
CUresult res = p_cuGetProcAddress(func_name, func_ptr, version, 0, NULL);
#else
CUresult res = p_cuGetProcAddress(func_name, func_ptr, version, 0);
Expand Down Expand Up @@ -69,7 +69,8 @@ int load_cuda_library(void) {
{ (void**)&p_cuDeviceGet, "cuDeviceGet", CUDA_VER_2_0 },
{ (void**)&p_cuDeviceGetAttribute, "cuDeviceGetAttribute", CUDA_VER_2_0 },
{ (void**)&p_cuDeviceGetName, "cuDeviceGetName", CUDA_VER_2_0 },
{ (void**)&p_cuCtxCreate, "cuCtxCreate", CUDA_VER_3_2 },
/* CUDA_VER_3_2 selects the cuCtxCreate_v2 ABI across CUDA 11-13. */
{ (void**)&p_cuCtxCreate_v2, "cuCtxCreate", CUDA_VER_3_2 },
{ (void**)&p_cuDevicePrimaryCtxRetain, "cuDevicePrimaryCtxRetain", CUDA_VER_7_0 },
{ (void**)&p_cuCtxSetCurrent, "cuCtxSetCurrent", CUDA_VER_4_0 },
{ (void**)&p_cuCtxDestroy, "cuCtxDestroy", CUDA_VER_4_0 },
Expand Down
4 changes: 2 additions & 2 deletions src/cuda_loader.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ extern CUresult (*p_cuDeviceGetCount)(int *);
extern CUresult (*p_cuDeviceGet)(CUdevice *, int);
extern CUresult (*p_cuDeviceGetAttribute)(int *, CUdevice_attribute, CUdevice);
extern CUresult (*p_cuDeviceGetName)(char *, int, CUdevice);
extern CUresult (*p_cuCtxCreate)(CUcontext *, unsigned int, CUdevice);
extern CUresult (*p_cuCtxCreate_v2)(CUcontext *, unsigned int, CUdevice);
extern CUresult (*p_cuDevicePrimaryCtxRetain)(CUcontext *, CUdevice);
extern CUresult (*p_cuCtxSetCurrent)(CUcontext);
extern CUresult (*p_cuCtxDestroy)(CUcontext);
Expand All @@ -47,7 +47,7 @@ extern CUresult (*p_cuMemGetHandleForAddressRange)(void *, void *, size_t, CUmem
extern CUresult (*p_cuDriverGetVersion)(int* driverVersion);
extern CUresult (*p_cuCtxSynchronize) (void);
extern CUresult (*p_cuMemAllocManaged)(CUdeviceptr* dptr, size_t bytesize, unsigned int flags);
#if CUDA_VER >= 12000
#if CUDA_VERSION >= 12000
extern CUresult (*p_cuGetProcAddress)(const char* symbol, void** pfn, int cudaVersion, uint64_t flags, CUdriverProcAddressQueryResult* symbolStatus);
#else
extern CUresult (*p_cuGetProcAddress)(const char* symbol, void** pfn, int cudaVersion, uint64_t flags);
Expand Down
6 changes: 4 additions & 2 deletions src/cuda_memory.c
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,11 @@ static int init_gpu(struct cuda_memory_ctx *ctx)
printf("[pid = %d, dev = %d] device name = [%s]\n", getpid(), ctx->cuDevice, name);
printf("creating CUDA Ctx\n");

error = p_cuCtxCreate(&ctx->cuContext, CU_CTX_MAP_HOST, ctx->cuDevice);
/* Create context */
error = p_cuCtxCreate_v2(&ctx->cuContext, CU_CTX_MAP_HOST, ctx->cuDevice);

if (error != CUDA_SUCCESS) {
printf("cuCtxCreate() error=%d\n", error);
printf("cuCtxCreate_v2() error=%d\n", error);
return FAILURE;
}

Expand Down
Loading