Skip to content

Commit 678a73e

Browse files
committed
Perftest: keep CUDA context creation ABI-stable
cuGetProcAddress resolves versioned driver entry points according to the CUDA version passed in the request, not the version of the headers used to compile perftest. Request the CUDA 3.2 cuCtxCreate ABI explicitly, and keep both the function pointer and the call site on the three-argument cuCtxCreate_v2 signature, even when CUDA 13 headers expose a newer default cuCtxCreate prototype.

The stale configure-generated CUDA_VER macro is removed because the source now uses CUDA_VERSION from cuda.h for compile-time checks. The existing CUDA_VER_* constants are kept: they are local names for the ABI versions that are explicitly requested from cuGetProcAddress for each driver entry point.

Signed-off-by: Zelong Yue <yuezelong@bytedance.com>
1 parent c94e538 commit 678a73e

4 files changed

Lines changed: 11 additions & 9 deletions

File tree

configure.ac

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -369,7 +369,6 @@ if test "$cuda_found" = "yes"; then
369369
AC_DEFINE_UNQUOTED([CUDA_PATH], "$cuda_h_path" , [Enable CUDA feature])
370370
AC_CHECK_LIB([cuda], [cuMemGetHandleForAddressRange], [HAVE_CUDA_CUMEMGETHANDLEFORADDRESSRANGE=yes], [HAVE_CUDA_CUMEMGETHANDLEFORADDRESSRANGE=no])
371371
cuda_toolkit_version=`grep "define CUDA_VERSION" $cuda_h_path | cut -d' ' -f3`
372-
AC_DEFINE_UNQUOTED([CUDA_VER], [$cuda_toolkit_version], [Define CUDA_VER])
373372
AC_TRY_LINK([
374373
#include <$cuda_h_path>],
375374
[int x = CU_MEM_RANGE_HANDLE_TYPE_DMA_BUF_FD|CU_DEVICE_ATTRIBUTE_DMA_BUF_SUPPORTED;],

src/cuda_loader.c

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ CUresult (*p_cuDeviceGetCount)(int *) = NULL;
1010
CUresult (*p_cuDeviceGet)(CUdevice *, int) = NULL;
1111
CUresult (*p_cuDeviceGetAttribute)(int *, CUdevice_attribute, CUdevice) = NULL;
1212
CUresult (*p_cuDeviceGetName)(char *, int, CUdevice) = NULL;
13-
CUresult (*p_cuCtxCreate)(CUcontext *, unsigned int, CUdevice) = NULL;
13+
CUresult (*p_cuCtxCreate_v2)(CUcontext *, unsigned int, CUdevice) = NULL;
1414
CUresult (*p_cuDevicePrimaryCtxRetain)(CUcontext *, CUdevice) = NULL;
1515
CUresult (*p_cuCtxSetCurrent)(CUcontext) = NULL;
1616
CUresult (*p_cuCtxDestroy)(CUcontext) = NULL;
@@ -25,7 +25,7 @@ CUresult (*p_cuMemcpyDtoD)(CUdeviceptr, CUdeviceptr, size_t) = NULL;
2525
CUresult (*p_cuMemGetHandleForAddressRange)(void *, void *, size_t, CUmemRangeHandleType, unsigned int) = NULL;
2626
#endif
2727
CUresult (*p_cuDriverGetVersion)(int* driverVersion) = NULL;
28-
#if CUDA_VER >= 12000
28+
#if CUDA_VERSION >= 12000
2929
CUresult (*p_cuGetProcAddress)(const char* symbol, void** pfn, int cudaVersion, uint64_t flags, CUdriverProcAddressQueryResult* symbolStatus) = NULL;
3030
#else
3131
CUresult (*p_cuGetProcAddress)(const char* symbol, void** pfn, int cudaVersion, uint64_t flags) = NULL;
@@ -34,7 +34,7 @@ CUresult (*p_cuMemAllocManaged)(CUdeviceptr* dptr, size_t bytesize, unsigned int
3434
CUresult (*p_cuCtxSynchronize) (void) = NULL;
3535

3636
int load_cuda_function(void **func_ptr, const char *func_name, int version) {
37-
#if CUDA_VER >= 12000
37+
#if CUDA_VERSION >= 12000
3838
CUresult res = p_cuGetProcAddress(func_name, func_ptr, version, 0, NULL);
3939
#else
4040
CUresult res = p_cuGetProcAddress(func_name, func_ptr, version, 0);
@@ -69,7 +69,8 @@ int load_cuda_library(void) {
6969
{ (void**)&p_cuDeviceGet, "cuDeviceGet", CUDA_VER_2_0 },
7070
{ (void**)&p_cuDeviceGetAttribute, "cuDeviceGetAttribute", CUDA_VER_2_0 },
7171
{ (void**)&p_cuDeviceGetName, "cuDeviceGetName", CUDA_VER_2_0 },
72-
{ (void**)&p_cuCtxCreate, "cuCtxCreate", CUDA_VER_3_2 },
72+
/* CUDA_VER_3_2 selects the cuCtxCreate_v2 ABI across CUDA 11-13. */
73+
{ (void**)&p_cuCtxCreate_v2, "cuCtxCreate", CUDA_VER_3_2 },
7374
{ (void**)&p_cuDevicePrimaryCtxRetain, "cuDevicePrimaryCtxRetain", CUDA_VER_7_0 },
7475
{ (void**)&p_cuCtxSetCurrent, "cuCtxSetCurrent", CUDA_VER_4_0 },
7576
{ (void**)&p_cuCtxDestroy, "cuCtxDestroy", CUDA_VER_4_0 },

src/cuda_loader.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ extern CUresult (*p_cuDeviceGetCount)(int *);
3030
extern CUresult (*p_cuDeviceGet)(CUdevice *, int);
3131
extern CUresult (*p_cuDeviceGetAttribute)(int *, CUdevice_attribute, CUdevice);
3232
extern CUresult (*p_cuDeviceGetName)(char *, int, CUdevice);
33-
extern CUresult (*p_cuCtxCreate)(CUcontext *, unsigned int, CUdevice);
33+
extern CUresult (*p_cuCtxCreate_v2)(CUcontext *, unsigned int, CUdevice);
3434
extern CUresult (*p_cuDevicePrimaryCtxRetain)(CUcontext *, CUdevice);
3535
extern CUresult (*p_cuCtxSetCurrent)(CUcontext);
3636
extern CUresult (*p_cuCtxDestroy)(CUcontext);
@@ -47,7 +47,7 @@ extern CUresult (*p_cuMemGetHandleForAddressRange)(void *, void *, size_t, CUmem
4747
extern CUresult (*p_cuDriverGetVersion)(int* driverVersion);
4848
extern CUresult (*p_cuCtxSynchronize) (void);
4949
extern CUresult (*p_cuMemAllocManaged)(CUdeviceptr* dptr, size_t bytesize, unsigned int flags);
50-
#if CUDA_VER >= 12000
50+
#if CUDA_VERSION >= 12000
5151
extern CUresult (*p_cuGetProcAddress)(const char* symbol, void** pfn, int cudaVersion, uint64_t flags, CUdriverProcAddressQueryResult* symbolStatus);
5252
#else
5353
extern CUresult (*p_cuGetProcAddress)(const char* symbol, void** pfn, int cudaVersion, uint64_t flags);

src/cuda_memory.c

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,9 +94,11 @@ static int init_gpu(struct cuda_memory_ctx *ctx)
9494
printf("[pid = %d, dev = %d] device name = [%s]\n", getpid(), ctx->cuDevice, name);
9595
printf("creating CUDA Ctx\n");
9696

97-
error = p_cuCtxCreate(&ctx->cuContext, CU_CTX_MAP_HOST, ctx->cuDevice);
97+
/* Create context */
98+
error = p_cuCtxCreate_v2(&ctx->cuContext, CU_CTX_MAP_HOST, ctx->cuDevice);
99+
98100
if (error != CUDA_SUCCESS) {
99-
printf("cuCtxCreate() error=%d\n", error);
101+
printf("cuCtxCreate_v2() error=%d\n", error);
100102
return FAILURE;
101103
}
102104

0 commit comments

Comments
 (0)