File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 110110# define GGML_CUDA_USE_CUB
111111#endif // !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11070
112112
113- // PDL host-side support (cudaLaunchKernelEx) requires CUDART >= 11.8 and excludes HIP/MUSA.
113+ // PDL host-side support (cudaLaunchKernelEx) requires CUDART >= 11.8.
114+ // However, that support is broken in MSVC builds with CUDA Toolkit (CTK) < 12.3; see
115+ // https://github.com/ggml-org/llama.cpp/pull/22522#discussion_r3302393293
114116// __CUDA_ARCH__ is undefined in host passes; GPU arch check happens in device-side code.
115- #if !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11080
117+ #if !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && \
118+ (CUDART_VERSION >= 12030 || (!(defined (_MSC_VER) && !defined (__clang__)) && CUDART_VERSION >= 11080 ))
116119# define GGML_CUDA_USE_PDL
117- #endif // !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && CUDART_VERSION >= 11080
120+ #endif // !defined(GGML_USE_HIP) && !defined(GGML_USE_MUSA) && (CUDART_VERSION >= 12030 || (!(defined(_MSC_VER) && !defined(__clang__)) && CUDART_VERSION >= 11080))
118121
119122static __device__ __forceinline__ void ggml_cuda_pdl_sync () {
120123#if defined(GGML_CUDA_USE_PDL) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= GGML_CUDA_CC_HOPPER
You can’t perform that action at this time.
0 commit comments