From 05627605f258863a3c7be9f22488b322852b812d Mon Sep 17 00:00:00 2001 From: Antoine Viallon Date: Mon, 6 Apr 2026 18:42:27 +0200 Subject: [PATCH] ggml-cuda : fix CDNA2 compute capability constant for gfx90a (MI210) GGML_CUDA_CC_CDNA2 was set to 0x910, which would correspond to gfx910 (not an existing AMD target; RDNA3 is gfx11xx), not gfx90a (CDNA2/MI210). Since 0x90a < 0x910, CDNA2 GPUs fell below this minimum and were misidentified, skipping CDNA2-specific code paths such as MFMA acc register renaming. Fix by setting the constant to 0x90a to match the actual gfx90a ISA. --- ggml/src/ggml-cuda/common.cuh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh index 9affe023403..1c9233b4fc1 100644 --- a/ggml/src/ggml-cuda/common.cuh +++ b/ggml/src/ggml-cuda/common.cuh @@ -65,7 +65,7 @@ #define GGML_CUDA_CC_VEGA (GGML_CUDA_CC_OFFSET_AMD + 0x900) // Vega56/64, minimum for fp16 dual issue #define GGML_CUDA_CC_VEGA20 (GGML_CUDA_CC_OFFSET_AMD + 0x906) // MI50/Radeon VII, minimum for dp4a #define GGML_CUDA_CC_CDNA1 (GGML_CUDA_CC_OFFSET_AMD + 0x908) // MI100, minimum for MFMA, acc registers -#define GGML_CUDA_CC_CDNA2 (GGML_CUDA_CC_OFFSET_AMD + 0x910) // MI210, minimum acc register renameing +#define GGML_CUDA_CC_CDNA2 (GGML_CUDA_CC_OFFSET_AMD + 0x90a) // MI210 (gfx90a), minimum acc register renaming #define GGML_CUDA_CC_CDNA3 (GGML_CUDA_CC_OFFSET_AMD + 0x942) // MI300 // RDNA removes MFMA, dp4a, xnack, acc registers, wave size is 32