Skip to content

Commit 3c667e4

Browse files
committed
ggml-cuda: aggressive Hopper FA config for D=64 (nthreads=256, nbatch_fa=256)
1 parent 2bd4d8a commit 3c667e4

1 file changed

Lines changed: 5 additions & 4 deletions

File tree

ggml/src/ggml-cuda/fattn-mma-f16.cuh

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -81,10 +81,11 @@ static constexpr __host__ __device__ fattn_mma_config ggml_cuda_fattn_mma_get_co
8181
// We also relax the restriction on nstages for ncols2=1 (handled in get_nstages).
8282

8383
// D=64
84-
GGML_CUDA_FATTN_MMA_CONFIG_CASE( 64, 64, 8, 128, 2, 128, 32, 32, 32, 2, true);
85-
GGML_CUDA_FATTN_MMA_CONFIG_CASE( 64, 64, 16, 128, 2, 128, 32, 32, 32, 2, true);
86-
GGML_CUDA_FATTN_MMA_CONFIG_CASE( 64, 64, 32, 128, 2, 128, 32, 32, 32, 2, true);
87-
GGML_CUDA_FATTN_MMA_CONFIG_CASE( 64, 64, 64, 128, 2, 128, 32, 32, 32, 2, true);
84+
// Aggressive Hopper tuning: nthreads=256 (8 warps), occupancy=1 (full SM), nbatch_fa=256 (max tile).
85+
GGML_CUDA_FATTN_MMA_CONFIG_CASE( 64, 64, 8, 256, 1, 256, 32, 32, 32, 2, true);
86+
GGML_CUDA_FATTN_MMA_CONFIG_CASE( 64, 64, 16, 256, 1, 256, 32, 32, 32, 2, true);
87+
GGML_CUDA_FATTN_MMA_CONFIG_CASE( 64, 64, 32, 256, 1, 256, 32, 32, 32, 2, true);
88+
GGML_CUDA_FATTN_MMA_CONFIG_CASE( 64, 64, 64, 256, 1, 128, 32, 32, 32, 2, true);
8889

8990
// D=80
9091
GGML_CUDA_FATTN_MMA_CONFIG_CASE( 80, 80, 8, 128, 2, 128, 40, 40, 40, 2, true);

0 commit comments

Comments (0)