File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -397,7 +397,7 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
397397#ifndef FLASH_ATTN_AVAILABLE
398398 GGML_UNUSED (device); GGML_UNUSED (dst);
399399 return BEST_FATTN_KERNEL_NONE;
400- #endif // FLASH_ATTN_AVAILABLE
400+ #endif // FLASH_ATTN_AVAILABLE
401401
402402 const ggml_tensor * KQV = dst;
403403 const ggml_tensor * Q = dst->src [0 ];
@@ -598,6 +598,8 @@ static best_fattn_kernel ggml_cuda_get_best_fattn_kernel(const int device, const
598598
599599 // Use the WMMA kernel if possible:
600600 if (ggml_cuda_should_use_wmma_fattn (cc) && K->ne [1 ] % FATTN_KQ_STRIDE == 0 && Q->ne [0 ] != 40 && Q->ne [0 ] != 72 && Q->ne [0 ] != 512 && Q->ne [0 ] != 576 && Q->ne [0 ] != 640 ) {
601+ return BEST_FATTN_KERNEL_WMMA_F16;
602+ }
601603
602604 // If there are no tensor cores available, use the generic tile kernel:
603605 if (can_use_vector_kernel) {
You can’t perform that action at this time.
0 commit comments