Skip to content

Commit 2e81dc5

Browse files
authored
Merge pull request #4 from dedesite/fix-hip-crash
HIP/ROCm: two crash fixes for TurboQuant KV cache on RDNA
2 parents 6c20151 + 425db5b commit 2e81dc5

2 files changed

Lines changed: 17 additions & 0 deletions

File tree

ggml/src/ggml-cuda/fattn-tile.cuh

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ static constexpr __host__ __device__ uint32_t ggml_cuda_fattn_tile_get_config_nv
6868
GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 16, 256, 2, 64, 64)
6969
GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 32, 256, 2, 64, 64)
7070

71+
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 2, 64, 2, 32, 64)
7172
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 4, 128, 2, 64, 64)
7273
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 8, 256, 2, 64, 64)
7374
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 16, 256, 2, 64, 64)
@@ -132,6 +133,7 @@ static constexpr __host__ __device__ uint32_t ggml_cuda_fattn_tile_get_config_nv
132133
GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 16, 256, 2, 32, 128)
133134
GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 32, 256, 2, 32, 64)
134135

136+
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 2, 64, 2, 32, 64)
135137
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 4, 128, 2, 32, 64)
136138
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 8, 256, 2, 32, 64)
137139
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 16, 256, 2, 32, 64)
@@ -203,6 +205,7 @@ static constexpr __host__ __device__ uint32_t ggml_cuda_fattn_tile_get_config_am
203205
GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 16, 256, 2, 32, 128)
204206
GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 32, 256, 2, 32, 128)
205207

208+
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 2, 64, 2, 32, 64)
206209
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 4, 128, 2, 64, 64)
207210
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 8, 256, 2, 64, 64)
208211
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 16, 256, 2, 64, 64)
@@ -277,6 +280,7 @@ static constexpr __host__ __device__ uint32_t ggml_cuda_fattn_tile_get_config_am
277280
GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 16, 256, 5, 32, 256)
278281
GGML_CUDA_FATTN_TILE_CONFIG_CASE(256, 256, 32, 256, 3, 64, 128)
279282

283+
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 2, 64, 2, 32, 64)
280284
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 4, 128, 2, 64, 64)
281285
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 8, 256, 2, 64, 64)
282286
GGML_CUDA_FATTN_TILE_CONFIG_CASE(512, 512, 16, 256, 4, 64, 64)
@@ -1259,6 +1263,16 @@ static void launch_fattn_tile_switch_ncols2(ggml_backend_cuda_context & ctx, ggm
12591263
launch_fattn_tile_switch_ncols1<DKQ, DV, 1, use_logit_softcap>(ctx, dst);
12601264
return;
12611265
}
1266+
1267+
// DV > 256 (e.g. DKQ=DV=512, head_dim=512 models): extend GQA fallback to ncols2=2/1.
1268+
// Without this, gqa_ratio not divisible by 4 (e.g. ratio=2) reaches GGML_ABORT.
1269+
if (use_gqa_opt && gqa_ratio % 2 == 0) {
1270+
launch_fattn_tile_switch_ncols1<DKQ, DV, 2, use_logit_softcap>(ctx, dst);
1271+
return;
1272+
}
1273+
1274+
launch_fattn_tile_switch_ncols1<DKQ, DV, 1, use_logit_softcap>(ctx, dst);
1275+
return;
12621276
}
12631277
GGML_ABORT("fatal error");
12641278
}

ggml/src/ggml-hip/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,14 +85,17 @@ else()
8585
../ggml-cuda/template-instances/fattn-vec-instance-turbo3_0-turbo3_0.cu
8686
../ggml-cuda/template-instances/fattn-vec-instance-turbo3_0-q8_0.cu
8787
../ggml-cuda/template-instances/fattn-vec-instance-q8_0-turbo3_0.cu
88+
../ggml-cuda/template-instances/fattn-vec-instance-f16-turbo3_0.cu
8889
../ggml-cuda/template-instances/fattn-vec-instance-turbo2_0-turbo2_0.cu
8990
../ggml-cuda/template-instances/fattn-vec-instance-turbo2_0-q8_0.cu
9091
../ggml-cuda/template-instances/fattn-vec-instance-q8_0-turbo2_0.cu
92+
../ggml-cuda/template-instances/fattn-vec-instance-f16-turbo2_0.cu
9193
../ggml-cuda/template-instances/fattn-vec-instance-turbo3_0-turbo2_0.cu
9294
../ggml-cuda/template-instances/fattn-vec-instance-turbo2_0-turbo3_0.cu
9395
../ggml-cuda/template-instances/fattn-vec-instance-turbo4_0-turbo4_0.cu
9496
../ggml-cuda/template-instances/fattn-vec-instance-turbo4_0-q8_0.cu
9597
../ggml-cuda/template-instances/fattn-vec-instance-q8_0-turbo4_0.cu
98+
../ggml-cuda/template-instances/fattn-vec-instance-f16-turbo4_0.cu
9699
../ggml-cuda/template-instances/fattn-vec-instance-turbo4_0-turbo3_0.cu
97100
../ggml-cuda/template-instances/fattn-vec-instance-turbo3_0-turbo4_0.cu
98101
../ggml-cuda/template-instances/fattn-vec-instance-turbo4_0-turbo2_0.cu

0 commit comments

Comments
 (0)