Skip to content

Commit 734f76f

Browse files
committed
Fix case where DV % GGML_F32_EPR != 0: disable the tiled path instead of asserting
1 parent a1e1420 commit 734f76f

1 file changed

Lines changed: 4 additions & 5 deletions

File tree

ggml/src/ggml-cpu/ops.cpp

Lines changed: 4 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -8420,9 +8420,6 @@ static void ggml_compute_forward_flash_attn_ext_tiled(
84208420

84218421
static constexpr int Q_TILE_SZ = ggml_fa_tile_config::Q;
84228422
static constexpr int KV_TILE_SZ = ggml_fa_tile_config::KV;
8423-
#ifdef GGML_SIMD
8424-
GGML_ASSERT(DV % GGML_F32_EPR == 0);
8425-
#endif
84268423

84278424
int ir = ir0;
84288425
while (ir < ir1) {
@@ -8812,12 +8809,14 @@ static void ggml_compute_forward_flash_attn_ext_f16(
88128809
const int64_t dr = (nr + nchunk - 1) / nchunk;
88138810

88148811
static constexpr int64_t Q_TILE_SZ = ggml_fa_tile_config::Q;
8815-
const bool use_tiled = !use_ref &&
8812+
bool use_tiled = !use_ref &&
88168813
(q->type == GGML_TYPE_F32 &&
88178814
kv_is_f32_or_f16 &&
88188815
k->type == v->type &&
88198816
neq1 >= Q_TILE_SZ);
8820-
8817+
#ifdef GGML_SIMD
8818+
use_tiled &= (DV % GGML_F32_EPR == 0);
8819+
#endif
88218820
int current_chunk = ith;
88228821

88238822
while (current_chunk < nchunk) {

0 commit comments

Comments
 (0)