Skip to content

Commit d436640

Browse files
committed
[BugFix] Fix flash_attn_backend
1 parent 86b6430 commit d436640

1 file changed

Lines changed: 3 additions & 0 deletions

File tree

fastdeploy/model_executor/layers/attention/flash_attn_backend.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -295,6 +295,8 @@ def forward_mixed(
295 295
metadata.pre_cache_batch_ids,
296 296
metadata.pre_cache_tile_ids_per_batch,
297 297
metadata.pre_cache_num_blocks_cpu,
298+
getattr(layer, "q_norm_weight", None),
299+
getattr(layer, "k_norm_weight", None),
298 300
getattr(layer, "cache_k_scale", None),
299 301
getattr(layer, "cache_v_scale", None),
300 302
getattr(layer, "cache_k_out_scale", None),
@@ -304,6 +306,7 @@ def forward_mixed(
304 306
metadata.kv_signal_data_list[layer.layer_id],
305 307
metadata.kv_token_num_cpu[0].item(),
306 308
self.max_seq_len,
309+
getattr(layer, "rms_norm_eps", 1e-6),
307 310
getattr(layer, "cache_quant_type_str", "none"),
308 311
self.rope_3d,
309 312
)

0 commit comments

Comments
 (0)