Skip to content

Commit 92ccbfc

Browse files
committed
update
1 parent 74559b9 commit 92ccbfc

3 files changed

Lines changed: 8 additions & 4 deletions

File tree

fastdeploy/model_executor/layers/attention/triton_ops/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -18,9 +18,9 @@
1818
# Licensed under Apache License 2.0
1919
"""
2020

21-
from .decode_attention import compute_num_kv_splits, decode_attention_fwd
22-
from .mla_cache_kernel import mla_write_cache_triton
23-
from .unified_extend_attention import (
21+
from .decode_attention import compute_num_kv_splits, decode_attention_fwd # noqa: F401
22+
from .mla_cache_kernel import mla_write_cache_triton # noqa: F401
23+
from .unified_extend_attention import ( # noqa: F401
2424
build_kv_indices_from_block_tables,
2525
build_unified_kv_indices,
2626
extend_attention_fwd_unified,

fastdeploy/worker/gpu_model_runner.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1386,7 +1386,9 @@ def _compute_position_ids_and_slot_mapping(self) -> None:
13861386
Results are stored in self.forward_meta.
13871387
"""
13881388
# NOTE(zhushengguang): Only MLAAttentionBackend, DSAAttentionBackend, and TritonMLAAttentionBackend are supported currently.
1389-
if not isinstance(self.attn_backends[0], (MLAAttentionBackend, DSAAttentionBackend, TritonMLAAttentionBackend)):
1389+
if not isinstance(
1390+
self.attn_backends[0], (MLAAttentionBackend, DSAAttentionBackend, TritonMLAAttentionBackend)
1391+
):
13901392
return
13911393
current_total_tokens = self.forward_meta.ids_remove_padding.shape[0]
13921394
position_ids = self.share_inputs["position_ids_buffer"][:current_total_tokens]

scripts/.coveragerc

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ omit =
3131
*/fastdeploy/benchmarks/lib/endpoint_request_func.py
3232
*/fastdeploy/model_executor/graph_optimization/utils.py
3333
*/fastdeploy/model_executor/layers/sample/ops/top_k_top_p_triton.py
34+
*/fastdeploy/model_executor/layers/attention/triton_ops/*
35+
*/fastdeploy/model_executor/layers/attention/triton_mla_attention_backend.py
3436
*/fastdeploy/model_executor/ops/gpu/fastdeploy_ops.py
3537
*/fastdeploy/model_executor/ops/gpu/fastdeploy_ops/__init__.py
3638
*/fastdeploy/model_executor/ops/gpu/deep_gemm/utils.py

0 commit comments

Comments
 (0)