Skip to content

Commit bfbf0b5

Browse files
committed
update forward
1 parent 15a153a commit bfbf0b5

1 file changed

Lines changed: 6 additions & 12 deletions

File tree

fastdeploy/model_executor/models/deepseek_v3.py

Lines changed: 6 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -362,18 +362,6 @@ def forward(
362362
fused_read_cache_and_interleave,
363363
)
364364

365-
need_do_prefill = forward_meta.max_len_tensor_cpu[1] > 0
366-
need_do_decode = forward_meta.max_len_tensor_cpu[2] > 0
367-
368-
# Idle pass (e.g. CUDAGraph padding): skip all attention computation
369-
if not need_do_prefill and not need_do_decode:
370-
return self.o_proj(
371-
paddle.zeros(
372-
[hidden_states.shape[0], self.num_attention_heads_tp * self.v_head_dim],
373-
dtype=hidden_states.dtype,
374-
)
375-
)
376-
377365
attn_out = None
378366
if self.use_gated_attn:
379367
gate_out = self.gate(hidden_states)
@@ -1070,6 +1058,12 @@ def forward(
10701058
residual: paddle.Tensor,
10711059
):
10721060
""" """
1061+
need_do_prefill = forward_meta.max_len_tensor_cpu[1] > 0
1062+
need_do_decode = forward_meta.max_len_tensor_cpu[2] > 0
1063+
1064+
if not need_do_prefill and not need_do_decode:
1065+
return hidden_states
1066+
10731067
if hidden_states.shape[0] > 0:
10741068
hidden_states, residual = self.input_layernorm(
10751069
hidden_states, residual_input=residual, forward_meta=forward_meta

0 commit comments

Comments
 (0)