Skip to content

Commit 15a153a

Browse files
committed
update forward
1 parent 3419f6d commit 15a153a

1 file changed

Lines changed: 12 additions & 1 deletion

File tree

fastdeploy/model_executor/models/deepseek_v3.py

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -362,6 +362,18 @@ def forward(
362362
fused_read_cache_and_interleave,
363363
)
364364

365+
need_do_prefill = forward_meta.max_len_tensor_cpu[1] > 0
366+
need_do_decode = forward_meta.max_len_tensor_cpu[2] > 0
367+
368+
# Idle pass (no prefill or decode work, e.g. CUDAGraph padding): skip the attention computation and return o_proj applied to an all-zero tensor
369+
if not need_do_prefill and not need_do_decode:
370+
return self.o_proj(
371+
paddle.zeros(
372+
[hidden_states.shape[0], self.num_attention_heads_tp * self.v_head_dim],
373+
dtype=hidden_states.dtype,
374+
)
375+
)
376+
365377
attn_out = None
366378
if self.use_gated_attn:
367379
gate_out = self.gate(hidden_states)
@@ -489,7 +501,6 @@ def forward(
489501
attn_out = attn_out * ((F.softsign(gate_out) + 1.0) / 2.0)
490502
else:
491503
raise NotImplementedError(f"{gated_attn_act} not implemented")
492-
493504
output = self.o_proj(attn_out)
494505
return output
495506

0 commit comments

Comments
 (0)