File tree Expand file tree Collapse file tree
fastdeploy/model_executor/models
Expand file tree Collapse file tree
Original file line number | Diff line number | Diff line change
@@ -362,18 +362,6 @@ def forward(
362362 fused_read_cache_and_interleave ,
363363 )
364364
365- need_do_prefill = forward_meta .max_len_tensor_cpu [1 ] > 0
366- need_do_decode = forward_meta .max_len_tensor_cpu [2 ] > 0
367-
368- # Idle pass (e.g. CUDAGraph padding): skip all attention computation
369- if not need_do_prefill and not need_do_decode :
370- return self .o_proj (
371- paddle .zeros (
372- [hidden_states .shape [0 ], self .num_attention_heads_tp * self .v_head_dim ],
373- dtype = hidden_states .dtype ,
374- )
375- )
376-
377365 attn_out = None
378366 if self .use_gated_attn :
379367 gate_out = self .gate (hidden_states )
@@ -1070,6 +1058,12 @@ def forward(
10701058 residual : paddle .Tensor ,
10711059 ):
10721060 """ """
1061+ need_do_prefill = forward_meta .max_len_tensor_cpu [1 ] > 0
1062+ need_do_decode = forward_meta .max_len_tensor_cpu [2 ] > 0
1063+
1064+ if not need_do_prefill and not need_do_decode :
1065+ return hidden_states
1066+
10731067 if hidden_states .shape [0 ] > 0 :
10741068 hidden_states , residual = self .input_layernorm (
10751069 hidden_states , residual_input = residual , forward_meta = forward_meta
You can’t perform that action at this time.
0 commit comments