Skip to content

Commit 7e582b7

Browse files
authored
fix: only unpad routed output before shared expert add (#1646)
Signed-off-by: AlpinDale <alpindale@gmail.com>
1 parent e03dafe commit 7e582b7

1 file changed

Lines changed: 3 additions & 1 deletion

File tree

aphrodite/model_executor/layers/fused_moe/runner/moe_runner.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -525,6 +525,8 @@ def forward(
525525

526526
# Record before `_maybe_pad_hidden_states` pads activations to match
527527
# `moe_config.hidden_dim`, e.g. after `align_trtllm_fp4_moe_hidden_dim_for_fi`
528+
# so routed output can be trimmed before
529+
# shared+routed add / latent up proj if needed.
528530
routed_hidden_dim = hidden_states.shape[-1]
529531
hidden_states, og_hidden_dim = self._maybe_pad_hidden_states(
530532
shared_experts_input,
@@ -550,7 +552,7 @@ def forward(
550552

551553
# Extract outputs from result
552554
shared_output, fused_output = _unpack(result)
553-
if hidden_dim_was_padded:
555+
if (shared_output is not None or self.routed_output_transform is not None) and hidden_dim_was_padded:
554556
fused_output = fused_output[..., :routed_hidden_dim]
555557

556558
# If combine kernel already reduced fused, reduce shared to match.

0 commit comments

Comments
 (0)