fix: only unpad routed output before shared expert add (#1646)

AlpinDale · web-flow · commit 7e582b789512 · 2026-04-27T12:17:21.000+04:30
Signed-off-by: AlpinDale <alpindale@gmail.com>
diff --git a/aphrodite/model_executor/layers/fused_moe/runner/moe_runner.py b/aphrodite/model_executor/layers/fused_moe/runner/moe_runner.py
@@ -525,6 +525,8 @@ def forward(
 
         # Record before `_maybe_pad_hidden_states` pads activations to match
         # `moe_config.hidden_dim`, e.g. after `align_trtllm_fp4_moe_hidden_dim_for_fi`
+        # so routed output can be trimmed before
+        # shared+routed add / latent up proj if needed.
         routed_hidden_dim = hidden_states.shape[-1]
         hidden_states, og_hidden_dim = self._maybe_pad_hidden_states(
             shared_experts_input,
@@ -550,7 +552,7 @@ def forward(
 
         # Extract outputs from result
         shared_output, fused_output = _unpack(result)
-        if hidden_dim_was_padded:
+        if (shared_output is not None or self.routed_output_transform is not None) and hidden_dim_was_padded:
             fused_output = fused_output[..., :routed_hidden_dim]
 
         # If combine kernel already reduced fused, reduce shared to match.