[None][fix] fix AutoDeploy sharding IR dist config

bmarimuthu-nv · bmarimuthu-nv · commit 81831234ff3b · 2026-06-09T17:29:51.000-07:00
Signed-off-by: Balamurugan Marimuthu <246387390+bmarimuthu-nv@users.noreply.github.com>
diff --git a/tensorrt_llm/_torch/auto_deploy/transform/library/sharding_ir.py b/tensorrt_llm/_torch/auto_deploy/transform/library/sharding_ir.py
@@ -1069,14 +1069,6 @@ def get_partition(lst, world_size, rank):
             self._localize_expert_indices(
                 gm, selected_experts, routing_weights, experts_per_rank, ep_rank, ep_size
             )
-            _, all_reduce_op = _get_dist_ops(dc.dist_backend)
-            with gm.graph.inserting_after(self.node):
-                red = gm.graph.call_function(
-                    all_reduce_op,
-                    args=(self.node, dc.allreduce_strategy),
-                )
-                self.node.replace_all_uses_with(red)
-                red.replace_input_with(red, self.node)
 
         ad_logger.debug(
             f"  sharded MoE: {num_experts} experts, ep={ep_size}, ep_rank={ep_rank}, "
diff --git a/tensorrt_llm/_torch/auto_deploy/utils/dist_config.py b/tensorrt_llm/_torch/auto_deploy/utils/dist_config.py
@@ -137,6 +137,7 @@ def from_sharding_params(
         dist_mapping: dict,
         enable_attention_dp: bool = False,
         allreduce_strategy: str = "NCCL",
+        dist_backend: Literal["auto", "torch", "trtllm"] = "auto",
     ) -> "DistConfig":
         """Build ``DistConfig`` from sharding-transform YAML inputs + runtime MPI info.
 
@@ -154,6 +155,7 @@ def from_sharding_params(
             moe_cluster_size=dist_mapping.get("moe_cluster", 1),
             enable_attention_dp=enable_attention_dp,
             allreduce_strategy=allreduce_strategy,
+            dist_backend=dist_backend,
         )
 
     def to_mapping(self) -> Any:
diff --git a/tests/unittest/auto_deploy/multigpu/transformations/library/test_apply_sharding_hints.py b/tests/unittest/auto_deploy/multigpu/transformations/library/test_apply_sharding_hints.py
@@ -480,8 +480,8 @@ def _make_list_moe_graph():
     return gm
 
 
-def test_list_moe_ir_contract_inserts_all_reduce_for_ep():
-    """List-based MoE EP sharding localizes experts and adds a graph collective."""
+def test_list_moe_ir_contract_leaves_ep_reduction_to_modeling():
+    """List-based MoE EP sharding localizes experts without choosing a reduction site."""
     gm = _make_list_moe_graph()
     gm_out = _make_optimizer(world_size=2)(None, gm)
     moe_nodes = _call_nodes(gm_out, torch.ops.auto_deploy.torch_moe)
@@ -492,7 +492,7 @@ def test_list_moe_ir_contract_inserts_all_reduce_for_ep():
     assert len(w1_weight) == 2
     assert len(w2_weight) == 2
     assert len(w3_weight) == 2
-    assert len(_call_nodes(gm_out, torch.ops.auto_deploy.torch_dist_all_reduce)) == 1
+    assert len(_call_nodes(gm_out, torch.ops.auto_deploy.torch_dist_all_reduce)) == 0
 
 
 def _optional_auto_deploy_default(name):