@@ -290,6 +290,109 @@ INSTANTIATE_TEST_SUITE_P(PlacementTests, MoePlacementTest,
290290 return name;
291291 });
292292
293+ TEST (MoeLoadBalancerMaskOnlyTest, DynamicPlacementHonorsDeadRankMask)
294+ {
295+ constexpr int kExpertCount = 4 ;
296+ constexpr int kTopK = 2 ;
297+ constexpr int kEpRank = 0 ;
298+ constexpr int kEpSize = 4 ;
299+ constexpr int kSlotCountPerRank = 2 ;
300+ constexpr int kDeadRank = 2 ;
301+ constexpr int kActiveSlotCount = (kEpSize - 1 ) * kSlotCountPerRank ;
302+
303+ tensorrt_llm::kernels::MoeLoadBalanceMetaInfo metaInfo{kExpertCount , kTopK , kEpRank , kEpSize , kSlotCountPerRank };
304+ std::vector<float > expertLoadFactor{8 .0F , 4 .0F , 2 .0F , 1 .0F };
305+ std::vector<uint8_t > deadRankMask{0 , 0 , 1 , 0 };
306+
307+ MoePlacementCpuInfo cpuPlacement;
308+ doReplication (metaInfo, expertLoadFactor.data (), &cpuPlacement, &deadRankMask);
309+
310+ int replicaSum = 0 ;
311+ for (int replicaCount : cpuPlacement.expertReplicaCount )
312+ {
313+ replicaSum += replicaCount;
314+ }
315+ EXPECT_EQ (replicaSum, kActiveSlotCount );
316+
317+ cpuPlacement.rankExpertIds .resize (kEpSize );
318+ for (int rank = 0 ; rank < kEpSize ; ++rank)
319+ {
320+ cpuPlacement.rankExpertIds [rank].resize (kSlotCountPerRank , 99 );
321+ }
322+
323+ doPlacement (metaInfo, expertLoadFactor.data (), &cpuPlacement, &deadRankMask);
324+
325+ std::vector<int > placedReplicas (kExpertCount , 0 );
326+ int assignedSlotCount = 0 ;
327+ for (int rank = 0 ; rank < kEpSize ; ++rank)
328+ {
329+ for (int slot = 0 ; slot < kSlotCountPerRank ; ++slot)
330+ {
331+ int const expertId = cpuPlacement.rankExpertIds [rank][slot];
332+ if (rank == kDeadRank )
333+ {
334+ EXPECT_EQ (expertId, -1 );
335+ continue ;
336+ }
337+
338+ EXPECT_GE (expertId, 0 );
339+ EXPECT_LT (expertId, kExpertCount );
340+ if (expertId >= 0 && expertId < kExpertCount )
341+ {
342+ ++placedReplicas[expertId];
343+ ++assignedSlotCount;
344+ }
345+ }
346+ }
347+
348+ EXPECT_EQ (assignedSlotCount, kActiveSlotCount );
349+ for (int expertId = 0 ; expertId < kExpertCount ; ++expertId)
350+ {
351+ EXPECT_EQ (placedReplicas[expertId], cpuPlacement.expertReplicaCount [expertId]);
352+ }
353+ }
354+
355+ TEST (MoeLoadBalancerMaskOnlyTest, DynamicPlacementRejectsMismatchedDeadRankMask)
356+ {
357+ constexpr int kExpertCount = 4 ;
358+ constexpr int kTopK = 2 ;
359+ constexpr int kEpRank = 0 ;
360+ constexpr int kEpSize = 4 ;
361+ constexpr int kSlotCountPerRank = 2 ;
362+
363+ tensorrt_llm::kernels::MoeLoadBalanceMetaInfo metaInfo{kExpertCount , kTopK , kEpRank , kEpSize , kSlotCountPerRank };
364+ std::vector<float > expertLoadFactor{1 .0F , 1 .0F , 1 .0F , 1 .0F };
365+ std::vector<uint8_t > deadRankMask{0 , 1 };
366+
367+ MoePlacementCpuInfo cpuPlacement;
368+ EXPECT_THROW (doReplication (metaInfo, expertLoadFactor.data (), &cpuPlacement, &deadRankMask),
369+ tensorrt_llm::common::TllmException);
370+ }
371+
372+ TEST (MoeLoadBalancerMaskOnlyTest, DynamicPlacementRejectsReplicaCountMismatch)
373+ {
374+ constexpr int kExpertCount = 4 ;
375+ constexpr int kTopK = 2 ;
376+ constexpr int kEpRank = 0 ;
377+ constexpr int kEpSize = 4 ;
378+ constexpr int kSlotCountPerRank = 2 ;
379+
380+ tensorrt_llm::kernels::MoeLoadBalanceMetaInfo metaInfo{kExpertCount , kTopK , kEpRank , kEpSize , kSlotCountPerRank };
381+ std::vector<float > expertLoadFactor{1 .0F , 1 .0F , 1 .0F , 1 .0F };
382+ std::vector<uint8_t > deadRankMask{0 , 0 , 1 , 0 };
383+
384+ MoePlacementCpuInfo cpuPlacement;
385+ cpuPlacement.expertReplicaCount = {1 , 1 , 1 , 1 };
386+ cpuPlacement.rankExpertIds .resize (kEpSize );
387+ for (int rank = 0 ; rank < kEpSize ; ++rank)
388+ {
389+ cpuPlacement.rankExpertIds [rank].resize (kSlotCountPerRank , -1 );
390+ }
391+
392+ EXPECT_THROW (doPlacement (metaInfo, expertLoadFactor.data (), &cpuPlacement, &deadRankMask),
393+ tensorrt_llm::common::TllmException);
394+ }
395+
293396TEST (MoeLoadBalancerMaskOnlyTest, ReconfigureMaskOnlyRemovesDeadRankSlots)
294397{
295398 setenv (" TLLM_HOST_ACCESSIBLE_ALLOW_MANAGED_FALLBACK" , " 1" , 1 );
0 commit comments