Skip to content

Commit 02cd4bf

Browse files
committed
Address open-rabbit review comments
Signed-off-by: Simeng Liu <simengl@nvidia.com>
1 parent 31ab2aa commit 02cd4bf

4 files changed

Lines changed: 44 additions & 3 deletions

File tree

cpp/tensorrt_llm/batch_manager/capacityScheduler.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -351,7 +351,8 @@ std::tuple<RequestVector, RequestVector> GuaranteedNoEvictScheduler::impl(
351351
crossSummary = crossKvCacheManager->analyzePrefixReuse(uniqueTokens, *req);
352352
}
353353
}
354-
else if (isEncoderInit && crossKvCacheManager && crossKvCacheManager->isEnableBlockReuse()
354+
else if (mEnablePrefixAwareScheduling && isEncoderInit && crossKvCacheManager
355+
&& crossKvCacheManager->isEnableBlockReuse()
355356
&& !crossKvCacheManager->getBlockManager().isVariableWindow())
356357
{
357358
// Encoder admission only needs the cross summary for reuse ordering.

cpp/tensorrt_llm/executor/schedulerConfig.cpp

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,29 @@
2020
namespace tensorrt_llm::executor
2121
{
2222

23+
namespace
24+
{
25+
26+
bool dynamicBatchConfigsEqual(
27+
std::optional<DynamicBatchConfig> const& lhs, std::optional<DynamicBatchConfig> const& rhs)
28+
{
29+
if (lhs.has_value() != rhs.has_value())
30+
{
31+
return false;
32+
}
33+
if (!lhs.has_value())
34+
{
35+
return true;
36+
}
37+
38+
return lhs->getEnableBatchSizeTuning() == rhs->getEnableBatchSizeTuning()
39+
&& lhs->getEnableMaxNumTokensTuning() == rhs->getEnableMaxNumTokensTuning()
40+
&& lhs->getDynamicBatchMovingAverageWindow() == rhs->getDynamicBatchMovingAverageWindow()
41+
&& lhs->getBatchSizeTable() == rhs->getBatchSizeTable();
42+
}
43+
44+
} // namespace
45+
2346
SchedulerConfig::SchedulerConfig(CapacitySchedulerPolicy capacitySchedulerPolicy,
2447
std::optional<ContextChunkingPolicy> contextChunkingPolicy, std::optional<DynamicBatchConfig> dynamicBatchConfig,
2548
bool enablePrefixAwareScheduling)
@@ -34,6 +57,7 @@ bool SchedulerConfig::operator==(SchedulerConfig const& other) const
3457
{
3558
return mCapacitySchedulerPolicy == other.mCapacitySchedulerPolicy
3659
&& mContextChunkingPolicy == other.mContextChunkingPolicy
60+
&& dynamicBatchConfigsEqual(mDynamicBatchConfig, other.mDynamicBatchConfig)
3761
&& mEnablePrefixAwareScheduling == other.mEnablePrefixAwareScheduling;
3862
}
3963

cpp/tests/unit_tests/batch_manager/microBatchSchedulerTest.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1416,6 +1416,9 @@ TEST_F(CombinedSchedulerTest, PrefixAwareSchedulingDisabledKeepsReusableTokensZe
14161416
EXPECT_TRUE(disaggInit1.empty());
14171417
EXPECT_TRUE(paused1.empty());
14181418
EXPECT_EQ(req1->getEstimatedReusableTokens(), 0);
1419+
auto const req1Scheduled
1420+
= std::any_of(scheduled1.begin(), scheduled1.end(), [](auto const& req) { return req->mRequestId == 1; });
1421+
ASSERT_TRUE(req1Scheduled) << "Capacity scheduler must admit req1 before micro batch budget filtering";
14191422

14201423
RequestVector microBatchActive;
14211424
for (auto& req : scheduled1)

cpp/tests/unit_tests/executor/serializeUtilsTest.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -505,13 +505,26 @@ TEST(SerializeUtilsTest, SchedulerConfig)
505505
texec::SchedulerConfig defaultSchedulerConfig;
506506
EXPECT_TRUE(defaultSchedulerConfig.getEnablePrefixAwareScheduling());
507507

508+
texec::DynamicBatchConfig dynamicBatchConfig{/*enableBatchSizeTuning=*/true,
509+
/*enableMaxNumTokensTuning=*/true, /*dynamicBatchMovingAverageWindow=*/64};
508510
texec::SchedulerConfig schedulerConfig(texec::CapacitySchedulerPolicy::kMAX_UTILIZATION,
509-
texec::ContextChunkingPolicy::kFIRST_COME_FIRST_SERVED, std::nullopt, false);
511+
texec::ContextChunkingPolicy::kFIRST_COME_FIRST_SERVED, dynamicBatchConfig, false);
510512
auto schedulerConfig2 = serializeDeserialize(schedulerConfig);
511513
EXPECT_EQ(schedulerConfig.getCapacitySchedulerPolicy(), schedulerConfig2.getCapacitySchedulerPolicy());
512514
EXPECT_EQ(schedulerConfig.getContextChunkingPolicy(), schedulerConfig2.getContextChunkingPolicy());
513-
EXPECT_FALSE(schedulerConfig2.getDynamicBatchConfig().has_value());
515+
EXPECT_EQ(schedulerConfig, schedulerConfig2);
516+
ASSERT_TRUE(schedulerConfig2.getDynamicBatchConfig().has_value());
517+
EXPECT_TRUE(schedulerConfig2.getDynamicBatchConfig()->getEnableBatchSizeTuning());
518+
EXPECT_TRUE(schedulerConfig2.getDynamicBatchConfig()->getEnableMaxNumTokensTuning());
519+
EXPECT_EQ(schedulerConfig2.getDynamicBatchConfig()->getDynamicBatchMovingAverageWindow(), 64);
514520
EXPECT_FALSE(schedulerConfig2.getEnablePrefixAwareScheduling());
521+
522+
texec::SchedulerConfig differentDynamicBatchConfig(texec::CapacitySchedulerPolicy::kMAX_UTILIZATION,
523+
texec::ContextChunkingPolicy::kFIRST_COME_FIRST_SERVED,
524+
texec::DynamicBatchConfig{/*enableBatchSizeTuning=*/false, /*enableMaxNumTokensTuning=*/true,
525+
/*dynamicBatchMovingAverageWindow=*/64},
526+
false);
527+
EXPECT_FALSE(schedulerConfig == differentDynamicBatchConfig);
515528
}
516529

517530
TEST(SerializeUtilsTest, ParallelConfig)

0 commit comments

Comments
 (0)