Address comments from Robin and Tyler.

SimengLiu-nv · SimengLiu-nv · commit bcd57ce850d1 · 2026-06-29T13:08:20.000-07:00
Signed-off-by: Simeng Liu <simengl@nvidia.com>
diff --git a/cpp/include/tensorrt_llm/executor/executor.h b/cpp/include/tensorrt_llm/executor/executor.h
@@ -989,6 +989,8 @@ class DynamicBatchConfig
 
     [[nodiscard]] std::vector<std::pair<SizeType32, SizeType32>> getBatchSizeTable() const;
 
+    bool operator==(DynamicBatchConfig const& other) const;
+
     /// @brief The default value of batch size table
     static std::vector<std::pair<SizeType32, SizeType32>> const kDefaultBatchSizeTable;
 
diff --git a/cpp/tensorrt_llm/batch_manager/capacityScheduler.cpp b/cpp/tensorrt_llm/batch_manager/capacityScheduler.cpp
@@ -326,38 +326,39 @@ std::tuple<RequestVector, RequestVector> GuaranteedNoEvictScheduler::impl(
                 bool const isEncoderInit = req->isEncoderInitState();
                 std::optional<kv_cache_manager::PrefixReuseSummary> summary;
                 std::optional<kv_cache_manager::PrefixReuseSummary> crossSummary;
-                if (isFirstChunkContext)
+                if (mEnablePrefixAwareScheduling)
                 {
-                    // analyzePrefixReuse asserts on variable-window managers; skip the walk there
-                    // and let downstream callers fall back to their fresh tree-walk path.
-                    if (!mEnablePrefixAwareScheduling)
+                    if (isFirstChunkContext)
                     {
-                        summary = kv_cache_manager::PrefixReuseSummary{};
-                        if (crossKvCacheManager)
+                        // analyzePrefixReuse asserts on variable-window managers; skip the walk there
+                        // and let downstream callers fall back to their fresh tree-walk path.
+                        if (kvCacheManager.isEnableBlockReuse() && !kvCacheManager.getBlockManager().isVariableWindow())
                         {
-                            crossSummary = kv_cache_manager::PrefixReuseSummary{};
+                            auto uniqueTokens = req->getUniqueTokens(0);
+                            summary = kvCacheManager.analyzePrefixReuse(uniqueTokens, *req);
+                        }
+                        if (crossKvCacheManager && crossKvCacheManager->isEnableBlockReuse()
+                            && !crossKvCacheManager->getBlockManager().isVariableWindow())
+                        {
+                            auto uniqueTokens = *(req->getEncoderUniqueTokens().value());
+                            crossSummary = crossKvCacheManager->analyzePrefixReuse(uniqueTokens, *req);
                         }
                     }
-                    else if (kvCacheManager.isEnableBlockReuse()
-                        && !kvCacheManager.getBlockManager().isVariableWindow())
-                    {
-                        auto uniqueTokens = req->getUniqueTokens(0);
-                        summary = kvCacheManager.analyzePrefixReuse(uniqueTokens, *req);
-                    }
-                    if (mEnablePrefixAwareScheduling && crossKvCacheManager && crossKvCacheManager->isEnableBlockReuse()
+                    else if (isEncoderInit && crossKvCacheManager && crossKvCacheManager->isEnableBlockReuse()
                         && !crossKvCacheManager->getBlockManager().isVariableWindow())
                     {
+                        // Encoder admission only needs the cross summary for reuse ordering.
                         auto uniqueTokens = *(req->getEncoderUniqueTokens().value());
                         crossSummary = crossKvCacheManager->analyzePrefixReuse(uniqueTokens, *req);
                     }
                 }
-                else if (mEnablePrefixAwareScheduling && isEncoderInit && crossKvCacheManager
-                    && crossKvCacheManager->isEnableBlockReuse()
-                    && !crossKvCacheManager->getBlockManager().isVariableWindow())
+                else if (isFirstChunkContext)
                 {
-                    // Encoder admission only needs the cross summary for reuse ordering.
-                    auto uniqueTokens = *(req->getEncoderUniqueTokens().value());
-                    crossSummary = crossKvCacheManager->analyzePrefixReuse(uniqueTokens, *req);
+                    summary = kv_cache_manager::PrefixReuseSummary{};
+                    if (crossKvCacheManager)
+                    {
+                        crossSummary = kv_cache_manager::PrefixReuseSummary{};
+                    }
                 }
                 // Beneficial-to-skip check using the cached summary
                 if (!StaticBatchScheduling && skippingIsRelevant && (isFirstChunkContext || isEncoderInit)
diff --git a/cpp/tensorrt_llm/executor/dynamicBatchConfig.cpp b/cpp/tensorrt_llm/executor/dynamicBatchConfig.cpp
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: Apache-2.0
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
@@ -49,6 +49,14 @@ std::vector<std::pair<SizeType32, SizeType32>> DynamicBatchConfig::getBatchSizeT
     return mBatchSizeTable;
 }
 
+bool DynamicBatchConfig::operator==(DynamicBatchConfig const& other) const
+{
+    return mEnableBatchSizeTuning == other.mEnableBatchSizeTuning
+        && mEnableMaxNumTokensTuning == other.mEnableMaxNumTokensTuning
+        && mDynamicBatchMovingAverageWindow == other.mDynamicBatchMovingAverageWindow
+        && mBatchSizeTable == other.mBatchSizeTable;
+}
+
 std::vector<std::pair<SizeType32, SizeType32>> const DynamicBatchConfig::kDefaultBatchSizeTable{
     {144, 128},
     {336, 256},
diff --git a/cpp/tensorrt_llm/executor/schedulerConfig.cpp b/cpp/tensorrt_llm/executor/schedulerConfig.cpp
@@ -20,29 +20,6 @@
 namespace tensorrt_llm::executor
 {
 
-namespace
-{
-
-bool dynamicBatchConfigsEqual(
-    std::optional<DynamicBatchConfig> const& lhs, std::optional<DynamicBatchConfig> const& rhs)
-{
-    if (lhs.has_value() != rhs.has_value())
-    {
-        return false;
-    }
-    if (!lhs.has_value())
-    {
-        return true;
-    }
-
-    return lhs->getEnableBatchSizeTuning() == rhs->getEnableBatchSizeTuning()
-        && lhs->getEnableMaxNumTokensTuning() == rhs->getEnableMaxNumTokensTuning()
-        && lhs->getDynamicBatchMovingAverageWindow() == rhs->getDynamicBatchMovingAverageWindow()
-        && lhs->getBatchSizeTable() == rhs->getBatchSizeTable();
-}
-
-} // namespace
-
 SchedulerConfig::SchedulerConfig(CapacitySchedulerPolicy capacitySchedulerPolicy,
     std::optional<ContextChunkingPolicy> contextChunkingPolicy, std::optional<DynamicBatchConfig> dynamicBatchConfig,
     bool enablePrefixAwareScheduling)
@@ -56,8 +33,7 @@ SchedulerConfig::SchedulerConfig(CapacitySchedulerPolicy capacitySchedulerPolicy
 bool SchedulerConfig::operator==(SchedulerConfig const& other) const
 {
     return mCapacitySchedulerPolicy == other.mCapacitySchedulerPolicy
-        && mContextChunkingPolicy == other.mContextChunkingPolicy
-        && dynamicBatchConfigsEqual(mDynamicBatchConfig, other.mDynamicBatchConfig)
+        && mContextChunkingPolicy == other.mContextChunkingPolicy && mDynamicBatchConfig == other.mDynamicBatchConfig
         && mEnablePrefixAwareScheduling == other.mEnablePrefixAwareScheduling;
 }
 
diff --git a/cpp/tensorrt_llm/nanobind/executor/executorConfig.cpp b/cpp/tensorrt_llm/nanobind/executor/executorConfig.cpp
@@ -76,14 +76,13 @@ void initConfigBindings(nb::module_& m)
 
     auto schedulerConfigSetstate = [](tle::SchedulerConfig& self, nb::tuple const& state)
     {
-        if (state.size() != 3 && state.size() != 4)
+        if (state.size() != 4)
         {
             throw std::runtime_error("Invalid state!");
         }
-        bool const enablePrefixAwareScheduling = state.size() == 4 ? nb::cast<bool>(state[3]) : true;
         new (&self) tle::SchedulerConfig(nb::cast<tle::CapacitySchedulerPolicy>(state[0]),
             nb::cast<std::optional<tle::ContextChunkingPolicy>>(state[1]),
-            nb::cast<std::optional<tle::DynamicBatchConfig>>(state[2]), enablePrefixAwareScheduling);
+            nb::cast<std::optional<tle::DynamicBatchConfig>>(state[2]), nb::cast<bool>(state[3]));
     };
     auto schedulerConfigGetstate = [](tle::SchedulerConfig const& self)
     {
diff --git a/tests/unittest/bindings/test_executor_bindings.py b/tests/unittest/bindings/test_executor_bindings.py
@@ -1372,7 +1372,7 @@ def test_dynamic_batch_config_pickle():
     assert config_copy.dynamic_batch_moving_average_window == 128
 
 
-def test_scheduler_config() -> None:
+def test_scheduler_config():
     capacity_scheduler_policy = trtllm.CapacitySchedulerPolicy.GUARANTEED_NO_EVICT
     config = trtllm.SchedulerConfig()
     assert config.capacity_scheduler_policy == capacity_scheduler_policy

Original file line number	Diff line number	Diff line change
`@@ -76,14 +76,13 @@ void initConfigBindings(nb::module_& m)`
`76`	`76`
`77`	`77`	`auto schedulerConfigSetstate = [](tle::SchedulerConfig& self, nb::tuple const& state)`
`78`	`78`	`{`
`79`		`- if (state.size() != 3 && state.size() != 4)`
	`79`	`+ if (state.size() != 4)`
`80`	`80`	`{`
`81`	`81`	`throw std::runtime_error("Invalid state!");`
`82`	`82`	`}`
`83`		`- bool const enablePrefixAwareScheduling = state.size() == 4 ? nb::cast<bool>(state[3]) : true;`
`84`	`83`	`new (&self) tle::SchedulerConfig(nb::cast<tle::CapacitySchedulerPolicy>(state[0]),`
`85`	`84`	`nb::cast<std::optional<tle::ContextChunkingPolicy>>(state[1]),`
`86`		`- nb::cast<std::optional<tle::DynamicBatchConfig>>(state[2]), enablePrefixAwareScheduling);`
	`85`	`+ nb::cast<std::optional<tle::DynamicBatchConfig>>(state[2]), nb::cast<bool>(state[3]));`
`87`	`86`	`};`
`88`	`87`	`auto schedulerConfigGetstate = [](tle::SchedulerConfig const& self)`
`89`	`88`	`{`