@@ -87,7 +87,11 @@ class MaxRequestsScheduler : public BaseCapacityScheduler
8787
8888// / @brief Schedule requests using the MAX_UTILIZATION policy
8989// / @details Try reserving resources to advance requests by one step,
90- // / may pause previously started requests.
90+ // / may pause previously started requests. When a
91+ // / ``crossKvCacheManager`` is supplied, requests in the
92+ // / ``ENCODER_INIT`` state may be admitted for encoder compute
93+ // / without consuming self- or cross-KV blocks; the later
94+ // / ``CONTEXT_INIT`` decoder admission owns cross-pool budgeting.
9195class MaxUtilizationScheduler : public BaseCapacityScheduler
9296{
9397public:
@@ -96,8 +100,9 @@ class MaxUtilizationScheduler : public BaseCapacityScheduler
96100 LlmRequestState noScheduleAfterState = LlmRequestState::kGENERATION_COMPLETE );
97101
98102 [[nodiscard]] std::tuple<RequestVector, RequestVector> operator ()(
99- kv_cache_manager::BaseKVCacheManager& kvCacheManager, OptionalRef<BasePeftCacheManager const > peftCacheManager,
100- RequestList const & activeRequests) const ;
103+ kv_cache_manager::BaseKVCacheManager& kvCacheManager,
104+ OptionalRef<kv_cache_manager::BaseKVCacheManager> crossKvCacheManager,
105+ OptionalRef<BasePeftCacheManager const > peftCacheManager, RequestList const & activeRequests) const ;
101106
102107private:
103108 SizeType32 mMaxNumRequests ;
@@ -106,6 +111,10 @@ class MaxUtilizationScheduler : public BaseCapacityScheduler
106111};
107112
108113// / @brief Schedule requests using the GUARANTEED_NO_EVICT policy
114+ // / @details When a ``crossKvCacheManager`` is supplied, requests in the
115+ // / ``ENCODER_INIT`` state may be admitted for encoder compute
116+ // / without consuming self- or cross-KV blocks. The later
117+ // / ``CONTEXT_INIT`` decoder admission owns cross-pool budgeting.
109118class GuaranteedNoEvictScheduler : public BaseCapacityScheduler
110119{
111120public:
@@ -158,7 +167,11 @@ class CapacityScheduler : public Algorithm
158167 *
159168 * @param kvCacheManager Required in MaxUtilizationScheduler (as a ref) and in GuaranteedNoEvictScheduler and
160169 * StaticBatchScheduler (as a const ref).
161- * @param crossKvCacheManager Optional used in GuaranteedNoEvictScheduler and StaticBatchScheduler.
170+ * @param crossKvCacheManager Optional cross-attention KV cache manager; it must be supplied
171+ * for encoder-decoder admission. Used by MaxUtilizationScheduler (mutates:
172+ * ``startScheduling`` / ``schedulingRemoveSequence``) and by GuaranteedNoEvictScheduler /
173+ * StaticBatchScheduler (read-only). Encoder-init requests only require this pool
174+ * to be configured; decoder context admission budgets blocks from it.
162175 * @param peftCacheManager Optional; used in MaxUtilizationScheduler, GuaranteedNoEvictScheduler and
163176 * StaticBatchScheduler.
164177 * @param activeRequests
@@ -168,7 +181,7 @@ class CapacityScheduler : public Algorithm
168181 [[nodiscard]] std::tuple<RequestVector, RequestVector, RequestVector> operator ()(RequestList const & activeRequests,
169182 OptionalRef<kv_cache_manager::BaseKVCacheManager> kvCacheManager = std::nullopt ,
170183 OptionalRef<BasePeftCacheManager const > peftCacheManager = std::nullopt ,
171- OptionalRef<kv_cache_manager::BaseKVCacheManager const > crossKvCacheManager = std::nullopt ) const ;
184+ OptionalRef<kv_cache_manager::BaseKVCacheManager> crossKvCacheManager = std::nullopt ) const ;
172185
173186 // / @brief Sets the reorder policy to use AgentTreePolicy with the given configuration.
174187 // / @param agentPercentage The ratio of agent requests to schedule (0.0-1.0, -1.0 for random).
0 commit comments