@@ -314,38 +314,39 @@ std::tuple<RequestVector, RequestVector> GuaranteedNoEvictScheduler::impl(
314314 bool const isEncoderInit = req->isEncoderInitState ();
315315 std::optional<kv_cache_manager::PrefixReuseSummary> summary;
316316 std::optional<kv_cache_manager::PrefixReuseSummary> crossSummary;
317- if (isFirstChunkContext )
317+ if (mEnablePrefixAwareScheduling )
318318 {
319- // analyzePrefixReuse asserts on variable-window managers; skip the walk there
320- // and let downstream callers fall back to their fresh tree-walk path.
321- if (!mEnablePrefixAwareScheduling )
319+ if (isFirstChunkContext)
322320 {
323- summary = kv_cache_manager::PrefixReuseSummary{};
324- if (crossKvCacheManager)
321+ // analyzePrefixReuse asserts on variable-window managers; skip the walk there
322+ // and let downstream callers fall back to their fresh tree-walk path.
323+ if (kvCacheManager.isEnableBlockReuse () && !kvCacheManager.getBlockManager ().isVariableWindow ())
325324 {
326- crossSummary = kv_cache_manager::PrefixReuseSummary{};
325+ auto uniqueTokens = req->getUniqueTokens (0 );
326+ summary = kvCacheManager.analyzePrefixReuse (uniqueTokens, *req);
327+ }
328+ if (crossKvCacheManager && crossKvCacheManager->isEnableBlockReuse ()
329+ && !crossKvCacheManager->getBlockManager ().isVariableWindow ())
330+ {
331+ auto uniqueTokens = *(req->getEncoderUniqueTokens ().value ());
332+ crossSummary = crossKvCacheManager->analyzePrefixReuse (uniqueTokens, *req);
327333 }
328334 }
329- else if (kvCacheManager.isEnableBlockReuse ()
330- && !kvCacheManager.getBlockManager ().isVariableWindow ())
331- {
332- auto uniqueTokens = req->getUniqueTokens (0 );
333- summary = kvCacheManager.analyzePrefixReuse (uniqueTokens, *req);
334- }
335- if (mEnablePrefixAwareScheduling && crossKvCacheManager && crossKvCacheManager->isEnableBlockReuse ()
335+ else if (isEncoderInit && crossKvCacheManager && crossKvCacheManager->isEnableBlockReuse ()
336336 && !crossKvCacheManager->getBlockManager ().isVariableWindow ())
337337 {
338+ // Encoder admission only needs the cross summary for reuse ordering.
338339 auto uniqueTokens = *(req->getEncoderUniqueTokens ().value ());
339340 crossSummary = crossKvCacheManager->analyzePrefixReuse (uniqueTokens, *req);
340341 }
341342 }
342- else if (mEnablePrefixAwareScheduling && isEncoderInit && crossKvCacheManager
343- && crossKvCacheManager->isEnableBlockReuse ()
344- && !crossKvCacheManager->getBlockManager ().isVariableWindow ())
343+ else if (isFirstChunkContext)
345344 {
346- // Encoder admission only needs the cross summary for reuse ordering.
347- auto uniqueTokens = *(req->getEncoderUniqueTokens ().value ());
348- crossSummary = crossKvCacheManager->analyzePrefixReuse (uniqueTokens, *req);
345+ summary = kv_cache_manager::PrefixReuseSummary{};
346+ if (crossKvCacheManager)
347+ {
348+ crossSummary = kv_cache_manager::PrefixReuseSummary{};
349+ }
349350 }
350351 // Beneficial-to-skip check using the cached summary
351352 if (!StaticBatchScheduling && skippingIsRelevant && (isFirstChunkContext || isEncoderInit)
0 commit comments