@@ -326,38 +326,39 @@ std::tuple<RequestVector, RequestVector> GuaranteedNoEvictScheduler::impl(
326326 bool const isEncoderInit = req->isEncoderInitState ();
327327 std::optional<kv_cache_manager::PrefixReuseSummary> summary;
328328 std::optional<kv_cache_manager::PrefixReuseSummary> crossSummary;
329- if (isFirstChunkContext )
329+ if (mEnablePrefixAwareScheduling )
330330 {
331- // analyzePrefixReuse asserts on variable-window managers; skip the walk there
332- // and let downstream callers fall back to their fresh tree-walk path.
333- if (!mEnablePrefixAwareScheduling )
331+ if (isFirstChunkContext)
334332 {
335- summary = kv_cache_manager::PrefixReuseSummary{};
336- if (crossKvCacheManager)
333+ // analyzePrefixReuse asserts on variable-window managers; skip the walk there
334+ // and let downstream callers fall back to their fresh tree-walk path.
335+ if (kvCacheManager.isEnableBlockReuse () && !kvCacheManager.getBlockManager ().isVariableWindow ())
337336 {
338- crossSummary = kv_cache_manager::PrefixReuseSummary{};
337+ auto uniqueTokens = req->getUniqueTokens (0 );
338+ summary = kvCacheManager.analyzePrefixReuse (uniqueTokens, *req);
339+ }
340+ if (crossKvCacheManager && crossKvCacheManager->isEnableBlockReuse ()
341+ && !crossKvCacheManager->getBlockManager ().isVariableWindow ())
342+ {
343+ auto uniqueTokens = *(req->getEncoderUniqueTokens ().value ());
344+ crossSummary = crossKvCacheManager->analyzePrefixReuse (uniqueTokens, *req);
339345 }
340346 }
341- else if (kvCacheManager.isEnableBlockReuse ()
342- && !kvCacheManager.getBlockManager ().isVariableWindow ())
343- {
344- auto uniqueTokens = req->getUniqueTokens (0 );
345- summary = kvCacheManager.analyzePrefixReuse (uniqueTokens, *req);
346- }
347- if (mEnablePrefixAwareScheduling && crossKvCacheManager && crossKvCacheManager->isEnableBlockReuse ()
347+ else if (isEncoderInit && crossKvCacheManager && crossKvCacheManager->isEnableBlockReuse ()
348348 && !crossKvCacheManager->getBlockManager ().isVariableWindow ())
349349 {
350+ // Encoder admission only needs the cross summary for reuse ordering.
350351 auto uniqueTokens = *(req->getEncoderUniqueTokens ().value ());
351352 crossSummary = crossKvCacheManager->analyzePrefixReuse (uniqueTokens, *req);
352353 }
353354 }
354- else if (mEnablePrefixAwareScheduling && isEncoderInit && crossKvCacheManager
355- && crossKvCacheManager->isEnableBlockReuse ()
356- && !crossKvCacheManager->getBlockManager ().isVariableWindow ())
355+ else if (isFirstChunkContext)
357356 {
358- // Encoder admission only needs the cross summary for reuse ordering.
359- auto uniqueTokens = *(req->getEncoderUniqueTokens ().value ());
360- crossSummary = crossKvCacheManager->analyzePrefixReuse (uniqueTokens, *req);
357+ summary = kv_cache_manager::PrefixReuseSummary{};
358+ if (crossKvCacheManager)
359+ {
360+ crossSummary = kv_cache_manager::PrefixReuseSummary{};
361+ }
361362 }
362363 // Beneficial-to-skip check using the cached summary
363364 if (!StaticBatchScheduling && skippingIsRelevant && (isFirstChunkContext || isEncoderInit)
0 commit comments