Skip to content

Commit fba8a51

Browse files
mouxinqq and mouxin authored
[Feature] Fix mixed cache-aware (#7129)
* [Feature] Config eviction_duration
* [Feature] Config eviction_duration
* [Feature] Config eviction_duration
* [Feature] Config eviction_duration
* [Feature] Fix mixed cache-aware

---------

Co-authored-by: mouxin <mouxin@baidu.com>
1 parent 3b56411 commit fba8a51

1 file changed

Lines changed: 3 additions & 1 deletion

File tree

fastdeploy/golang_router/internal/gateway/completions.go

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -412,7 +412,8 @@ func CommonCompletions(c *gin.Context, extractor PromptExtractor, completionEndp
412412
} else {
413413
logger.Info(ctx, "Parsing completed; starting worker selection.")
414414
// Non-PD mode: use Mixed instance
415-
dest, err := manager.SelectWorker(ctx, "")
415+
message = extractor(rawReq)
416+
dest, err := manager.SelectWorker(ctx, message)
416417
if err != nil {
417418
logger.Error(ctx, "Failed to select worker: %v", err)
418419
c.Writer.WriteHeader(http.StatusBadGateway)
@@ -427,6 +428,7 @@ func CommonCompletions(c *gin.Context, extractor PromptExtractor, completionEndp
427428
defer func() {
428429
for _, url := range releaseTargets {
429430
scheduler_handler.Release(ctx, url)
431+
scheduler_handler.ReleasePrefillTokens(ctx, url, message)
430432
}
431433
}()
432434
}

0 commit comments

Comments
 (0)