Skip to content

Commit a425911

Browse files
committed
feat(scheduler,apikey): bounded session affinity + per-key limits
Two related backend enhancements addressing #150 and #146.

Scheduler (#150 — "round_robin still hammering one account"):
- Add bounded session affinity. sessionAffinity now tracks boundAt / requestCount, and NextForSessionWithFilter escapes the binding when any of the following holds:
  • requestCount >= 50
  • bound for >= 5 minutes
  • bound account is no longer in the healthy tier
  This preserves prompt-cache reuse on hot sessions while preventing any single account from absorbing all traffic.
- Add Store.GetAffinityMode/SetAffinityMode (bounded/off/strict). Off means every request re-picks via the scheduler; strict is the legacy behavior. system_settings.affinity_mode persists the choice.
- FastScheduler in round_robin mode now sorts the healthy bucket by 7d usage ASC before round-robining, so the under-used accounts win the next slot naturally.

API Keys (#146 — "support per-key 5h/7d quota and model restrictions"):
- Add api_keys.limits JSONB column carrying APIKeyLimits:
  • model_allow / model_deny (whitelist takes priority)
  • rpm, rpd
  • cost_limit_5h, cost_limit_7d
  • token_limit_5h, token_limit_7d
- New proxy/apikey_limits.go performs the checks before upstream dispatch: O(1) model set check, plus sliding-window aggregation over usage_logs (cached 60s in Redis, falls back to DB). Limited keys bypass the runtime cache so PATCH takes effect immediately.
- 403 (permission_error) for model-denied, 429 (rate_limit_reached) for quota windows. Wired into ChatCompletions, Responses, anthropic, and images handlers.

Tested end-to-end against 2004 instance: model whitelist returns 403 for disallowed model, rpm=2 returns 429 after 10 injected usage rows in the last minute, settings persistence verified.
1 parent 265f67b commit a425911

13 files changed

Lines changed: 672 additions & 44 deletions

admin/bootstrap.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,5 +237,6 @@ func defaultBootstrapSettings() *database.SystemSettings {
237237
UsageLogFlushIntervalSeconds: 5,
238238
StreamFlushPolicy: proxy.StreamFlushPolicyImmediate,
239239
StreamFlushIntervalMS: 20,
240+
AffinityMode: "bounded",
240241
}
241242
}

admin/handler.go

Lines changed: 92 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -3388,13 +3388,14 @@ func (h *Handler) ListAPIKeys(c *gin.Context) {
33883388
}
33893389

33903390
// createKeyReq carries the JSON fields of an API-key creation request.
// NOTE(review): presumably the body bound in CreateAPIKey — confirm against the handler.
type createKeyReq struct {
3391-
Name string `json:"name"`
3392-
Key string `json:"key"`
3393-
QuotaLimit *float64 `json:"quota_limit"`
3394-
Quota *float64 `json:"quota"`
3395-
ExpiresAt string `json:"expires_at"`
3396-
ExpiresInDays *int `json:"expires_in_days"`
3397-
AllowedGroupIDs json.RawMessage `json:"allowed_group_ids"`
3391+
Name string `json:"name"`
3392+
Key string `json:"key"`
3393+
QuotaLimit *float64 `json:"quota_limit"`
3394+
Quota *float64 `json:"quota"`
3395+
ExpiresAt string `json:"expires_at"`
3396+
ExpiresInDays *int `json:"expires_in_days"`
3397+
AllowedGroupIDs json.RawMessage `json:"allowed_group_ids"`
3398+
// Limits is optional; it stays nil when the request has no "limits" field.
Limits *database.APIKeyLimits `json:"limits"`
33983399
}
33993400

34003401
// generateKey 生成随机 API Key
@@ -3486,12 +3487,18 @@ func (h *Handler) CreateAPIKey(c *gin.Context) {
34863487
}
34873488
}
34883489

3490+
var limits database.APIKeyLimits
3491+
if req.Limits != nil {
3492+
limits = sanitizeAPIKeyLimits(*req.Limits)
3493+
}
3494+
34893495
id, err := h.db.InsertAPIKeyWithOptions(ctx, database.APIKeyInput{
34903496
Name: req.Name,
34913497
Key: key,
34923498
QuotaLimit: quotaLimit,
34933499
ExpiresAt: expiresAt,
34943500
AllowedGroupIDs: allowedGroupIDs.Values,
3501+
Limits: limits,
34953502
})
34963503
if err != nil {
34973504
writeError(c, http.StatusInternalServerError, "创建失败: "+err.Error())
@@ -3525,12 +3532,13 @@ func (h *Handler) CreateAPIKey(c *gin.Context) {
35253532
}
35263533

35273534
// updateAPIKeyReq carries the JSON fields of an API-key update request.
// Pointer and json.RawMessage fields let a field that is absent from the
// request be told apart from one that was explicitly provided.
type updateAPIKeyReq struct {
3528-
Name *string `json:"name"`
3529-
QuotaLimit json.RawMessage `json:"quota_limit"`
3530-
Quota json.RawMessage `json:"quota"`
3531-
ExpiresAt json.RawMessage `json:"expires_at"`
3532-
ExpiresInDays *int `json:"expires_in_days"`
3533-
AllowedGroupIDs json.RawMessage `json:"allowed_group_ids"`
3535+
Name *string `json:"name"`
3536+
QuotaLimit json.RawMessage `json:"quota_limit"`
3537+
Quota json.RawMessage `json:"quota"`
3538+
ExpiresAt json.RawMessage `json:"expires_at"`
3539+
ExpiresInDays *int `json:"expires_in_days"`
3540+
AllowedGroupIDs json.RawMessage `json:"allowed_group_ids"`
3541+
// Limits is optional; it stays nil when the request has no "limits" field.
Limits *database.APIKeyLimits `json:"limits"`
35343542
}
35353543

35363544
func (h *Handler) UpdateAPIKey(c *gin.Context) {
@@ -3623,6 +3631,10 @@ func (h *Handler) UpdateAPIKey(c *gin.Context) {
36233631
update.Name = *req.Name
36243632
update.NameSet = true
36253633
}
3634+
if req.Limits != nil {
3635+
update.Limits = sanitizeAPIKeyLimits(*req.Limits)
3636+
update.LimitsSet = true
3637+
}
36263638
if err := h.db.UpdateAPIKey(ctx, id, update); err != nil {
36273639
writeInternalError(c, err)
36283640
return
@@ -3634,6 +3646,63 @@ func (h *Handler) UpdateAPIKey(c *gin.Context) {
36343646
writeMessage(c, http.StatusOK, "API Key 已更新")
36353647
}
36363648

3649+
// sanitizeAPIKeyLimits 把请求体里来的 limits 归一:负值置 0,空白模型名过滤,字符串小写。
3650+
// 同时配置 ModelAllow + ModelDeny 时白名单优先(在 enforce 时已生效),这里不强制清空黑名单。
3651+
func sanitizeAPIKeyLimits(in database.APIKeyLimits) database.APIKeyLimits {
3652+
clean := func(items []string) []string {
3653+
if len(items) == 0 {
3654+
return nil
3655+
}
3656+
seen := make(map[string]struct{}, len(items))
3657+
out := make([]string, 0, len(items))
3658+
for _, item := range items {
3659+
item = strings.TrimSpace(item)
3660+
if item == "" {
3661+
continue
3662+
}
3663+
lower := strings.ToLower(item)
3664+
if _, ok := seen[lower]; ok {
3665+
continue
3666+
}
3667+
seen[lower] = struct{}{}
3668+
out = append(out, item)
3669+
}
3670+
return out
3671+
}
3672+
out := database.APIKeyLimits{
3673+
ModelAllow: clean(in.ModelAllow),
3674+
ModelDeny: clean(in.ModelDeny),
3675+
RPM: maxInt(in.RPM, 0),
3676+
RPD: maxInt(in.RPD, 0),
3677+
CostLimit5h: maxFloat(in.CostLimit5h, 0),
3678+
CostLimit7d: maxFloat(in.CostLimit7d, 0),
3679+
TokenLimit5h: maxInt64(in.TokenLimit5h, 0),
3680+
TokenLimit7d: maxInt64(in.TokenLimit7d, 0),
3681+
}
3682+
return out
3683+
}
3684+
3685+
// maxInt returns v unless it is below lo, in which case lo is returned.
func maxInt(v, lo int) int {
	if v >= lo {
		return v
	}
	return lo
}
3691+
3692+
// maxInt64 returns v unless it is below lo, in which case lo is returned.
func maxInt64(v, lo int64) int64 {
	switch {
	case v < lo:
		return lo
	default:
		return v
	}
}
3698+
3699+
// maxFloat returns v unless it compares below lo, in which case lo is
// returned. The check is written as v < lo, so a NaN v (which compares false)
// is handed back unchanged.
func maxFloat(v, lo float64) float64 {
	result := v
	if v < lo {
		result = lo
	}
	return result
}
3705+
36373706
func parseOptionalAPIKeyQuota(quotaLimitRaw, quotaRaw json.RawMessage) (float64, bool, error) {
36383707
raw := quotaLimitRaw
36393708
if len(raw) == 0 {
@@ -3764,6 +3833,7 @@ type settingsResponse struct {
37643833
ProxyPoolEnabled bool `json:"proxy_pool_enabled"`
37653834
FastSchedulerEnabled bool `json:"fast_scheduler_enabled"`
37663835
SchedulerMode string `json:"scheduler_mode"`
3836+
AffinityMode string `json:"affinity_mode"`
37673837
MaxRetries int `json:"max_retries"`
37683838
MaxRateLimitRetries int `json:"max_rate_limit_retries"`
37693839
AllowRemoteMigration bool `json:"allow_remote_migration"`
@@ -3824,6 +3894,7 @@ type updateSettingsReq struct {
38243894
ProxyPoolEnabled *bool `json:"proxy_pool_enabled"`
38253895
FastSchedulerEnabled *bool `json:"fast_scheduler_enabled"`
38263896
SchedulerMode *string `json:"scheduler_mode"`
3897+
AffinityMode *string `json:"affinity_mode"`
38273898
MaxRetries *int `json:"max_retries"`
38283899
MaxRateLimitRetries *int `json:"max_rate_limit_retries"`
38293900
AllowRemoteMigration *bool `json:"allow_remote_migration"`
@@ -3966,6 +4037,7 @@ func (h *Handler) GetSettings(c *gin.Context) {
39664037
ProxyPoolEnabled: h.store.GetProxyPoolEnabled(),
39674038
FastSchedulerEnabled: h.store.FastSchedulerEnabled(),
39684039
SchedulerMode: h.store.GetSchedulerMode(),
4040+
AffinityMode: h.store.GetAffinityMode(),
39694041
MaxRetries: h.store.GetMaxRetries(),
39704042
MaxRateLimitRetries: h.store.GetMaxRateLimitRetries(),
39714043
AllowRemoteMigration: h.store.GetAllowRemoteMigration() && adminAuthSource != "disabled",
@@ -4208,6 +4280,11 @@ func (h *Handler) UpdateSettings(c *gin.Context) {
42084280
log.Printf("设置已更新: scheduler_mode = %s", *req.SchedulerMode)
42094281
}
42104282

4283+
if req.AffinityMode != nil {
4284+
h.store.SetAffinityMode(*req.AffinityMode)
4285+
log.Printf("设置已更新: affinity_mode = %s", *req.AffinityMode)
4286+
}
4287+
42114288
if req.MaxRetries != nil {
42124289
v := *req.MaxRetries
42134290
if v < 0 {
@@ -4451,6 +4528,7 @@ func (h *Handler) UpdateSettings(c *gin.Context) {
44514528
ProxyPoolEnabled: h.store.GetProxyPoolEnabled(),
44524529
FastSchedulerEnabled: h.store.FastSchedulerEnabled(),
44534530
SchedulerMode: h.store.GetSchedulerMode(),
4531+
AffinityMode: h.store.GetAffinityMode(),
44544532
MaxRetries: h.store.GetMaxRetries(),
44554533
MaxRateLimitRetries: h.store.GetMaxRateLimitRetries(),
44564534
AllowRemoteMigration: h.store.GetAllowRemoteMigration() && hasAdminSecret,
@@ -4516,6 +4594,7 @@ func (h *Handler) UpdateSettings(c *gin.Context) {
45164594
ProxyPoolEnabled: h.store.GetProxyPoolEnabled(),
45174595
FastSchedulerEnabled: h.store.FastSchedulerEnabled(),
45184596
SchedulerMode: h.store.GetSchedulerMode(),
4597+
AffinityMode: h.store.GetAffinityMode(),
45194598
MaxRetries: h.store.GetMaxRetries(),
45204599
MaxRateLimitRetries: h.store.GetMaxRateLimitRetries(),
45214600
AllowRemoteMigration: h.store.GetAllowRemoteMigration() && adminAuthSource != "disabled",

admin/responses.go

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -49,16 +49,17 @@ type apiKeysResponse struct {
4949

5050
// MaskedAPIKeyRow is the API key response row (carries both the masked and the full key).
5151
type MaskedAPIKeyRow struct {
52-
ID int64 `json:"id"`
53-
Name string `json:"name"`
54-
Key string `json:"key"`
55-
RawKey string `json:"raw_key"`
56-
QuotaLimit float64 `json:"quota_limit"`
57-
QuotaUsed float64 `json:"quota_used"`
58-
ExpiresAt *string `json:"expires_at"`
59-
AllowedGroupIDs []int64 `json:"allowed_group_ids"`
60-
Status string `json:"status"`
61-
CreatedAt string `json:"created_at"`
52+
ID int64 `json:"id"`
53+
Name string `json:"name"`
54+
Key string `json:"key"`
55+
RawKey string `json:"raw_key"`
56+
QuotaLimit float64 `json:"quota_limit"`
57+
QuotaUsed float64 `json:"quota_used"`
58+
ExpiresAt *string `json:"expires_at"`
59+
AllowedGroupIDs []int64 `json:"allowed_group_ids"`
60+
// Limits holds the per-key limits, serialized under the "limits" JSON key.
Limits database.APIKeyLimits `json:"limits"`
61+
Status string `json:"status"`
62+
CreatedAt string `json:"created_at"`
6263
}
6364

6465
// NewMaskedAPIKeyRow 创建 API Key 响应
@@ -83,6 +84,7 @@ func NewMaskedAPIKeyRow(row *database.APIKeyRow) *MaskedAPIKeyRow {
8384
QuotaUsed: row.QuotaUsed,
8485
ExpiresAt: expiresAt,
8586
AllowedGroupIDs: append([]int64(nil), row.AllowedGroupIDs...),
87+
Limits: row.Limits,
8688
Status: status,
8789
CreatedAt: row.CreatedAt.Format(time.RFC3339),
8890
}

auth/fast_scheduler.go

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,6 +392,24 @@ func (s *FastScheduler) insertLocked(acc *Account, now time.Time) {
392392
}
393393
return usageI < usageJ
394394
})
395+
} else if s.schedulerMode == "round_robin" && tier == HealthTierHealthy {
396+
// round_robin 模式下,healthy 桶按 7d 用量 ASC 排序后再走轮询。
397+
// 这样同一个 round 里,用得少的账号被先轮到,自然把负载摊平到所有可用账号上,
398+
// 避免出现"轮询模式仍然一直薅同一个号"的现象 (issue #150)。
399+
sort.SliceStable(entries, func(i, j int) bool {
400+
usageI := entries[i].acc.usagePercentForScheduling()
401+
usageJ := entries[j].acc.usagePercentForScheduling()
402+
if usageI == usageJ {
403+
if entries[i].dispatchScore != entries[j].dispatchScore {
404+
return entries[i].dispatchScore > entries[j].dispatchScore
405+
}
406+
if entries[i].proven != entries[j].proven {
407+
return entries[i].proven
408+
}
409+
return entries[i].dbID < entries[j].dbID
410+
}
411+
return usageI < usageJ
412+
})
395413
} else {
396414
sort.SliceStable(entries, func(i, j int) bool {
397415
if entries[i].dispatchScore == entries[j].dispatchScore {

0 commit comments

Comments
 (0)