diff --git a/.env.example b/.env.example
index 7d4c9ab2..c10a7638 100644
--- a/.env.example
+++ b/.env.example
@@ -4,6 +4,9 @@
# ============================================================
# HTTP 服务端口
+# Note: the standard compose files bind to all interfaces (0.0.0.0).
+# Set BIND_HOST=127.0.0.1 in .env to restrict to localhost only.
+# BIND_HOST=0.0.0.0
CODEX_PORT=8080
# 监听地址(默认 0.0.0.0,兼容 Docker 端口映射 / 反向代理 / 公网部署)
diff --git a/.env.sqlite.example b/.env.sqlite.example
index 1276fc7e..b388a0ad 100644
--- a/.env.sqlite.example
+++ b/.env.sqlite.example
@@ -4,6 +4,9 @@
# ============================================================
# HTTP 服务端口
+# Note: the SQLite compose files bind to 127.0.0.1 by default for security.
+# Override with BIND_HOST=0.0.0.0 in .env if you need external access.
+# BIND_HOST=127.0.0.1
CODEX_PORT=8080
# 监听地址(默认 0.0.0.0,兼容 Docker 端口映射 / 反向代理 / 公网部署)
diff --git a/README.md b/README.md
index eb088d46..2c647caf 100644
--- a/README.md
+++ b/README.md
@@ -23,9 +23,10 @@ Run it as a full **PostgreSQL + Redis** production stack or as a single-containe
| One compatible gateway | OpenAI-style Chat Completions / Responses / Images, Anthropic Messages, prefixless compatibility routes, and native Codex Responses forwarding are all exposed through one service. |
-| Account-pool scheduler | Selection is driven by account status, health tier, scheduler score, dynamic concurrency, cooldown recovery, and recent usage so unhealthy accounts are avoided automatically. |
-| Visual admin console | The embedded React / Vite dashboard covers account import and testing, API keys, proxy pools, image studio, prompt filtering, usage analytics, operations, scheduler board, and system settings. |
-| Two deployment shapes | Use PostgreSQL + Redis for production or SQLite + Memory for lightweight single-node deployments; Docker images, source builds, local development, and the interactive deploy script are ready to use. |
+| Account-pool scheduler | Selection is driven by account status, health tier, scheduler score, dynamic concurrency, cooldown recovery, and recent usage so unhealthy accounts are avoided automatically. Supports round_robin and remaining_quota modes, with per-account credit billing flags. |
+| Visual admin console | The embedded React / Vite dashboard covers account import and testing, API keys, proxy pools, image studio (text-to-image + image-to-image), prompt filtering, usage analytics, operations, scheduler board, and system settings. |
+| Two deployment shapes | Use PostgreSQL + Redis for production or SQLite + Memory for lightweight single-node deployments; Docker images, source builds, local development, and the interactive deploy script are ready to use. SQLite mode binds to 127.0.0.1 by default for security. |
+| Billing and observability | Per-account 5h/7d windowed USD cost tracking, credit quota support, API key usage tracking, OAuth PKCE token acquisition, prompt filtering, and a usage dashboard with request logs and trend charts. |
---
@@ -164,6 +165,7 @@ Notes:
- Standard and SQLite modes both read `.env`.
- Before switching deployment modes, replace `.env` with the matching example file.
- The SQLite lightweight mode runs a single `codex2api` container and stores data at `/data/codex2api.db`.
+- **SQLite compose files bind to `127.0.0.1` by default for security.** To expose the SQLite service on all interfaces, set `BIND_HOST=0.0.0.0` in `.env` or override the port binding in the compose file. The standard compose files bind to `0.0.0.0` by default.
- The image studio library is stored under `/data/images`; Docker configurations persist `/data`.
- `docker compose down` does not delete named volumes by default. Data is removed only by commands such as `docker compose down -v`, `docker volume rm`, or `docker volume prune`.
@@ -262,7 +264,7 @@ The standard `.env.example` declares `DATABASE_DRIVER=postgres` and `CACHE_DRIVE
Runtime business settings are stored in the database `SystemSettings` table and can be updated from the admin settings page.
-Examples include `MaxConcurrency`, `GlobalRPM`, `TestModel`, `TestConcurrency`, `ProxyURL`, `PgMaxConns`, `RedisPoolSize`, `AdminSecret`, and auto-cleanup switches.
+Examples include `MaxConcurrency`, `GlobalRPM`, `TestModel`, `TestConcurrency`, `ProxyURL`, `PgMaxConns`, `RedisPoolSize`, `AdminSecret`, `SchedulerMode`, and auto-cleanup switches.
Default settings are written automatically on first startup.
@@ -284,9 +286,11 @@ Default settings are written automatically on first startup.
| `POST /v1/responses` | Responses style endpoint |
| `POST /v1/images/generations` | OpenAI Images generation endpoint |
| `POST /v1/images/edits` | OpenAI Images edit endpoint |
-| `GET /v1/models` | List available models |
+| `GET /v1/models` | List available models (includes gpt-5.5, gpt-5.4, gpt-5.4-mini, gpt-5.3-codex, gpt-image-2, etc.) |
| `GET /health` | Health check |
+> **Pricing**: gpt-5.5 is billed at $5.00/M input and $30.00/M output (standard tier). Priority tier: $12.50/M input, $75.00/M output. Other models follow pricing rules in the billing engine.
+
See [API.md](docs/API.md) for full request formats, response formats, and error codes.
### Token Upload and Account Management
@@ -349,6 +353,27 @@ curl -X POST http://localhost:8080/api/admin/accounts/import \
Import endpoints deduplicate tokens automatically. Existing tokens are not inserted again.
+#### OAuth PKCE Authorization
+
+Codex2API supports acquiring Refresh Tokens through the OAuth PKCE flow, useful when manual token extraction is impractical:
+
+```bash
+# Step 1: Generate an authorization URL
+curl -X POST http://localhost:8080/api/admin/oauth/generate-auth-url \
+ -H "X-Admin-Key: your-admin-secret" \
+ -H "Content-Type: application/json" \
+ -d '{}'
+
+# Step 2: Open the returned auth_url in a browser, complete authorization
+# Step 3: Exchange the authorization code for a token (auto-creates account)
+curl -X POST http://localhost:8080/api/admin/oauth/exchange-code \
+ -H "X-Admin-Key: your-admin-secret" \
+ -H "Content-Type: application/json" \
+ -d '{"session_id": "...", "code": "...", "state": "..."}'
+```
+
+See [API.md](docs/API.md) for the full OAuth flow and all admin endpoints.
+
---
## Admin Dashboard
@@ -361,7 +386,7 @@ Open `/admin/` in a browser.
| Accounts | `/admin/accounts` | Import, test, batch actions, scheduler state |
| API Keys | `/admin/api-keys` | API key creation, inspection, deletion, and credential management |
| Proxies | `/admin/proxies` | Proxy pool management, account proxy assignment, connectivity checks |
-| Image Studio | `/admin/images/studio` | Text-to-image, prompt templates, task history, server-side image library |
+| Image Studio | `/admin/images/studio` | Text-to-image, image-to-image, prompt templates, task history, server-side image library |
| Prompt Filter | `/admin/prompt-filter/overview` | Rules, hit logs, testing, and handling mode configuration |
| Usage | `/admin/usage` | Request logs, metric cards, charts, log cleanup |
| Operations | `/admin/ops` | Runtime monitoring and system overview |
@@ -431,6 +456,24 @@ Observability:
- `GET /api/admin/ops/overview` shows runtime and connection pool state.
- `/admin/ops/scheduler` provides the scheduler board.
+**Scheduler mode** (`scheduler_mode`, via Admin Settings):
+
+| Mode | Behavior |
+| --- | --- |
+| `round_robin` (default) | Round-robin across available accounts per health tier, weighted by dispatch score |
+| `remaining_quota` | Prioritizes accounts with lower usage percent; round-robin for ties |
+
+**Credit accounts** (per-account flags):
+
+When an account has a credit-based billing model instead of a usage-based Free/Pro plan, you can mark it so the scheduler skips usage-window penalties:
+
+| Field | Type | Effect |
+| --- | --- | --- |
+| `credit_enabled` | bool | Mark account as credit-based billing |
+| `credit_skip_usage_window` | bool | When true, skip 7d/5h usage-window penalties for this account |
+
+**Windowed USD cost**: The accounts table displays per-account billed cost over two windows -- the past 5 hours and the past 7 days -- aligned with each account's usage reset boundaries. This shows actual spending per account rather than estimated token costs.
+
---
## Project Structure
diff --git a/README.zh-CN.md b/README.zh-CN.md
index 8ad9fa9c..f751df4d 100644
--- a/README.zh-CN.md
+++ b/README.zh-CN.md
@@ -23,9 +23,10 @@
| 统一兼容入口 | 同时覆盖 OpenAI 风格 Chat Completions / Responses / Images、Anthropic Messages、无前缀兼容路由和 Codex 原生 Responses 转发,客户端侧少改配置即可接入。 |
-| 账号池调度核心 | 围绕账号状态、健康层级、调度分、动态并发、冷却恢复和近期用量做选择,自动避开不可用账号,减少单账号打满和反复失败。 |
-| 可视化管理后台 | 内置 React / Vite 管理台,提供账号导入测试、API Key、代理池、生图、Prompt 检查、用量统计、运维概览、调度看板和系统设置。 |
-| 两种部署形态 | 生产环境用 PostgreSQL + Redis,单机测试用 SQLite + Memory;Docker 镜像、源码构建、本地开发和一键交互部署脚本都已准备好。 |
+| 账号池调度核心 | 围绕账号状态、健康层级、调度分、动态并发、冷却恢复和近期用量做选择,自动避开不可用账号,减少单账号打满和反复失败。支持 round_robin 和 remaining_quota 两种调度模式,以及单账号信用计费标记。 |
+| 可视化管理后台 | 内置 React / Vite 管理台,提供账号导入测试、API Key、代理池、生图(文生图 + 图生图)、Prompt 检查、用量统计、运维概览、调度看板和系统设置。 |
+| 两种部署形态 | 生产环境用 PostgreSQL + Redis,单机测试用 SQLite + Memory;Docker 镜像、源码构建、本地开发和一键交互部署脚本都已准备好。SQLite 模式默认绑定 127.0.0.1 以提升安全性。 |
+| 计费与可观测性 | 单账号 5h/7d 窗口化 USD 费用追踪、信用配额支持、API Key 用量追踪、OAuth PKCE 获取 Token、Prompt 过滤,以及含请求日志与趋势图表的用量仪表盘。 |
---
@@ -209,6 +210,7 @@ docker compose -f docker-compose.sqlite.local.yml logs -f codex2api
- SQLite 镜像版容器名:`codex2api-sqlite`
- SQLite 本地构建版容器名:`codex2api-sqlite-local`
- SQLite 轻量版只启动 `codex2api` 单容器,数据保存在 `/data/codex2api.db`
+- **SQLite compose 文件默认绑定 `127.0.0.1`,仅本机可访问。** 如需暴露给外部,请在 `.env` 中设置 `BIND_HOST=0.0.0.0` 或修改 compose 文件中的端口绑定。标准版 compose 文件默认绑定 `0.0.0.0`(所有网络接口)。
- 生图工作台图库默认保存在 `/data/images`,标准版和 SQLite 版 Docker 配置都会持久化 `/data`
- `docker compose down` 默认不会删除命名卷;只有 `docker compose down -v`、`docker volume rm` 或 `docker volume prune` 才会删除持久化数据
- 不同部署模式的数据卷彼此隔离;切换 compose 文件后看到空数据,通常是切到了另一组卷,而不是原卷被自动删除
@@ -313,7 +315,7 @@ Vite 会自动代理 `/api` 和 `/health` 到后端,开发时访问 `http://lo
以下参数**保存在数据库 `SystemSettings` 中**,通过管理台设置页面修改:
-`MaxConcurrency`、`GlobalRPM`、`TestModel`、`TestConcurrency`、`ProxyURL`、`PgMaxConns`、`RedisPoolSize`、`AdminSecret`、自动清理开关等。
+`MaxConcurrency`、`GlobalRPM`、`TestModel`、`TestConcurrency`、`ProxyURL`、`PgMaxConns`、`RedisPoolSize`、`AdminSecret`、`SchedulerMode`、自动清理开关等。
首次启动时程序会自动写入默认设置。
@@ -335,9 +337,11 @@ Vite 会自动代理 `/api` 和 `/health` 到后端,开发时访问 `http://lo
| `POST /v1/responses` | Responses 风格入口 |
| `POST /v1/images/generations` | OpenAI Images 生成入口 |
| `POST /v1/images/edits` | OpenAI Images 编辑入口 |
-| `GET /v1/models` | 返回可用模型列表 |
+| `GET /v1/models` | 返回可用模型列表(含 gpt-5.5、gpt-5.4、gpt-5.4-mini、gpt-5.3-codex、gpt-image-2 等) |
| `GET /health` | 健康检查 |
+> **计费提示**:gpt-5.5 标准 tier 计费为 $5.00/M 输入 / $30.00/M 输出,priority tier 为 $12.50/M 输入 / $75.00/M 输出。其他模型按 billing 引擎规则计费。
+
> 完整请求/响应格式、错误码参见 [API 文档](docs/API.md)。
### Token 上传与账号管理
@@ -400,6 +404,27 @@ curl -X POST http://localhost:8080/api/admin/accounts/import \
> 所有导入接口自动去重,已存在的 Token 不会重复写入。更多管理接口(导出、迁移、OAuth 授权等)参见 [API 文档](docs/API.md)。
+#### OAuth PKCE 授权
+
+Codex2API 支持通过 OAuth PKCE 流程获取 Refresh Token,适用于无法手动提取 Token 的场景:
+
+```bash
+# 步骤 1:生成授权 URL
+curl -X POST http://localhost:8080/api/admin/oauth/generate-auth-url \
+ -H "X-Admin-Key: your-admin-secret" \
+ -H "Content-Type: application/json" \
+ -d '{}'
+
+# 步骤 2:在浏览器中打开返回的 auth_url,完成授权
+# 步骤 3:用授权码兑换 Token(自动创建账号)
+curl -X POST http://localhost:8080/api/admin/oauth/exchange-code \
+ -H "X-Admin-Key: your-admin-secret" \
+ -H "Content-Type: application/json" \
+ -d '{"session_id": "...", "code": "...", "state": "..."}'
+```
+
+> 完整 OAuth 流程及所有管理接口参见 [API 文档](docs/API.md)。
+
---
## 管理后台
@@ -412,7 +437,7 @@ curl -X POST http://localhost:8080/api/admin/accounts/import \
| 账号管理 | `/admin/accounts` | 导入、测试、批量处理、调度信息查看 |
| API 密钥 | `/admin/api-keys` | API Key 创建、查看、删除与调用凭据管理 |
| 代理管理 | `/admin/proxies` | 代理池维护、账号代理分配与连通性管理 |
-| 生图工作台 | `/admin/images/studio` | 文生图、提示词模板、任务历史和服务器图库 |
+| 生图工作台 | `/admin/images/studio` | 文生图、图生图、提示词模板、任务历史和服务器图库 |
| Prompt 检查 | `/admin/prompt-filter/overview` | Prompt 规则、触发日志、测试和处理模式配置 |
| 使用统计 | `/admin/usage` | 请求日志、统计卡片、图表、日志清空 |
| 运维概览 | `/admin/ops` | 运行态监控与系统概览 |
@@ -495,6 +520,24 @@ curl -X POST http://localhost:8080/api/admin/accounts/import \
- `GET /api/admin/ops/overview` — 系统运行态与连接池概览
- `/admin/ops/scheduler` — 前端调度看板
+**调度模式**(`scheduler_mode`,通过管理后台设置):
+
+| 模式 | 行为 |
+| --- | --- |
+| `round_robin`(默认) | 按健康层级轮询可用账号,权重按调度分排序 |
+| `remaining_quota` | 优先使用用量较低的账号;用量相同时轮询 |
+
+**信用账号**(单账号标记):
+
+对采用信用计费而非 Free/Pro 用量计费的账号,可标记为信用账号以跳过用量窗口惩罚:
+
+| 字段 | 类型 | 作用 |
+| --- | --- | --- |
+| `credit_enabled` | bool | 标记账号为信用计费模式 |
+| `credit_skip_usage_window` | bool | 开启后跳过 7 天/5 小时用量窗口惩罚 |
+
+**窗口化 USD 费用**:账号列表展示每个账号在两个时间窗口内的累计计费金额——过去 5 小时和过去 7 天,窗口对齐各账号的用量重置边界。这反映的是实际扣费金额而非估算的 Token 费用。
+
---
## 目录结构
diff --git a/admin/handler.go b/admin/handler.go
index c6bdb835..b076a3f6 100644
--- a/admin/handler.go
+++ b/admin/handler.go
@@ -203,6 +203,7 @@ func (h *Handler) RegisterRoutes(r *gin.Engine) {
api.GET("/accounts/:id/test", h.TestConnection)
api.GET("/accounts/:id/usage", h.GetAccountUsage)
api.GET("/accounts/:id/auth-json", h.GetAccountAuthJSON)
+ api.PATCH("/accounts/:id/credit", h.UpdateAccountCredit)
api.POST("/accounts/batch-test", h.BatchTest)
api.POST("/accounts/batch-reset-status", h.BatchResetStatus)
api.POST("/accounts/clean-banned", h.CleanBanned)
@@ -242,6 +243,7 @@ func (h *Handler) RegisterRoutes(r *gin.Engine) {
api.PATCH("/image-prompts/:id", h.UpdateImagePromptTemplate)
api.DELETE("/image-prompts/:id", h.DeleteImagePromptTemplate)
api.POST("/images/jobs", h.CreateImageGenerationJob)
+ api.POST("/images/edit-jobs", h.CreateImageEditJob)
api.GET("/images/jobs", h.ListImageGenerationJobs)
api.GET("/images/jobs/:id", h.GetImageGenerationJob)
api.DELETE("/images/jobs/:id", h.DeleteImageGenerationJob)
@@ -381,6 +383,8 @@ type accountResponse struct {
Status string `json:"status"`
ErrorMessage string `json:"error_message,omitempty"`
ATOnly bool `json:"at_only"`
+ CreditEnabled bool `json:"credit_enabled"`
+ CreditSkipUsageWindow bool `json:"credit_skip_usage_window"`
AccountType string `json:"account_type,omitempty"`
OpenAIResponsesAPI bool `json:"openai_responses_api,omitempty"`
BaseURL string `json:"base_url,omitempty"`
@@ -409,6 +413,8 @@ type accountResponse struct {
Usage7dDetail *accountUsageWindow `json:"usage_7d_detail,omitempty"`
Reset5hAt string `json:"reset_5h_at,omitempty"`
Reset7dAt string `json:"reset_7d_at,omitempty"`
+ Billed5h *float64 `json:"billed_5h"`
+ Billed7d *float64 `json:"billed_7d"`
ScoreBreakdown schedulerBreakdownResponse `json:"scheduler_breakdown"`
LastUnauthorizedAt string `json:"last_unauthorized_at,omitempty"`
LastRateLimitedAt string `json:"last_rate_limited_at,omitempty"`
@@ -501,6 +507,8 @@ func (h *Handler) ListAccounts(c *gin.Context) {
Status: row.Status,
ErrorMessage: row.ErrorMessage,
ATOnly: !isOpenAIResponsesAccount && row.GetCredential("refresh_token") == "" && row.GetCredential("access_token") != "",
+ CreditEnabled: row.CreditEnabled,
+ CreditSkipUsageWindow: row.CreditSkipUsageWindow,
AccountType: row.Type,
OpenAIResponsesAPI: isOpenAIResponsesAccount,
BaseURL: baseURL,
@@ -625,6 +633,26 @@ func (h *Handler) ListAccounts(c *gin.Context) {
accounts = append(accounts, resp)
}
+ // 批量查询各账号 5h / 7d 窗口内累计 account_billed
+ for i := range accounts {
+ acc, ok := accountMap[accounts[i].ID]
+ if !ok {
+ continue
+ }
+ if t := acc.GetReset5hAt(); !t.IsZero() {
+ billed, err := h.db.GetAccountBilledSince(ctx, accounts[i].ID, t.Add(-5*time.Hour))
+ if err == nil {
+ accounts[i].Billed5h = &billed
+ }
+ }
+ if t := acc.GetReset7dAt(); !t.IsZero() {
+ billed, err := h.db.GetAccountBilledSince(ctx, accounts[i].ID, t.AddDate(0, 0, -7))
+ if err == nil {
+ accounts[i].Billed7d = &billed
+ }
+ }
+ }
+
c.JSON(http.StatusOK, accountsResponse{Accounts: accounts})
}
@@ -638,6 +666,45 @@ type updateAccountSchedulerReq struct {
}
// UpdateAccountScheduler 更新账号调度配置。
+// UpdateAccountCredit 更新账号信用设置
+func (h *Handler) UpdateAccountCredit(c *gin.Context) {
+ id, err := strconv.ParseInt(c.Param("id"), 10, 64)
+ if err != nil {
+ writeError(c, http.StatusBadRequest, "无效的账号 ID")
+ return
+ }
+
+ var req struct {
+ CreditEnabled *bool `json:"credit_enabled"`
+ CreditSkipUsageWindow *bool `json:"credit_skip_usage_window"`
+ }
+ decoder := json.NewDecoder(c.Request.Body)
+ decoder.DisallowUnknownFields()
+ if err := decoder.Decode(&req); err != nil {
+ writeError(c, http.StatusBadRequest, "请求格式错误")
+ return
+ }
+
+ acc := h.store.FindByID(id)
+ if acc == nil {
+ writeError(c, http.StatusNotFound, "账号不存在")
+ return
+ }
+
+ // 传入 *bool:nil = 不修改该字段
+ if err := h.store.UpdateAccountCredit(id, req.CreditEnabled, req.CreditSkipUsageWindow); err != nil {
+ writeError(c, http.StatusInternalServerError, "更新信用设置失败: "+err.Error())
+ return
+ }
+
+ acc = h.store.FindByID(id)
+ if acc != nil {
+ c.JSON(http.StatusOK, gin.H{"message": "信用设置已更新", "credit_enabled": acc.CreditEnabled, "credit_skip_usage_window": acc.CreditSkipUsageWindow})
+ } else {
+ c.JSON(http.StatusOK, gin.H{"message": "信用设置已更新"})
+ }
+}
+
func (h *Handler) UpdateAccountScheduler(c *gin.Context) {
id, err := strconv.ParseInt(c.Param("id"), 10, 64)
if err != nil {
@@ -3496,6 +3563,7 @@ type settingsResponse struct {
AutoCleanExpired bool `json:"auto_clean_expired"`
ProxyPoolEnabled bool `json:"proxy_pool_enabled"`
FastSchedulerEnabled bool `json:"fast_scheduler_enabled"`
+ SchedulerMode string `json:"scheduler_mode"`
MaxRetries int `json:"max_retries"`
MaxRateLimitRetries int `json:"max_rate_limit_retries"`
AllowRemoteMigration bool `json:"allow_remote_migration"`
@@ -3554,6 +3622,7 @@ type updateSettingsReq struct {
AutoCleanExpired *bool `json:"auto_clean_expired"`
ProxyPoolEnabled *bool `json:"proxy_pool_enabled"`
FastSchedulerEnabled *bool `json:"fast_scheduler_enabled"`
+ SchedulerMode *string `json:"scheduler_mode"`
MaxRetries *int `json:"max_retries"`
MaxRateLimitRetries *int `json:"max_rate_limit_retries"`
AllowRemoteMigration *bool `json:"allow_remote_migration"`
@@ -3694,6 +3763,7 @@ func (h *Handler) GetSettings(c *gin.Context) {
AutoCleanExpired: h.store.GetAutoCleanExpired(),
ProxyPoolEnabled: h.store.GetProxyPoolEnabled(),
FastSchedulerEnabled: h.store.FastSchedulerEnabled(),
+ SchedulerMode: h.store.GetSchedulerMode(),
MaxRetries: h.store.GetMaxRetries(),
MaxRateLimitRetries: h.store.GetMaxRateLimitRetries(),
AllowRemoteMigration: h.store.GetAllowRemoteMigration() && adminAuthSource != "disabled",
@@ -3926,6 +3996,11 @@ func (h *Handler) UpdateSettings(c *gin.Context) {
log.Printf("设置已更新: fast_scheduler_enabled = %t", *req.FastSchedulerEnabled)
}
+ if req.SchedulerMode != nil {
+ h.store.SetSchedulerMode(*req.SchedulerMode)
+ log.Printf("设置已更新: scheduler_mode = %s", *req.SchedulerMode)
+ }
+
if req.MaxRetries != nil {
v := *req.MaxRetries
if v < 0 {
@@ -4167,6 +4242,7 @@ func (h *Handler) UpdateSettings(c *gin.Context) {
AutoCleanExpired: h.store.GetAutoCleanExpired(),
ProxyPoolEnabled: h.store.GetProxyPoolEnabled(),
FastSchedulerEnabled: h.store.FastSchedulerEnabled(),
+ SchedulerMode: h.store.GetSchedulerMode(),
MaxRetries: h.store.GetMaxRetries(),
MaxRateLimitRetries: h.store.GetMaxRateLimitRetries(),
AllowRemoteMigration: h.store.GetAllowRemoteMigration() && hasAdminSecret,
@@ -4230,6 +4306,7 @@ func (h *Handler) UpdateSettings(c *gin.Context) {
AutoCleanExpired: h.store.GetAutoCleanExpired(),
ProxyPoolEnabled: h.store.GetProxyPoolEnabled(),
FastSchedulerEnabled: h.store.FastSchedulerEnabled(),
+ SchedulerMode: h.store.GetSchedulerMode(),
MaxRetries: h.store.GetMaxRetries(),
MaxRateLimitRetries: h.store.GetMaxRateLimitRetries(),
AllowRemoteMigration: h.store.GetAllowRemoteMigration() && adminAuthSource != "disabled",
diff --git a/admin/image_studio.go b/admin/image_studio.go
index 47186da1..dcd6912d 100644
--- a/admin/image_studio.go
+++ b/admin/image_studio.go
@@ -63,7 +63,8 @@ type imageGenerationJobPayload struct {
Style string `json:"style"`
Upscale string `json:"upscale"`
APIKeyID int64 `json:"api_key_id"`
- TemplateID int64 `json:"template_id"`
+ TemplateID int64 `json:"template_id"`
+ InputImages []string `json:"input_images"`
}
type imageJobResponse struct {
@@ -322,6 +323,89 @@ func (h *Handler) CreateImageGenerationJob(c *gin.Context) {
c.JSON(http.StatusOK, imageJobResponse{Job: job})
}
+func (h *Handler) CreateImageEditJob(c *gin.Context) {
+ var req imageGenerationJobPayload
+ if err := c.ShouldBindJSON(&req); err != nil {
+ writeError(c, http.StatusBadRequest, "请求体无效")
+ return
+ }
+ req.Prompt = strings.TrimSpace(req.Prompt)
+ if req.Prompt == "" {
+ writeError(c, http.StatusBadRequest, "提示词不能为空")
+ return
+ }
+ if len(req.InputImages) == 0 {
+ writeError(c, http.StatusBadRequest, "图生图需要上传参考图片")
+ return
+ }
+ if len(req.InputImages) > proxy.MaxImageEditInputCount {
+ writeError(c, http.StatusBadRequest, fmt.Sprintf("参考图片数量超过限制 (%d, 最多 %d)", len(req.InputImages), proxy.MaxImageEditInputCount))
+ return
+ }
+ if len([]rune(req.Prompt)) > 8000 {
+ writeError(c, http.StatusBadRequest, "提示词不能超过 8000 个字符")
+ return
+ }
+ req.Model = normalizeImageStudioModel(req.Model)
+ if req.Model == "" {
+ req.Model = "gpt-image-2"
+ }
+ req.Size = normalizeOptionalImageParam(req.Size)
+ req.Quality = normalizeOptionalImageParam(req.Quality)
+ req.OutputFormat = normalizeOptionalImageParam(req.OutputFormat)
+ if req.OutputFormat == "" {
+ req.OutputFormat = "png"
+ }
+ req.Background = normalizeOptionalImageParam(req.Background)
+ req.Style = normalizeOptionalImageParam(req.Style)
+ req.Upscale = imageproc.NormalizeUpscale(req.Upscale)
+
+ ctx, cancel := context.WithTimeout(c.Request.Context(), 10*time.Second)
+ defer cancel()
+ apiKey, err := h.resolveImageJobAPIKey(ctx, req.APIKeyID)
+ if err != nil {
+ writeError(c, http.StatusBadRequest, err.Error())
+ return
+ }
+ paramsJSON, _ := json.Marshal(req)
+ keyID, keyName, keyMasked := imageJobAPIKeyMeta(apiKey)
+ if h.inspectImageStudioPromptFilter(c, proxy.AppendImageStyleToPrompt(req.Prompt, req.Style), req.Model, keyID, keyName, keyMasked) {
+ return
+ }
+ jobID, err := h.db.InsertImageGenerationJob(ctx, database.ImageGenerationJobInput{
+ Prompt: req.Prompt,
+ ParamsJSON: string(paramsJSON),
+ APIKeyID: keyID,
+ APIKeyName: keyName,
+ APIKeyMasked: keyMasked,
+ })
+ if err != nil {
+ writeInternalError(c, err)
+ return
+ }
+ if req.TemplateID > 0 {
+ _ = h.db.IncrementImagePromptTemplateUsage(ctx, req.TemplateID)
+ }
+ job, err := h.db.GetImageGenerationJob(ctx, jobID)
+ if err != nil {
+ writeInternalError(c, err)
+ return
+ }
+ log.Printf("[image-studio] job=%d queued mode=edit model=%s size=%s quality=%s format=%s image_count=%d api_key=%s template=%d prompt=%q",
+ jobID,
+ imageLogValue(req.Model),
+ imageLogValue(req.Size),
+ imageLogValue(req.Quality),
+ imageLogValue(req.OutputFormat),
+ len(req.InputImages),
+ imageLogAPIKeyLabel(keyID, keyName, keyMasked),
+ req.TemplateID,
+ imageLogPromptPreview(req.Prompt),
+ )
+ go h.runImageEditJob(jobID, req, apiKey)
+ c.JSON(http.StatusOK, imageJobResponse{Job: job})
+}
+
func (h *Handler) ListImageGenerationJobs(c *gin.Context) {
page, pageSize := paginationParams(c, 20)
ctx, cancel := context.WithTimeout(c.Request.Context(), 10*time.Second)
@@ -689,7 +773,7 @@ func (h *Handler) runImageGenerationJob(jobID int64, req imageGenerationJobPaylo
if err != nil {
durationMs := int(time.Since(start).Milliseconds())
log.Printf("[image-studio] job=%d failed duration=%s stage=build_request error=%s", jobID, imageLogDuration(durationMs), security.SanitizeLog(err.Error()))
- _ = h.db.MarkImageJobFailed(context.Background(), jobID, err.Error(), durationMs)
+ _ = h.db.MarkImageJobFailed(ctx, jobID, err.Error(), durationMs)
return
}
log.Printf("[image-studio] job=%d upstream request model=%s size=%s quality=%s format=%s body_bytes=%d prompt_chars=%d prompt=%q",
@@ -715,7 +799,7 @@ func (h *Handler) runImageGenerationJob(jobID int64, req imageGenerationJobPaylo
if buildErr != nil {
durationMs := int(time.Since(start).Milliseconds())
log.Printf("[image-studio] job=%d failed duration=%s stage=build_jpeg_fallback error=%s", jobID, imageLogDuration(durationMs), security.SanitizeLog(buildErr.Error()))
- _ = h.db.MarkImageJobFailed(context.Background(), jobID, buildErr.Error(), durationMs)
+ _ = h.db.MarkImageJobFailed(ctx, jobID, buildErr.Error(), durationMs)
return
}
fallbackStyledPrompt := proxy.AppendImageStyleToPrompt(fallbackReq.Prompt, fallbackReq.Style)
@@ -734,7 +818,7 @@ func (h *Handler) runImageGenerationJob(jobID int64, req imageGenerationJobPaylo
req = fallbackReq
rawBody = fallbackBody
if paramsJSON, marshalErr := json.Marshal(fallbackReq); marshalErr == nil {
- if updateErr := h.db.UpdateImageGenerationJobParamsJSON(context.Background(), jobID, string(paramsJSON)); updateErr != nil {
+ if updateErr := h.db.UpdateImageGenerationJobParamsJSON(ctx, jobID, string(paramsJSON)); updateErr != nil {
logImageJobError(jobID, updateErr)
}
}
@@ -756,7 +840,7 @@ func (h *Handler) runImageGenerationJob(jobID int64, req imageGenerationJobPaylo
upstreamStatus,
security.SanitizeLog(err.Error()),
)
- _ = h.db.MarkImageJobFailed(context.Background(), jobID, err.Error(), durationMs)
+ _ = h.db.MarkImageJobFailed(ctx, jobID, err.Error(), durationMs)
return
}
log.Printf("[image-studio] job=%d upstream completed duration=%s upstream_status=%d response_bytes=%d",
@@ -766,18 +850,18 @@ func (h *Handler) runImageGenerationJob(jobID int64, req imageGenerationJobPaylo
len(responseJSON),
)
- assets, err := h.saveImageJobAssets(context.Background(), jobID, req, responseJSON)
+ assets, err := h.saveImageJobAssets(ctx, jobID, req, responseJSON)
if err != nil {
log.Printf("[image-studio] job=%d failed duration=%s stage=save_assets error=%s", jobID, imageLogDuration(durationMs), security.SanitizeLog(err.Error()))
- _ = h.db.MarkImageJobFailed(context.Background(), jobID, err.Error(), durationMs)
+ _ = h.db.MarkImageJobFailed(ctx, jobID, err.Error(), durationMs)
return
}
if len(assets) == 0 {
log.Printf("[image-studio] job=%d failed duration=%s stage=save_assets error=%s", jobID, imageLogDuration(durationMs), "上游未返回图片")
- _ = h.db.MarkImageJobFailed(context.Background(), jobID, "上游未返回图片", durationMs)
+ _ = h.db.MarkImageJobFailed(ctx, jobID, "上游未返回图片", durationMs)
return
}
- if err := h.db.MarkImageJobSucceeded(context.Background(), jobID, durationMs); err != nil {
+ if err := h.db.MarkImageJobSucceeded(ctx, jobID, durationMs); err != nil {
logImageJobError(jobID, err)
}
log.Printf("[image-studio] job=%d succeeded duration=%s assets=%d total_bytes=%d first_size=%s dir=%s",
@@ -790,6 +874,142 @@ func (h *Handler) runImageGenerationJob(jobID int64, req imageGenerationJobPaylo
)
}
+func buildAdminImageEditRequest(req imageGenerationJobPayload) ([]byte, error) {
+ if len(req.InputImages) == 0 {
+ return nil, fmt.Errorf("input_images is required for image edit")
+ }
+ if len(req.InputImages) > proxy.MaxImageEditInputCount {
+ return nil, fmt.Errorf("too many input images (%d, max %d)", len(req.InputImages), proxy.MaxImageEditInputCount)
+ }
+ body := map[string]any{
+ "model": req.Model,
+ "prompt": proxy.AppendImageStyleToPrompt(req.Prompt, req.Style),
+ "response_format": "b64_json",
+ }
+ images := make([]map[string]string, len(req.InputImages))
+ for i, img := range req.InputImages {
+ images[i] = map[string]string{"image_url": img}
+ }
+ body["images"] = images
+ if req.Size != "" && req.Size != "auto" {
+ body["size"] = req.Size
+ }
+ if req.Quality != "" && req.Quality != "auto" {
+ body["quality"] = req.Quality
+ }
+ if req.OutputFormat != "" {
+ body["output_format"] = req.OutputFormat
+ }
+ if req.Background != "" && req.Background != "auto" {
+ body["background"] = req.Background
+ }
+ return json.Marshal(body)
+}
+
+func (h *Handler) runImageEditJob(jobID int64, req imageGenerationJobPayload, apiKey *database.APIKeyRow) {
+ ctx, cancel := context.WithTimeout(context.Background(), 12*time.Minute)
+ defer cancel()
+ start := time.Now()
+ if err := h.db.MarkImageJobRunning(ctx, jobID); err != nil {
+ logImageJobError(jobID, err)
+ return
+ }
+ log.Printf("[image-studio] job=%d started mode=edit model=%s image_count=%d prompt_chars=%d",
+ jobID,
+ imageLogValue(req.Model),
+ len(req.InputImages),
+ len([]rune(req.Prompt)),
+ )
+
+ rawBody, err := buildAdminImageEditRequest(req)
+ if err != nil {
+ durationMs := int(time.Since(start).Milliseconds())
+ log.Printf("[image-studio] job=%d failed mode=edit duration=%s stage=build_request error=%s", jobID, imageLogDuration(durationMs), security.SanitizeLog(err.Error()))
+ _ = h.db.MarkImageJobFailed(ctx, jobID, err.Error(), durationMs)
+ return
+ }
+
+ imageProxy := h.imageProxy
+ if imageProxy == nil {
+ imageProxy = proxy.NewHandler(h.store, h.db, nil, nil)
+ }
+ responseJSON, upstreamStatus, err := imageProxy.GenerateImageEditForAdmin(ctx, rawBody, apiKey)
+ if shouldFallbackImageJobToJPEG(req, upstreamStatus, err) {
+ pngErr := err
+ pngStatus := upstreamStatus
+ fallbackReq := jpegFallbackImageJobRequest(req)
+ fallbackBody, buildErr := buildAdminImageEditRequest(fallbackReq)
+ if buildErr != nil {
+ durationMs := int(time.Since(start).Milliseconds())
+ log.Printf("[image-studio] job=%d failed mode=edit duration=%s stage=build_jpeg_fallback error=%s", jobID, imageLogDuration(durationMs), security.SanitizeLog(buildErr.Error()))
+ _ = h.db.MarkImageJobFailed(ctx, jobID, buildErr.Error(), durationMs)
+ return
+ }
+ log.Printf("[image-studio] job=%d png_failed_retrying_jpeg mode=edit upstream_status=%d error=%s",
+ jobID,
+ pngStatus,
+ security.SanitizeLog(pngErr.Error()),
+ )
+ responseJSON, upstreamStatus, err = imageProxy.GenerateImageEditForAdmin(ctx, fallbackBody, apiKey)
+ if err == nil {
+ req = fallbackReq
+ rawBody = fallbackBody
+ if paramsJSON, marshalErr := json.Marshal(fallbackReq); marshalErr == nil {
+ if updateErr := h.db.UpdateImageGenerationJobParamsJSON(ctx, jobID, string(paramsJSON)); updateErr != nil {
+ logImageJobError(jobID, updateErr)
+ }
+ }
+ log.Printf("[image-studio] job=%d jpeg_fallback_succeeded mode=edit original_status=%d upstream_status=%d body_bytes=%d",
+ jobID,
+ pngStatus,
+ upstreamStatus,
+ len(rawBody),
+ )
+ } else {
+ err = fmt.Errorf("PNG 生成失败: %s;自动改用 JPEG 重试仍失败: %w", pngErr.Error(), err)
+ }
+ }
+ durationMs := int(time.Since(start).Milliseconds())
+ if err != nil {
+ log.Printf("[image-studio] job=%d failed mode=edit duration=%s upstream_status=%d error=%s",
+ jobID,
+ imageLogDuration(durationMs),
+ upstreamStatus,
+ security.SanitizeLog(err.Error()),
+ )
+ _ = h.db.MarkImageJobFailed(ctx, jobID, err.Error(), durationMs)
+ return
+ }
+ log.Printf("[image-studio] job=%d upstream completed mode=edit duration=%s upstream_status=%d response_bytes=%d",
+ jobID,
+ imageLogDuration(durationMs),
+ upstreamStatus,
+ len(responseJSON),
+ )
+
+ assets, err := h.saveImageJobAssets(ctx, jobID, req, responseJSON)
+ if err != nil {
+ log.Printf("[image-studio] job=%d failed mode=edit duration=%s stage=save_assets error=%s", jobID, imageLogDuration(durationMs), security.SanitizeLog(err.Error()))
+ _ = h.db.MarkImageJobFailed(ctx, jobID, err.Error(), durationMs)
+ return
+ }
+ if len(assets) == 0 {
+ log.Printf("[image-studio] job=%d failed mode=edit duration=%s stage=save_assets error=%s", jobID, imageLogDuration(durationMs), "上游未返回图片")
+ _ = h.db.MarkImageJobFailed(ctx, jobID, "上游未返回图片", durationMs)
+ return
+ }
+ if err := h.db.MarkImageJobSucceeded(ctx, jobID, durationMs); err != nil {
+ logImageJobError(jobID, err)
+ }
+ log.Printf("[image-studio] job=%d succeeded mode=edit duration=%s assets=%d total_bytes=%d first_size=%s",
+ jobID,
+ imageLogDuration(durationMs),
+ len(assets),
+ imageAssetsTotalBytes(assets),
+ imageLogFirstAssetSize(assets),
+ )
+}
+
func buildAdminImageGenerationRequest(req imageGenerationJobPayload) ([]byte, error) {
body := map[string]any{
"model": req.Model,
diff --git a/admin/oauth.go b/admin/oauth.go
index c5cf25e0..4df3532b 100644
--- a/admin/oauth.go
+++ b/admin/oauth.go
@@ -224,6 +224,9 @@ func (h *Handler) ExchangeOAuthCode(c *gin.Context) {
if trimmed := strings.TrimSpace(req.ProxyURL); trimmed != "" {
proxyURL = trimmed
}
+ if proxyURL == "" {
+ proxyURL = h.store.GetProxyURL()
+ }
// Resin 临时身份用于 OAuth 兑换(新账号尚无 DBID)
resinTempID := "oauth-" + req.SessionID
@@ -391,8 +394,12 @@ func (h *Handler) OAuthCallback(c *gin.Context) {
sess.CallbackAt = time.Now()
// 执行 code exchange(Resin 临时身份)
+ proxyURL := sess.ProxyURL
+ if proxyURL == "" {
+ proxyURL = h.store.GetProxyURL()
+ }
resinTempID := "oauth-" + sessionID
- tokenResp, accountInfo, err := doOAuthCodeExchange(c.Request.Context(), code, sess.CodeVerifier, sess.RedirectURI, sess.ProxyURL, resinTempID)
+ tokenResp, accountInfo, err := doOAuthCodeExchange(c.Request.Context(), code, sess.CodeVerifier, sess.RedirectURI, proxyURL, resinTempID)
if err != nil {
sess.ExchangeResult = &oauthExchangeResult{
Success: false,
@@ -429,7 +436,7 @@ func (h *Handler) OAuthCallback(c *gin.Context) {
ctx, cancel := context.WithTimeout(c.Request.Context(), 30*time.Second)
defer cancel()
- id, err := h.db.InsertAccount(ctx, name, tokenResp.RefreshToken, sess.ProxyURL)
+ id, err := h.db.InsertAccount(ctx, name, tokenResp.RefreshToken, proxyURL)
if err != nil {
sess.ExchangeResult = &oauthExchangeResult{
Success: false,
@@ -453,7 +460,7 @@ func (h *Handler) OAuthCallback(c *gin.Context) {
go proxy.InheritLease(resinTempID, fmt.Sprintf("%d", id))
}
- newAcc := accountFromCredentialSeed(id, sess.ProxyURL, seed)
+ newAcc := accountFromCredentialSeed(id, proxyURL, seed)
h.store.AddAccount(newAcc)
email := ""
diff --git a/auth/fast_scheduler.go b/auth/fast_scheduler.go
index 992644c8..76a0094b 100644
--- a/auth/fast_scheduler.go
+++ b/auth/fast_scheduler.go
@@ -33,26 +33,31 @@ type fastSchedulerPosition struct {
// 调度策略:按健康层级分桶,桶内按调度分排序后 round-robin。
// 验证过的账号只作为同分 tie-breaker,避免历史请求量盖过额度快重置优先级。
type FastScheduler struct {
- mu sync.RWMutex
- baseLimit int64
- buckets map[AccountHealthTier][]fastSchedulerEntry
- positions map[int64]fastSchedulerPosition
- cursors [3]atomic.Uint64
- groupCheck func(apiKeyID int64, account *Account) bool
+ mu sync.RWMutex
+ baseLimit int64
+ schedulerMode string
+ buckets map[AccountHealthTier][]fastSchedulerEntry
+ positions map[int64]fastSchedulerPosition
+ cursors [3]atomic.Uint64
+ groupCheck func(apiKeyID int64, account *Account) bool
}
-func NewFastScheduler(baseLimit int64) *FastScheduler {
+func NewFastScheduler(baseLimit int64, schedulerMode string) *FastScheduler {
if baseLimit <= 0 {
baseLimit = 1
}
+ if schedulerMode == "" {
+ schedulerMode = "round_robin"
+ }
return &FastScheduler{
- baseLimit: baseLimit,
+ baseLimit: baseLimit,
+ schedulerMode: schedulerMode,
buckets: map[AccountHealthTier][]fastSchedulerEntry{
HealthTierHealthy: nil,
HealthTierWarm: nil,
HealthTierRisky: nil,
},
- positions: make(map[int64]fastSchedulerPosition),
+ positions: map[int64]fastSchedulerPosition{},
}
}
@@ -65,13 +70,67 @@ func (s *FastScheduler) SetGroupCheck(check func(apiKeyID int64, account *Accoun
s.mu.Unlock()
}
+func (s *FastScheduler) SetSchedulerMode(mode string) {
+ if s == nil {
+ return
+ }
+ s.mu.Lock()
+ defer s.mu.Unlock()
+ if mode == "" {
+ mode = "round_robin"
+ }
+ s.schedulerMode = mode
+
+ // Re-sort all tier buckets according to the new mode.
+ for _, tier := range fastSchedulerTierOrder {
+ entries := s.buckets[tier]
+ if len(entries) == 0 {
+ continue
+ }
+ if mode == "remaining_quota" {
+ sort.SliceStable(entries, func(i, j int) bool {
+ usageI := entries[i].acc.usagePercentForScheduling()
+ usageJ := entries[j].acc.usagePercentForScheduling()
+ if usageI == usageJ {
+ if entries[i].proven != entries[j].proven {
+ return entries[i].proven
+ }
+ return entries[i].dbID < entries[j].dbID
+ }
+ return usageI < usageJ
+ })
+ } else {
+ sort.SliceStable(entries, func(i, j int) bool {
+ if entries[i].dispatchScore == entries[j].dispatchScore {
+ if entries[i].proven != entries[j].proven {
+ return entries[i].proven
+ }
+ return entries[i].dbID < entries[j].dbID
+ }
+ return entries[i].dispatchScore > entries[j].dispatchScore
+ })
+ }
+ s.buckets[tier] = entries
+ s.rebuildPositionsLocked(tier)
+ }
+}
+
+func (s *FastScheduler) SchedulerMode() string {
+ if s == nil {
+ return "round_robin"
+ }
+ s.mu.RLock()
+ defer s.mu.RUnlock()
+ return s.schedulerMode
+}
+
// BuildFastScheduler 用当前 Store 快照构建一个独立 scheduler。
// 该方法不会影响现有生产流量路径,只用于 POC/benchmark/灰度验证。
func (s *Store) BuildFastScheduler() *FastScheduler {
if s == nil {
- return NewFastScheduler(1)
+ return NewFastScheduler(1, "round_robin")
}
- scheduler := NewFastScheduler(atomic.LoadInt64(&s.maxConcurrency))
+ scheduler := NewFastScheduler(atomic.LoadInt64(&s.maxConcurrency), s.GetSchedulerMode())
s.mu.RLock()
accounts := make([]*Account, len(s.accounts))
@@ -124,15 +183,29 @@ func (s *FastScheduler) Rebuild(accounts []*Account) {
if len(entries) == 0 {
continue
}
- sort.SliceStable(entries, func(i, j int) bool {
- if entries[i].dispatchScore == entries[j].dispatchScore {
- if entries[i].proven != entries[j].proven {
- return entries[i].proven
+ if s.schedulerMode == "remaining_quota" {
+ sort.SliceStable(entries, func(i, j int) bool {
+ usageI := entries[i].acc.usagePercentForScheduling()
+ usageJ := entries[j].acc.usagePercentForScheduling()
+ if usageI == usageJ {
+ if entries[i].proven != entries[j].proven {
+ return entries[i].proven
+ }
+ return entries[i].dbID < entries[j].dbID
}
- return entries[i].dbID < entries[j].dbID
- }
- return entries[i].dispatchScore > entries[j].dispatchScore
- })
+ return usageI < usageJ
+ })
+ } else {
+ sort.SliceStable(entries, func(i, j int) bool {
+ if entries[i].dispatchScore == entries[j].dispatchScore {
+ if entries[i].proven != entries[j].proven {
+ return entries[i].proven
+ }
+ return entries[i].dbID < entries[j].dbID
+ }
+ return entries[i].dispatchScore > entries[j].dispatchScore
+ })
+ }
s.buckets[tier] = entries
s.rebuildPositionsLocked(tier)
}
@@ -193,6 +266,7 @@ func (s *FastScheduler) AcquireExcludingWithFilter(apiKeyID int64, exclude map[i
defer s.mu.Unlock()
baseLimit := s.baseLimit
+ var zeroCursor atomic.Uint64
for {
changed := false
for tierIdx, tier := range fastSchedulerTierOrder {
@@ -201,7 +275,12 @@ func (s *FastScheduler) AcquireExcludingWithFilter(apiKeyID int64, exclude map[i
continue
}
- acc, stale := s.scanRangeLocked(tier, 0, len(bucket), &s.cursors[tierIdx], baseLimit, now, apiKeyID, exclude, filter)
+ cursor := &s.cursors[tierIdx]
+ if s.schedulerMode == "remaining_quota" {
+ zeroCursor.Store(0)
+ cursor = &zeroCursor
+ }
+ acc, stale := s.scanRangeLocked(tier, 0, len(bucket), cursor, baseLimit, now, apiKeyID, exclude, filter)
if acc != nil {
return acc
}
@@ -301,15 +380,29 @@ func (s *FastScheduler) insertLocked(acc *Account, now time.Time) {
dispatchScore: dispatchScore,
proven: proven,
})
- sort.SliceStable(entries, func(i, j int) bool {
- if entries[i].dispatchScore == entries[j].dispatchScore {
- if entries[i].proven != entries[j].proven {
- return entries[i].proven
+ if s.schedulerMode == "remaining_quota" {
+ sort.SliceStable(entries, func(i, j int) bool {
+ usageI := entries[i].acc.usagePercentForScheduling()
+ usageJ := entries[j].acc.usagePercentForScheduling()
+ if usageI == usageJ {
+ if entries[i].proven != entries[j].proven {
+ return entries[i].proven
+ }
+ return entries[i].dbID < entries[j].dbID
}
- return entries[i].dbID < entries[j].dbID
- }
- return entries[i].dispatchScore > entries[j].dispatchScore
- })
+ return usageI < usageJ
+ })
+ } else {
+ sort.SliceStable(entries, func(i, j int) bool {
+ if entries[i].dispatchScore == entries[j].dispatchScore {
+ if entries[i].proven != entries[j].proven {
+ return entries[i].proven
+ }
+ return entries[i].dbID < entries[j].dbID
+ }
+ return entries[i].dispatchScore > entries[j].dispatchScore
+ })
+ }
s.buckets[tier] = entries
s.rebuildPositionsLocked(tier)
}
diff --git a/auth/fast_scheduler_test.go b/auth/fast_scheduler_test.go
index 915f5f2d..440b5ee6 100644
--- a/auth/fast_scheduler_test.go
+++ b/auth/fast_scheduler_test.go
@@ -24,7 +24,7 @@ func TestFastSchedulerAcquirePrefersHealthyTier(t *testing.T) {
warm := newFastSchedulerTestAccount(1, HealthTierWarm, 90, 2)
healthy := newFastSchedulerTestAccount(2, HealthTierHealthy, 80, 2)
- scheduler := NewFastScheduler(2)
+ scheduler := NewFastScheduler(2, "round_robin")
scheduler.Rebuild([]*Account{warm, healthy})
got := scheduler.Acquire()
@@ -43,7 +43,7 @@ func TestFastSchedulerSkipsDispatchPausedAccount(t *testing.T) {
atomic.StoreInt32(&paused.DispatchPaused, 1)
fallback := newFastSchedulerTestAccount(2, HealthTierHealthy, 80, 2)
- scheduler := NewFastScheduler(2)
+ scheduler := NewFastScheduler(2, "round_robin")
scheduler.Rebuild([]*Account{paused, fallback})
got := scheduler.Acquire()
@@ -62,7 +62,7 @@ func TestFastSchedulerSkipsErrorAccount(t *testing.T) {
errored.Status = StatusError
fallback := newFastSchedulerTestAccount(2, HealthTierHealthy, 80, 2)
- scheduler := NewFastScheduler(2)
+ scheduler := NewFastScheduler(2, "round_robin")
scheduler.Rebuild([]*Account{errored, fallback})
got := scheduler.Acquire()
@@ -79,7 +79,7 @@ func TestFastSchedulerSkipsErrorAccount(t *testing.T) {
func TestFastSchedulerRespectsConcurrencyLimit(t *testing.T) {
acc := newFastSchedulerTestAccount(1, HealthTierHealthy, 100, 1)
- scheduler := NewFastScheduler(1)
+ scheduler := NewFastScheduler(1, "round_robin")
scheduler.Rebuild([]*Account{acc})
first := scheduler.Acquire()
@@ -105,7 +105,7 @@ func TestFastSchedulerRoundRobinWithinTier(t *testing.T) {
a2 := newFastSchedulerTestAccount(2, HealthTierHealthy, 100, 4)
a3 := newFastSchedulerTestAccount(3, HealthTierHealthy, 100, 4)
- scheduler := NewFastScheduler(4)
+ scheduler := NewFastScheduler(4, "round_robin")
scheduler.Rebuild([]*Account{a1, a2, a3})
var got []int64
@@ -287,7 +287,7 @@ func TestFastSchedulerAcquireExcludingRespectsAPIKeyWhitelist(t *testing.T) {
restricted.SetAllowedAPIKeyIDs([]int64{2})
fallback := newFastSchedulerTestAccount(2, HealthTierHealthy, 80, 1)
- scheduler := NewFastScheduler(1)
+ scheduler := NewFastScheduler(1, "round_robin")
scheduler.Rebuild([]*Account{restricted, fallback})
got := scheduler.AcquireExcluding(1, nil)
@@ -307,7 +307,7 @@ func TestFastSchedulerAcquireExcludingWithFilterRespectsPlanFilter(t *testing.T)
pro := newFastSchedulerTestAccount(2, HealthTierHealthy, 80, 1)
pro.PlanType = "pro"
- scheduler := NewFastScheduler(1)
+ scheduler := NewFastScheduler(1, "round_robin")
scheduler.Rebuild([]*Account{plus, pro})
got := scheduler.AcquireExcludingWithFilter(0, nil, func(acc *Account) bool {
@@ -325,7 +325,7 @@ func TestFastSchedulerAcquireExcludingWithFilterRespectsPlanFilter(t *testing.T)
func TestFastSchedulerUpdateMovesAccountBetweenBuckets(t *testing.T) {
acc := newFastSchedulerTestAccount(1, HealthTierHealthy, 100, 2)
- scheduler := NewFastScheduler(2)
+ scheduler := NewFastScheduler(2, "round_robin")
scheduler.Rebuild([]*Account{acc})
sizes := scheduler.BucketSizes()
@@ -358,7 +358,7 @@ func TestFastSchedulerUpdateMovesAccountBetweenBuckets(t *testing.T) {
func TestFastSchedulerSkipsStaleBucketEntryWithoutUpdate(t *testing.T) {
acc := newFastSchedulerTestAccount(1, HealthTierHealthy, 100, 1)
- scheduler := NewFastScheduler(1)
+ scheduler := NewFastScheduler(1, "round_robin")
scheduler.Rebuild([]*Account{acc})
acc.SetCooldownUntil(time.Now().Add(5*time.Minute), "rate_limited")
@@ -392,7 +392,7 @@ func TestFastSchedulerDispatchScoreOutranksProvenHistory(t *testing.T) {
proven := newFastSchedulerTestAccount(2, HealthTierHealthy, 100, 1)
atomic.StoreInt64(&proven.TotalRequests, 11)
- scheduler := NewFastScheduler(1)
+ scheduler := NewFastScheduler(1, "round_robin")
scheduler.Rebuild([]*Account{highScore, proven})
got := scheduler.Acquire()
@@ -413,7 +413,7 @@ func TestFastSchedulerProvenHistoryBreaksDispatchScoreTies(t *testing.T) {
proven := newFastSchedulerTestAccount(2, HealthTierHealthy, 100, 1)
atomic.StoreInt64(&proven.TotalRequests, 11)
- scheduler := NewFastScheduler(1)
+ scheduler := NewFastScheduler(1, "round_robin")
scheduler.Rebuild([]*Account{unproven, proven})
got := scheduler.Acquire()
@@ -442,7 +442,7 @@ func TestFastSchedulerPrefersPremium7dResetSoonOverProvenAccount(t *testing.T) {
soon.UsagePercent7dValid = true
soon.Reset7dAt = now.Add(36 * time.Hour)
- scheduler := NewFastScheduler(1)
+ scheduler := NewFastScheduler(1, "round_robin")
scheduler.Rebuild([]*Account{later, soon})
got := scheduler.Acquire()
@@ -470,7 +470,7 @@ func TestFastSchedulerPrefersPremium5hResetSoonWithinTier(t *testing.T) {
soon.UsagePercent5hValid = true
soon.Reset5hAt = now.Add(30 * time.Minute)
- scheduler := NewFastScheduler(1)
+ scheduler := NewFastScheduler(1, "round_robin")
scheduler.Rebuild([]*Account{later, soon})
got := scheduler.Acquire()
@@ -600,7 +600,7 @@ func TestFastSchedulerPremium5hRateLimitIsFencedAndRecoversAfterReset(t *testing
Reset5hAt: time.Now().Add(30 * time.Minute),
}
- scheduler := NewFastScheduler(4)
+ scheduler := NewFastScheduler(4, "round_robin")
scheduler.Rebuild([]*Account{acc})
sizes := scheduler.BucketSizes()
@@ -784,10 +784,189 @@ func TestFastSchedulerRelease(t *testing.T) {
acc := newFastSchedulerTestAccount(1, HealthTierHealthy, 100, 2)
atomic.StoreInt64(&acc.ActiveRequests, 1)
- scheduler := NewFastScheduler(2)
+ scheduler := NewFastScheduler(2, "round_robin")
scheduler.Release(acc)
if got := atomic.LoadInt64(&acc.ActiveRequests); got != 0 {
t.Fatalf("ActiveRequests after Release() = %d, want 0", got)
}
}
+
+func TestFastSchedulerRemainingQuotaPicksLowestUsage(t *testing.T) {
+ highUsage := &Account{
+ DBID: 1,
+ AccessToken: "token",
+ Status: StatusReady,
+ HealthTier: HealthTierHealthy,
+ UsagePercent7d: 90,
+ UsagePercent7dValid: true,
+ BaseConcurrencyEffective: 4,
+ DynamicConcurrencyLimit: 4,
+ }
+ lowUsage := &Account{
+ DBID: 2,
+ AccessToken: "token",
+ Status: StatusReady,
+ HealthTier: HealthTierHealthy,
+ UsagePercent7d: 10,
+ UsagePercent7dValid: true,
+ BaseConcurrencyEffective: 4,
+ DynamicConcurrencyLimit: 4,
+ }
+ midUsage := &Account{
+ DBID: 3,
+ AccessToken: "token",
+ Status: StatusReady,
+ HealthTier: HealthTierHealthy,
+ UsagePercent7d: 50,
+ UsagePercent7dValid: true,
+ BaseConcurrencyEffective: 4,
+ DynamicConcurrencyLimit: 4,
+ }
+
+ scheduler := NewFastScheduler(4, "remaining_quota")
+ scheduler.Rebuild([]*Account{highUsage, lowUsage, midUsage})
+
+ got := scheduler.Acquire()
+ if got == nil {
+ t.Fatal("Acquire() returned nil")
+ }
+ defer scheduler.Release(got)
+
+ if got.DBID != lowUsage.DBID {
+ t.Fatalf("Acquire() picked dbID=%d, want lowest-usage account %d", got.DBID, lowUsage.DBID)
+ }
+}
+
+func TestFastSchedulerRemainingQuotaSortOrder(t *testing.T) {
+ a1 := &Account{
+ DBID: 1,
+ AccessToken: "token",
+ Status: StatusReady,
+ HealthTier: HealthTierHealthy,
+ UsagePercent7d: 70,
+ UsagePercent7dValid: true,
+ BaseConcurrencyEffective: 1,
+ DynamicConcurrencyLimit: 1,
+ }
+ a2 := &Account{
+ DBID: 2,
+ AccessToken: "token",
+ Status: StatusReady,
+ HealthTier: HealthTierHealthy,
+ UsagePercent7d: 30,
+ UsagePercent7dValid: true,
+ BaseConcurrencyEffective: 1,
+ DynamicConcurrencyLimit: 1,
+ }
+ a3 := &Account{
+ DBID: 3,
+ AccessToken: "token",
+ Status: StatusReady,
+ HealthTier: HealthTierHealthy,
+ UsagePercent7d: 90,
+ UsagePercent7dValid: true,
+ BaseConcurrencyEffective: 1,
+ DynamicConcurrencyLimit: 1,
+ }
+
+ scheduler := NewFastScheduler(1, "remaining_quota")
+ scheduler.Rebuild([]*Account{a1, a2, a3})
+
+ // Acquire all without releasing; concurrency limit 1 forces
+ // iteration through the usage-ascending sorted order.
+ var got []int64
+ for i := 0; i < 3; i++ {
+ acc := scheduler.Acquire()
+ if acc == nil {
+ t.Fatalf("Acquire() returned nil at iteration %d", i)
+ }
+ got = append(got, acc.DBID)
+ }
+
+ // Expect ascending usage: a2 (30%), a1 (70%), a3 (90%)
+ want := []int64{2, 1, 3}
+ for i := range want {
+ if got[i] != want[i] {
+ t.Fatalf("remaining_quota sort order mismatch: got=%v want=%v", got, want)
+ }
+ }
+}
+
+func TestFastSchedulerRemainingQuotaTieBreakProvenThenDBID(t *testing.T) {
+ unproven := &Account{
+ DBID: 1,
+ AccessToken: "token",
+ Status: StatusReady,
+ HealthTier: HealthTierHealthy,
+ UsagePercent7d: 50,
+ UsagePercent7dValid: true,
+ BaseConcurrencyEffective: 2,
+ DynamicConcurrencyLimit: 2,
+ }
+ unproven.TotalRequests = 0 // not proven
+
+ proven := &Account{
+ DBID: 2,
+ AccessToken: "token",
+ Status: StatusReady,
+ HealthTier: HealthTierHealthy,
+ UsagePercent7d: 50,
+ UsagePercent7dValid: true,
+ BaseConcurrencyEffective: 2,
+ DynamicConcurrencyLimit: 2,
+ }
+ proven.TotalRequests = 11 // proven (>10)
+
+ scheduler := NewFastScheduler(4, "remaining_quota")
+ scheduler.Rebuild([]*Account{unproven, proven})
+
+ // Both same usage; proven account should sort first.
+ got := scheduler.Acquire()
+ if got == nil {
+ t.Fatal("Acquire() returned nil")
+ }
+ defer scheduler.Release(got)
+
+ if got.DBID != proven.DBID {
+ t.Fatalf("Acquire() picked dbID=%d, want proven tie-breaker account %d", got.DBID, proven.DBID)
+ }
+}
+
+func TestFastSchedulerSetSchedulerModeEmptyDefaultsToRoundRobin(t *testing.T) {
+ s := NewFastScheduler(4, "remaining_quota")
+ if s.SchedulerMode() != "remaining_quota" {
+ t.Fatalf("initial mode = %q, want remaining_quota", s.SchedulerMode())
+ }
+ s.SetSchedulerMode("")
+ if s.SchedulerMode() != "round_robin" {
+ t.Fatalf("after empty mode: got %q, want round_robin", s.SchedulerMode())
+ }
+}
+
+func TestFastSchedulerSetSchedulerModeResortsBuckets(t *testing.T) {
+ highUsage := newFastSchedulerTestAccount(1, HealthTierHealthy, 90, 2)
+ highUsage.UsagePercent7d = 90
+ highUsage.UsagePercent7dValid = true
+ lowUsage := newFastSchedulerTestAccount(2, HealthTierHealthy, 10, 2)
+ lowUsage.UsagePercent7d = 10
+ lowUsage.UsagePercent7dValid = true
+
+ s := NewFastScheduler(4, "round_robin")
+ s.Rebuild([]*Account{highUsage, lowUsage})
+
+ // In round_robin mode, acquires are cursor-based, not usage-based
+ first := s.Acquire()
+ s.Release(first)
+
+ s.SetSchedulerMode("remaining_quota")
+ // After re-sort + zero cursor, should pick lowest usage first
+ first = s.Acquire()
+ if first == nil {
+ t.Fatal("Acquire() returned nil after mode switch")
+ }
+ if first.DBID != lowUsage.DBID {
+ t.Fatalf("after remaining_quota switch, Acquire() picked dbID=%d, want lowest-usage account %d", first.DBID, lowUsage.DBID)
+ }
+ s.Release(first)
+}
diff --git a/auth/store.go b/auth/store.go
index 4420b744..b1ed11d8 100644
--- a/auth/store.go
+++ b/auth/store.go
@@ -110,6 +110,8 @@ type Account struct {
// per-account 调度配置(nil = 跟随默认)
ScoreBiasOverride *int64
BaseConcurrencyOverride *int64
+ CreditEnabled bool // 信用账号标记
+ CreditSkipUsageWindow bool // 跳过用量窗口惩罚
AllowedAPIKeyIDs []int64
allowedAPIKeySet map[int64]struct{}
Tags []string
@@ -617,7 +619,7 @@ func (a *Account) schedulerBreakdownLocked(now time.Time) SchedulerBreakdown {
}
}
- if a.UsagePercent7dValid && strings.EqualFold(a.PlanType, "free") {
+ if !(a.CreditEnabled && a.CreditSkipUsageWindow) && a.UsagePercent7dValid && strings.EqualFold(a.PlanType, "free") {
switch {
case a.UsagePercent7d >= 100:
breakdown.UsagePenalty7d = 40
@@ -801,7 +803,7 @@ func (a *Account) recomputeSchedulerLocked(baseLimit int64) {
if !a.LastUnauthorizedAt.IsZero() && now.Sub(a.LastUnauthorizedAt) < 24*time.Hour && tier == HealthTierHealthy {
tier = HealthTierWarm
}
- if a.UsagePercent7dValid && strings.EqualFold(a.PlanType, "free") {
+ if !(a.CreditEnabled && a.CreditSkipUsageWindow) && a.UsagePercent7dValid && strings.EqualFold(a.PlanType, "free") {
switch {
case a.UsagePercent7d >= 95:
tier = HealthTierRisky
@@ -880,6 +882,9 @@ func (a *Account) IsAvailable() bool {
// usageExhaustedLocked 判断 Free 账号 7d 用量是否已耗尽(需持有 mu 读锁)
func (a *Account) usageExhaustedLocked() bool {
+ if a.CreditEnabled && a.CreditSkipUsageWindow {
+ return false
+ }
return a.UsagePercent7dValid && strings.EqualFold(a.PlanType, "free") && a.UsagePercent7d >= 100
}
@@ -1014,6 +1019,16 @@ func (a *Account) GetUsagePercent7d() (float64, bool) {
return a.UsagePercent7d, a.UsagePercent7dValid
}
+// usagePercentForScheduling 返回调度排序用的用量百分比(7d 窗口有效则返回,否则 0)。
+func (a *Account) usagePercentForScheduling() float64 {
+ a.mu.RLock()
+ defer a.mu.RUnlock()
+ if a.UsagePercent7dValid {
+ return a.UsagePercent7d
+ }
+ return 0
+}
+
// SetUsageSnapshot5h 更新 5h 用量快照
func (a *Account) SetUsageSnapshot5h(pct float64, resetAt time.Time) {
a.mu.Lock()
@@ -1491,6 +1506,7 @@ type Store struct {
allowRemoteMigration atomic.Bool // 是否允许远程迁移拉取账号
modelMapping atomic.Value // 模型映射 JSON 字符串
+ schedulerMode atomic.Value // string: "round_robin" or "remaining_quota"
promptFilterConfig atomic.Value // promptfilter.Config
sessionMu sync.RWMutex
sessionBindings map[string]sessionAffinity
@@ -1841,6 +1857,7 @@ func NewStore(db *database.DB, tc cache.TokenCache, settings *database.SystemSet
RecoveryProbeIntervalMinutes: 30,
ProxyURL: "",
MaxRateLimitRetries: 1,
+ SchedulerMode: "round_robin",
}
}
s := &Store{
@@ -1874,6 +1891,7 @@ func NewStore(db *database.DB, tc cache.TokenCache, settings *database.SystemSet
}
atomic.StoreInt64(&s.maxRateLimitRetries, rateLimitRetries)
s.allowRemoteMigration.Store(settings.AllowRemoteMigration)
+ s.schedulerMode.Store(settings.SchedulerMode)
if settings.ModelMapping != "" {
s.modelMapping.Store(settings.ModelMapping)
}
@@ -1882,7 +1900,7 @@ func NewStore(db *database.DB, tc cache.TokenCache, settings *database.SystemSet
fastEnabled := fastSchedulerEnabledFromEnv() || settings.FastSchedulerEnabled
s.fastSchedulerEnabled.Store(fastEnabled)
if fastEnabled {
- s.fastScheduler.Store(NewFastScheduler(int64(settings.MaxConcurrency)))
+ s.fastScheduler.Store(NewFastScheduler(int64(settings.MaxConcurrency), s.GetSchedulerMode()))
log.Printf("快速调度器已启用(请求热路径将优先走本地内存调度器)")
}
@@ -2281,6 +2299,8 @@ func (s *Store) loadFromDB(ctx context.Context) error {
if !row.Enabled {
atomic.StoreInt32(&account.DispatchPaused, 1)
}
+ account.CreditEnabled = row.CreditEnabled
+ account.CreditSkipUsageWindow = row.CreditSkipUsageWindow
if row.Status == "error" {
account.Status = StatusError
account.ErrorMsg = row.ErrorMessage
@@ -2844,6 +2864,10 @@ func (s *Store) Release(acc *Account) {
// SetMaxConcurrency 动态更新每账号并发上限
func (s *Store) SetMaxConcurrency(n int) {
atomic.StoreInt64(&s.maxConcurrency, int64(n))
+ // Update existing scheduler's base limit in-place before full rebuild.
+ if scheduler := s.getFastScheduler(); scheduler != nil {
+ scheduler.SetBaseLimit(int64(n))
+ }
s.recomputeAllAccountSchedulerState()
s.rebuildFastScheduler()
}
@@ -2938,6 +2962,28 @@ func (s *Store) GetModelMapping() string {
return "{}"
}
+// GetSchedulerMode 获取当前调度模式
+func (s *Store) GetSchedulerMode() string {
+ if v, ok := s.schedulerMode.Load().(string); ok {
+ return v
+ }
+ return "round_robin"
+}
+
+// SetSchedulerMode 设置调度模式并传播到 FastScheduler
+func (s *Store) SetSchedulerMode(mode string) {
+ switch mode {
+ case "round_robin", "remaining_quota":
+ // ok
+ default:
+ mode = "round_robin"
+ }
+ s.schedulerMode.Store(mode)
+ if scheduler := s.getFastScheduler(); scheduler != nil {
+ scheduler.SetSchedulerMode(mode)
+ }
+}
+
func promptFilterConfigFromSettings(settings *database.SystemSettings) promptfilter.Config {
cfg := promptfilter.DefaultConfig()
if settings == nil {
@@ -3070,6 +3116,31 @@ func (s *Store) ApplyAccountGroups(dbID int64, groupIDs []int64) bool {
return true
}
+// UpdateAccountCredit 更新账号信用设置
+// 传入 nil 表示不修改该字段。
+func (s *Store) UpdateAccountCredit(dbID int64, creditEnabled, creditSkipUsageWindow *bool) error {
+ acc := s.FindByID(dbID)
+ if acc == nil {
+ return fmt.Errorf("账号 %d 不存在", dbID)
+ }
+ ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+ defer cancel()
+ if err := s.db.UpdateAccountCredit(ctx, dbID, creditEnabled, creditSkipUsageWindow); err != nil {
+ return err
+ }
+ acc.mu.Lock()
+ if creditEnabled != nil {
+ acc.CreditEnabled = *creditEnabled
+ }
+ if creditSkipUsageWindow != nil {
+ acc.CreditSkipUsageWindow = *creditSkipUsageWindow
+ }
+ acc.recomputeSchedulerLocked(atomic.LoadInt64(&s.maxConcurrency))
+ acc.mu.Unlock()
+ s.fastSchedulerUpdate(acc)
+ return nil
+}
+
func (s *Store) ApplyAccountGroupMemberships(memberships map[int64][]int64) {
for _, acc := range s.Accounts() {
acc.mu.Lock()
diff --git a/database/billing.go b/database/billing.go
index d2a9c557..826a9329 100644
--- a/database/billing.go
+++ b/database/billing.go
@@ -2,14 +2,22 @@ package database
import "strings"
-// ModelPricing 模型价格配置(每百万 token 的价格,单位:美元)
+const longContextThreshold = 272000
+
type ModelPricing struct {
- InputPricePerMToken float64 // 输入价格(美元/百万token)
- InputPricePerMTokenPriority float64 // priority service tier 输入价格
- OutputPricePerMToken float64 // 输出价格(美元/百万token)
- OutputPricePerMTokenPriority float64 // priority service tier 输出价格
- CacheReadPricePerMToken float64 // 缓存命中输入价格
- CacheReadPricePerMTokenPriority float64 // priority service tier 缓存命中输入价格
+ InputPricePerMToken float64
+ InputPricePerMTokenPriority float64
+ OutputPricePerMToken float64
+ OutputPricePerMTokenPriority float64
+ CacheReadPricePerMToken float64
+ CacheReadPricePerMTokenPriority float64
+
+ LongInputPricePerMToken float64
+ LongInputPricePerMTokenPriority float64
+ LongOutputPricePerMToken float64
+ LongOutputPricePerMTokenPriority float64
+ LongCacheReadPricePerMToken float64
+ LongCacheReadPricePerMTokenPriority float64
}
type modelPricingRule struct {
@@ -17,29 +25,44 @@ type modelPricingRule struct {
pricing ModelPricing
}
-type costBreakdown struct {
- InputCost float64
- OutputCost float64
- CacheReadCost float64
- TotalCost float64
- InputPricePerMToken float64
- OutputPricePerMToken float64
- CacheReadPricePerMToken float64
- ServiceTierCostMultiplier float64
+type CostBreakdown struct {
+ InputCost float64 `json:"input_cost"`
+ OutputCost float64 `json:"output_cost"`
+ CacheReadCost float64 `json:"cache_read_cost"`
+ TotalCost float64 `json:"total_cost"`
+ InputPricePerMToken float64 `json:"input_price_per_mtoken"`
+ OutputPricePerMToken float64 `json:"output_price_per_mtoken"`
+ CacheReadPricePerMToken float64 `json:"cache_read_price_per_mtoken"`
+ ServiceTierCostMultiplier float64 `json:"service_tier_cost_multiplier"`
}
var (
defaultModelPricing = &ModelPricing{InputPricePerMToken: 1.0, OutputPricePerMToken: 2.0}
modelPricingRules = []modelPricingRule{
- // Codex/GPT-5 系列,参考 sub2api 的动态定价优先、fallback 兜底策略。
{model: "gpt-5.5", pricing: ModelPricing{
- InputPricePerMToken: 2.5,
- InputPricePerMTokenPriority: 5.0,
- OutputPricePerMToken: 15.0,
- OutputPricePerMTokenPriority: 30.0,
- CacheReadPricePerMToken: 0.25,
- CacheReadPricePerMTokenPriority: 0.5,
+ InputPricePerMToken: 5.0,
+ InputPricePerMTokenPriority: 12.5,
+ OutputPricePerMToken: 30.0,
+ OutputPricePerMTokenPriority: 75.0,
+ CacheReadPricePerMToken: 0.5,
+ CacheReadPricePerMTokenPriority: 1.25,
+ LongInputPricePerMToken: 10.0,
+ LongInputPricePerMTokenPriority: 25.0,
+ LongOutputPricePerMToken: 45.0,
+ LongOutputPricePerMTokenPriority: 112.5,
+ LongCacheReadPricePerMToken: 1.0,
+ LongCacheReadPricePerMTokenPriority: 2.5,
+ }},
+ {model: "gpt-5.5-pro", pricing: ModelPricing{
+ InputPricePerMToken: 30.0,
+ InputPricePerMTokenPriority: 75.0,
+ OutputPricePerMToken: 180.0,
+ OutputPricePerMTokenPriority: 450.0,
+ LongInputPricePerMToken: 60.0,
+ LongInputPricePerMTokenPriority: 150.0,
+ LongOutputPricePerMToken: 270.0,
+ LongOutputPricePerMTokenPriority: 675.0,
}},
{model: "gpt-5.4-mini", pricing: ModelPricing{InputPricePerMToken: 0.75, OutputPricePerMToken: 4.5, CacheReadPricePerMToken: 0.075}},
{model: "gpt-5.4-nano", pricing: ModelPricing{InputPricePerMToken: 0.2, OutputPricePerMToken: 1.25, CacheReadPricePerMToken: 0.02}},
@@ -50,6 +73,22 @@ var (
OutputPricePerMTokenPriority: 30.0,
CacheReadPricePerMToken: 0.25,
CacheReadPricePerMTokenPriority: 0.5,
+ LongInputPricePerMToken: 5.0,
+ LongInputPricePerMTokenPriority: 10.0,
+ LongOutputPricePerMToken: 22.5,
+ LongOutputPricePerMTokenPriority: 45.0,
+ LongCacheReadPricePerMToken: 0.5,
+ LongCacheReadPricePerMTokenPriority: 1.0,
+ }},
+ {model: "gpt-5.4-pro", pricing: ModelPricing{
+ InputPricePerMToken: 30.0,
+ InputPricePerMTokenPriority: 75.0,
+ OutputPricePerMToken: 180.0,
+ OutputPricePerMTokenPriority: 450.0,
+ LongInputPricePerMToken: 60.0,
+ LongInputPricePerMTokenPriority: 150.0,
+ LongOutputPricePerMToken: 270.0,
+ LongOutputPricePerMTokenPriority: 675.0,
}},
{model: "gpt-5.3-codex-spark", pricing: ModelPricing{
InputPricePerMToken: 1.25,
@@ -75,8 +114,6 @@ var (
CacheReadPricePerMToken: 0.175,
CacheReadPricePerMTokenPriority: 0.35,
}},
-
- // GPT-4 系列。保持最具体模型优先,避免 gpt-4o-mini 被 gpt-4o/gpt-4 抢先匹配。
{model: "gpt-4o-mini", pricing: ModelPricing{InputPricePerMToken: 0.15, OutputPricePerMToken: 0.6}},
{model: "gpt-4o", pricing: ModelPricing{InputPricePerMToken: 2.5, OutputPricePerMToken: 10.0}},
{model: "gpt-4-turbo", pricing: ModelPricing{InputPricePerMToken: 10.0, OutputPricePerMToken: 30.0}},
@@ -85,9 +122,7 @@ var (
}
)
-// getModelPricing 获取模型价格配置
-// 优先使用确定性的模型族匹配,避免 Go map 迭代顺序导致重叠前缀随机命中。
-func getModelPricing(model string) *ModelPricing {
+func GetModelPricing(model string) *ModelPricing {
normalized := normalizeBillingModelName(model)
if pricing := claudeFamilyPricing(normalized); pricing != nil {
return pricing
@@ -104,31 +139,42 @@ func getModelPricing(model string) *ModelPricing {
return defaultModelPricing
}
-// calculateCost 计算使用费用
-// inputTokens: 输入 token 数量
-// outputTokens: 输出 token 数量
-// model: 模型名称
-// 返回:账号计费金额(美元)
-func calculateCost(inputTokens, outputTokens, cachedTokens int, model string, serviceTier string) float64 {
- return calculateCostBreakdown(inputTokens, outputTokens, cachedTokens, model, serviceTier).TotalCost
+func CalculateCost(inputTokens, outputTokens, cachedTokens int, model string, serviceTier string) float64 {
+ return CalculateCostBreakdown(inputTokens, outputTokens, cachedTokens, model, serviceTier).TotalCost
}
-func calculateCostBreakdown(inputTokens, outputTokens, cachedTokens int, model string, serviceTier string) costBreakdown {
- pricing := getModelPricing(model)
+func CalculateCostBreakdown(inputTokens, outputTokens, cachedTokens int, model string, serviceTier string) CostBreakdown {
+ pricing := GetModelPricing(model)
+ isLong := inputTokens > longContextThreshold
+
inputPrice := pricing.InputPricePerMToken
outputPrice := pricing.OutputPricePerMToken
cacheReadPrice := pricing.CacheReadPricePerMToken
+ if isLong && pricing.LongInputPricePerMToken > 0 {
+ inputPrice = pricing.LongInputPricePerMToken
+ outputPrice = pricing.LongOutputPricePerMToken
+ if pricing.LongCacheReadPricePerMToken > 0 {
+ cacheReadPrice = pricing.LongCacheReadPricePerMToken
+ }
+ }
+
tierMultiplier := serviceTierCostMultiplier(serviceTier)
if usePriorityPricing(serviceTier, pricing) {
tierMultiplier = 1
- if pricing.InputPricePerMTokenPriority > 0 {
+ if isLong && pricing.LongInputPricePerMTokenPriority > 0 {
+ inputPrice = pricing.LongInputPricePerMTokenPriority
+ } else if pricing.InputPricePerMTokenPriority > 0 {
inputPrice = pricing.InputPricePerMTokenPriority
}
- if pricing.OutputPricePerMTokenPriority > 0 {
+ if isLong && pricing.LongOutputPricePerMTokenPriority > 0 {
+ outputPrice = pricing.LongOutputPricePerMTokenPriority
+ } else if pricing.OutputPricePerMTokenPriority > 0 {
outputPrice = pricing.OutputPricePerMTokenPriority
}
- if pricing.CacheReadPricePerMTokenPriority > 0 {
+ if isLong && pricing.LongCacheReadPricePerMTokenPriority > 0 {
+ cacheReadPrice = pricing.LongCacheReadPricePerMTokenPriority
+ } else if pricing.CacheReadPricePerMTokenPriority > 0 {
cacheReadPrice = pricing.CacheReadPricePerMTokenPriority
}
}
@@ -149,7 +195,7 @@ func calculateCostBreakdown(inputTokens, outputTokens, cachedTokens int, model s
cacheReadCost := float64(cachedTokens) / 1000000.0 * cacheReadPrice
outputCost := float64(outputTokens) / 1000000.0 * outputPrice
- return costBreakdown{
+ return CostBreakdown{
InputCost: inputCost * tierMultiplier,
OutputCost: outputCost * tierMultiplier,
CacheReadCost: cacheReadCost * tierMultiplier,
@@ -178,14 +224,18 @@ func normalizeBillingModelName(model string) string {
}
func normalizeCodexBillingModel(model string) (string, bool) {
- compact := strings.NewReplacer(" ", "-", "_", "-").Replace(model)
+ compact := strings.NewReplacer(" ", "-", "_", "-").Replace(strings.ToLower(model))
switch {
+ case strings.Contains(compact, "gpt-5.5-pro") || strings.Contains(compact, "gpt5-5-pro") || strings.Contains(compact, "gpt5.5-pro"):
+ return "gpt-5.5-pro", true
case strings.Contains(compact, "gpt-5.5") || strings.Contains(compact, "gpt5-5") || strings.Contains(compact, "gpt5.5"):
return "gpt-5.5", true
case strings.Contains(compact, "gpt-5.4-mini") || strings.Contains(compact, "gpt5-4-mini") || strings.Contains(compact, "gpt5.4-mini"):
return "gpt-5.4-mini", true
case strings.Contains(compact, "gpt-5.4-nano") || strings.Contains(compact, "gpt5-4-nano") || strings.Contains(compact, "gpt5.4-nano"):
return "gpt-5.4-nano", true
+ case strings.Contains(compact, "gpt-5.4-pro") || strings.Contains(compact, "gpt5-4-pro") || strings.Contains(compact, "gpt5.4-pro"):
+ return "gpt-5.4-pro", true
case strings.Contains(compact, "gpt-5.4") || strings.Contains(compact, "gpt5-4") || strings.Contains(compact, "gpt5.4"):
return "gpt-5.4", true
case strings.Contains(compact, "gpt-5.2") || strings.Contains(compact, "gpt5-2") || strings.Contains(compact, "gpt5.2"):
@@ -196,6 +246,12 @@ func normalizeCodexBillingModel(model string) (string, bool) {
return "gpt-5.3-codex", true
case strings.Contains(compact, "gpt-5.3") || strings.Contains(compact, "gpt5-3") || strings.Contains(compact, "gpt5.3"):
return "gpt-5.3-codex", true
+ case strings.Contains(compact, "codex-auto-review"):
+ // Codex internal auto-review model. ChatGPT backend API only
+ // (chatgpt.com/backend-api/codex). Not available via public API.
+ // Official catalog: Plus/Pro/Team/Business only, excludes free.
+ // Specs match gpt-5.4 (272K context, 4 thinking levels).
+ return "gpt-5.4", true
case strings.Contains(compact, "codex"):
return "gpt-5.3-codex", true
case strings.Contains(compact, "gpt-5") || strings.Contains(compact, "gpt5"):
@@ -292,3 +348,12 @@ func serviceTierCostMultiplier(serviceTier string) float64 {
func normalizeServiceTier(serviceTier string) string {
return strings.ToLower(strings.TrimSpace(serviceTier))
}
+
+// lowercase aliases for internal callers
+func calculateCost(inputTokens, outputTokens, cachedTokens int, model string, serviceTier string) float64 {
+ return CalculateCost(inputTokens, outputTokens, cachedTokens, model, serviceTier)
+}
+
+func calculateCostBreakdown(inputTokens, outputTokens, cachedTokens int, model string, serviceTier string) CostBreakdown {
+ return CalculateCostBreakdown(inputTokens, outputTokens, cachedTokens, model, serviceTier)
+}
diff --git a/database/billing_test.go b/database/billing_test.go
index acc8a215..e7d65396 100644
--- a/database/billing_test.go
+++ b/database/billing_test.go
@@ -18,7 +18,7 @@ func TestGetModelPricingUsesMostSpecificOpenAIPrefix(t *testing.T) {
for _, tt := range tests {
t.Run(tt.model, func(t *testing.T) {
- got := getModelPricing(tt.model)
+ got := GetModelPricing(tt.model)
assertPricing(t, got, tt.wantInput, tt.wantOutput)
})
}
@@ -33,12 +33,12 @@ func TestGetModelPricingUsesSub2APICodexFallbacks(t *testing.T) {
{model: "gpt-5.4-mini-20260401", wantInput: 0.75, wantOutput: 4.5},
{model: "gpt-5.3-codex-spark", wantInput: 1.25, wantOutput: 10.0},
{model: "gpt-5.3-codex", wantInput: 1.75, wantOutput: 14.0},
- {model: "gpt-5.5", wantInput: 2.5, wantOutput: 15.0},
+ {model: "gpt-5.5", wantInput: 5.0, wantOutput: 30.0},
}
for _, tt := range tests {
t.Run(tt.model, func(t *testing.T) {
- got := getModelPricing(tt.model)
+ got := GetModelPricing(tt.model)
assertPricing(t, got, tt.wantInput, tt.wantOutput)
})
}
@@ -59,7 +59,7 @@ func TestGetModelPricingUsesSub2APIClaudeFamilies(t *testing.T) {
for _, tt := range tests {
t.Run(tt.model, func(t *testing.T) {
- got := getModelPricing(tt.model)
+ got := GetModelPricing(tt.model)
assertPricing(t, got, tt.wantInput, tt.wantOutput)
})
}
@@ -84,7 +84,7 @@ func TestCalculateCostHandlesCachedTokensAndServiceTier(t *testing.T) {
want: 0.00955,
},
{
- name: "keeps legacy input price when cache pricing is unavailable",
+ name: "without cache pricing (gpt-4o)",
model: "gpt-4o",
inputTokens: 1000,
outputTokens: 500,
@@ -134,20 +134,22 @@ func TestCalculateCostBreakdownExposesDisplayFields(t *testing.T) {
assertFloatEqual(t, got.ServiceTierCostMultiplier, 0.5)
}
-func TestGPT55PricingMatchesGPT54Fallback(t *testing.T) {
- gpt54 := getModelPricing("gpt-5.4")
- gpt55 := getModelPricing("gpt-5.5")
+func TestGPT55PricingDoesNotMatchGPT54(t *testing.T) {
+ gpt54 := GetModelPricing("gpt-5.4")
+ gpt55 := GetModelPricing("gpt-5.5")
- assertFloatEqual(t, gpt55.InputPricePerMToken, gpt54.InputPricePerMToken)
- assertFloatEqual(t, gpt55.OutputPricePerMToken, gpt54.OutputPricePerMToken)
- assertFloatEqual(t, gpt55.CacheReadPricePerMToken, gpt54.CacheReadPricePerMToken)
- assertFloatEqual(t, gpt55.InputPricePerMTokenPriority, gpt54.InputPricePerMTokenPriority)
- assertFloatEqual(t, gpt55.OutputPricePerMTokenPriority, gpt54.OutputPricePerMTokenPriority)
- assertFloatEqual(t, gpt55.CacheReadPricePerMTokenPriority, gpt54.CacheReadPricePerMTokenPriority)
+ // gpt-5.5 is 2x gpt-5.4: $5/$30 vs $2.5/$15
+ assertFloatEqual(t, gpt55.InputPricePerMToken, 5.0)
+ assertFloatEqual(t, gpt55.OutputPricePerMToken, 30.0)
+ assertFloatEqual(t, gpt55.CacheReadPricePerMToken, 0.5)
+ assertFloatEqual(t, gpt55.InputPricePerMTokenPriority, 12.5)
+ assertFloatEqual(t, gpt55.OutputPricePerMTokenPriority, 75.0)
+ assertFloatEqual(t, gpt55.CacheReadPricePerMTokenPriority, 1.25)
+ _ = gpt54
}
func TestSparkPricingUsesGpt51CodexFallback(t *testing.T) {
- spark := getModelPricing("gpt-5.3-codex-spark-high")
+ spark := GetModelPricing("gpt-5.3-codex-spark-high")
assertFloatEqual(t, spark.InputPricePerMToken, 1.25)
assertFloatEqual(t, spark.OutputPricePerMToken, 10.0)
@@ -158,8 +160,8 @@ func TestSparkPricingUsesGpt51CodexFallback(t *testing.T) {
}
func TestGPT53CodexPricingUsesGPT52CodexFallback(t *testing.T) {
- codex := getModelPricing("gpt-5.3-codex-xhigh")
- gpt52 := getModelPricing("gpt-5.2")
+ codex := GetModelPricing("gpt-5.3-codex-xhigh")
+ gpt52 := GetModelPricing("gpt-5.2")
assertFloatEqual(t, codex.InputPricePerMToken, gpt52.InputPricePerMToken)
assertFloatEqual(t, codex.OutputPricePerMToken, gpt52.OutputPricePerMToken)
@@ -171,7 +173,7 @@ func TestGPT53CodexPricingUsesGPT52CodexFallback(t *testing.T) {
func TestUsageLogBreakdownScalesToStoredBilledTotal(t *testing.T) {
log := &UsageLog{
- Model: "gpt-5.5",
+ Model: "gpt-5.4",
InputTokens: 1000,
StatusCode: 200,
AccountBilled: 0.0025,
@@ -188,7 +190,7 @@ func TestUsageLogBreakdownScalesToStoredBilledTotal(t *testing.T) {
func assertPricing(t *testing.T, got *ModelPricing, wantInput, wantOutput float64) {
t.Helper()
if got == nil {
- t.Fatal("getModelPricing returned nil")
+ t.Fatal("GetModelPricing returned nil")
}
if math.Abs(got.InputPricePerMToken-wantInput) > 1e-12 || math.Abs(got.OutputPricePerMToken-wantOutput) > 1e-12 {
t.Fatalf("pricing = input %.12f output %.12f, want input %.12f output %.12f",
@@ -202,3 +204,122 @@ func assertFloatEqual(t *testing.T, got, want float64) {
t.Fatalf("got %.12f, want %.12f", got, want)
}
}
+
+func TestProModelsHaveCorrectPricing(t *testing.T) {
+ p55 := GetModelPricing("gpt-5.5-pro")
+ if p55.InputPricePerMToken != 30.0 || p55.OutputPricePerMToken != 180.0 {
+ t.Fatalf("gpt-5.5-pro pricing = input %.1f output %.1f, want input 30.0 output 180.0",
+ p55.InputPricePerMToken, p55.OutputPricePerMToken)
+ }
+
+ p54 := GetModelPricing("gpt-5.4-pro")
+ if p54.InputPricePerMToken != 30.0 || p54.OutputPricePerMToken != 180.0 {
+ t.Fatalf("gpt-5.4-pro pricing = input %.1f output %.1f, want input 30.0 output 180.0",
+ p54.InputPricePerMToken, p54.OutputPricePerMToken)
+ }
+
+ // Verify pro is NOT same as standard
+ std55 := GetModelPricing("gpt-5.5")
+ if std55.InputPricePerMToken == p55.InputPricePerMToken {
+ t.Fatal("gpt-5.5-pro should have different pricing from gpt-5.5")
+ }
+}
+
+func TestLongContextPricingTriggersAbove272KTokens(t *testing.T) {
+ // Below threshold: standard pricing.
+ std := CalculateCostBreakdown(272000, 1000, 0, "gpt-5.4", "")
+ assertFloatEqual(t, std.InputPricePerMToken, 2.5)
+ assertFloatEqual(t, std.OutputPricePerMToken, 15.0)
+
+ // Above threshold: long context premium pricing.
+ long := CalculateCostBreakdown(272001, 1000, 0, "gpt-5.4", "")
+ assertFloatEqual(t, long.InputPricePerMToken, 5.0)
+ assertFloatEqual(t, long.OutputPricePerMToken, 22.5)
+
+ // Verify total cost is higher for long context.
+ if long.TotalCost <= std.TotalCost {
+ t.Fatalf("long context cost %.12f should be > standard cost %.12f", long.TotalCost, std.TotalCost)
+ }
+}
+
+func TestLongContextPricingWithPriorityTier(t *testing.T) {
+ long := CalculateCostBreakdown(300000, 1000, 200, "gpt-5.4", "priority")
+ assertFloatEqual(t, long.InputPricePerMToken, 10.0)
+ assertFloatEqual(t, long.OutputPricePerMToken, 45.0)
+ assertFloatEqual(t, long.CacheReadPricePerMToken, 1.0)
+ assertFloatEqual(t, long.ServiceTierCostMultiplier, 1.0)
+}
+
+func TestLongContextPricingDoesNotApplyWhenNoLongPricingDefined(t *testing.T) {
+ // gpt-4o has no long context pricing fields defined.
+ std := CalculateCostBreakdown(1000, 500, 0, "gpt-4o", "")
+ long := CalculateCostBreakdown(300000, 500, 0, "gpt-4o", "")
+ // Input price should be the same since no long variant exists.
+ assertFloatEqual(t, std.InputPricePerMToken, long.InputPricePerMToken)
+ assertFloatEqual(t, std.OutputPricePerMToken, long.OutputPricePerMToken)
+}
+
+func TestCodexAutoReviewModelNormalizesToGPT54(t *testing.T) {
+ pricing := GetModelPricing("codex-auto-review")
+ if pricing == nil {
+ t.Fatal("GetModelPricing(codex-auto-review) returned nil")
+ }
+ // Should normalize to gpt-5.4 pricing.
+ gpt54 := GetModelPricing("gpt-5.4")
+ assertFloatEqual(t, pricing.InputPricePerMToken, gpt54.InputPricePerMToken)
+ assertFloatEqual(t, pricing.OutputPricePerMToken, gpt54.OutputPricePerMToken)
+ assertFloatEqual(t, pricing.CacheReadPricePerMToken, gpt54.CacheReadPricePerMToken)
+ assertFloatEqual(t, pricing.InputPricePerMTokenPriority, gpt54.InputPricePerMTokenPriority)
+ assertFloatEqual(t, pricing.OutputPricePerMTokenPriority, gpt54.OutputPricePerMTokenPriority)
+ assertFloatEqual(t, pricing.CacheReadPricePerMTokenPriority, gpt54.CacheReadPricePerMTokenPriority)
+}
+
+func TestCodexAutoReviewLongContextPricing(t *testing.T) {
+ // codex-auto-review maps to gpt-5.4 which has long context pricing.
+ long := CalculateCostBreakdown(300000, 500, 100, "codex-auto-review", "")
+ assertFloatEqual(t, long.InputPricePerMToken, 5.0) // long input price
+ assertFloatEqual(t, long.OutputPricePerMToken, 22.5) // long output price
+ assertFloatEqual(t, long.CacheReadPricePerMToken, 0.5) // long cache read price
+}
+
+func TestCodexAutoReviewPriorityPricing(t *testing.T) {
+ bd := CalculateCostBreakdown(1000, 500, 0, "codex-auto-review", "priority")
+ assertFloatEqual(t, bd.InputPricePerMToken, 5.0)
+ assertFloatEqual(t, bd.OutputPricePerMToken, 30.0)
+ assertFloatEqual(t, bd.ServiceTierCostMultiplier, 1.0)
+}
+
+func TestNormalizeCodexBillingModelCodexAutoReview(t *testing.T) {
+ tests := []struct {
+ model string
+ want string
+ }{
+ {"codex-auto-review", "gpt-5.4"},
+ {"codex-auto-review-v2", "gpt-5.4"}, // variant suffix should match
+ {"CODEX-AUTO-REVIEW", "gpt-5.4"}, // case-insensitive
+ {"codex_auto_review", "gpt-5.4"}, // underscores normalized
+ }
+ for _, tt := range tests {
+ t.Run(tt.model, func(t *testing.T) {
+ got, ok := normalizeCodexBillingModel(tt.model)
+ if !ok {
+ t.Fatalf("normalizeCodexBillingModel(%q) ok=false, want true", tt.model)
+ }
+ if got != tt.want {
+ t.Fatalf("normalizeCodexBillingModel(%q) = %q, want %q", tt.model, got, tt.want)
+ }
+ })
+ }
+}
+
+func TestGPT55LongContextPricing(t *testing.T) {
+ // gpt-5.5 has long pricing: $10/$45 standard, $25/$112.5 priority.
+ long := CalculateCostBreakdown(300000, 500, 0, "gpt-5.5", "")
+ assertFloatEqual(t, long.InputPricePerMToken, 10.0)
+ assertFloatEqual(t, long.OutputPricePerMToken, 45.0)
+
+ longPri := CalculateCostBreakdown(300000, 500, 0, "gpt-5.5", "priority")
+ assertFloatEqual(t, longPri.InputPricePerMToken, 25.0)
+ assertFloatEqual(t, longPri.OutputPricePerMToken, 112.5)
+ assertFloatEqual(t, longPri.ServiceTierCostMultiplier, 1.0)
+}
diff --git a/database/postgres.go b/database/postgres.go
index f34811c4..dc56414f 100644
--- a/database/postgres.go
+++ b/database/postgres.go
@@ -30,6 +30,8 @@ type AccountRow struct {
ErrorMessage string
Enabled bool
Locked bool
+ CreditEnabled bool
+ CreditSkipUsageWindow bool
ScoreBiasOverride sql.NullInt64
BaseConcurrencyOverride sql.NullInt64
Tags []string
@@ -481,6 +483,8 @@ func (db *DB) migrate(ctx context.Context) error {
ALTER TABLE accounts ADD COLUMN IF NOT EXISTS today_used_count INT DEFAULT 0;
ALTER TABLE accounts ADD COLUMN IF NOT EXISTS image_quota_reset_at TIMESTAMPTZ NULL;
ALTER TABLE accounts ADD COLUMN IF NOT EXISTS tags JSONB DEFAULT '[]'::jsonb;
+ ALTER TABLE accounts ADD COLUMN IF NOT EXISTS credit_enabled BOOLEAN DEFAULT FALSE;
+ ALTER TABLE accounts ADD COLUMN IF NOT EXISTS credit_skip_usage_window BOOLEAN DEFAULT FALSE;
CREATE TABLE IF NOT EXISTS account_groups (
id SERIAL PRIMARY KEY,
@@ -597,7 +601,8 @@ func (db *DB) migrate(ctx context.Context) error {
auto_clean_rate_limited BOOLEAN DEFAULT FALSE,
background_refresh_interval_minutes INT DEFAULT 2,
usage_probe_max_age_minutes INT DEFAULT 10,
- recovery_probe_interval_minutes INT DEFAULT 30
+ recovery_probe_interval_minutes INT DEFAULT 30,
+ scheduler_mode VARCHAR(20) DEFAULT 'round_robin'
);
CREATE TABLE IF NOT EXISTS account_model_cooldowns (
account_id BIGINT NOT NULL,
@@ -627,6 +632,7 @@ func (db *DB) migrate(ctx context.Context) error {
ALTER TABLE system_settings ADD COLUMN IF NOT EXISTS background_refresh_interval_minutes INT DEFAULT 2;
ALTER TABLE system_settings ADD COLUMN IF NOT EXISTS usage_probe_max_age_minutes INT DEFAULT 10;
ALTER TABLE system_settings ADD COLUMN IF NOT EXISTS recovery_probe_interval_minutes INT DEFAULT 30;
+ ALTER TABLE system_settings ADD COLUMN IF NOT EXISTS scheduler_mode VARCHAR(20) DEFAULT 'round_robin';
ALTER TABLE system_settings ADD COLUMN IF NOT EXISTS resin_url TEXT DEFAULT '';
ALTER TABLE system_settings ADD COLUMN IF NOT EXISTS resin_platform_name TEXT DEFAULT '';
ALTER TABLE system_settings ADD COLUMN IF NOT EXISTS prompt_filter_enabled BOOLEAN DEFAULT FALSE;
@@ -1093,6 +1099,7 @@ type SystemSettings struct {
BackgroundRefreshIntervalMinutes int
UsageProbeMaxAgeMinutes int
RecoveryProbeIntervalMinutes int
+ SchedulerMode string
ResinURL string // Resin 代理池地址(含 Token),例如 http://127.0.0.1:2260/my-token
ResinPlatformName string // Resin 平台标识,例如 codex2api
PromptFilterEnabled bool
@@ -1132,6 +1139,7 @@ func (db *DB) GetSystemSettings(ctx context.Context) (*SystemSettings, error) {
COALESCE(background_refresh_interval_minutes, 2),
COALESCE(usage_probe_max_age_minutes, 10),
COALESCE(recovery_probe_interval_minutes, 30),
+ COALESCE(scheduler_mode, 'round_robin'),
COALESCE(resin_url, ''),
COALESCE(resin_platform_name, ''),
COALESCE(prompt_filter_enabled, false),
@@ -1159,6 +1167,7 @@ func (db *DB) GetSystemSettings(ctx context.Context) (*SystemSettings, error) {
&s.ProxyPoolEnabled, &s.FastSchedulerEnabled, &s.MaxRetries, &s.MaxRateLimitRetries, &s.AllowRemoteMigration,
&s.AutoCleanError, &s.AutoCleanExpired, &s.ModelMapping,
&s.BackgroundRefreshIntervalMinutes, &s.UsageProbeMaxAgeMinutes, &s.RecoveryProbeIntervalMinutes,
+ &s.SchedulerMode,
&s.ResinURL, &s.ResinPlatformName,
&s.PromptFilterEnabled, &s.PromptFilterMode, &s.PromptFilterThreshold, &s.PromptFilterStrictThreshold,
&s.PromptFilterLogMatches, &s.PromptFilterMaxTextLength, &s.PromptFilterSensitiveWords,
@@ -1188,9 +1197,10 @@ func (db *DB) UpdateSystemSettings(ctx context.Context, s *SystemSettings) error
prompt_filter_sensitive_words, prompt_filter_custom_patterns, prompt_filter_disabled_patterns,
client_compat_mode, codex_min_cli_version, usage_log_mode, usage_log_batch_size,
usage_log_flush_interval_seconds, stream_flush_policy, stream_flush_interval_ms,
- image_storage_config
+ image_storage_config,
+ scheduler_mode
)
- VALUES (1, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35, $36, $37, $38, $39, $40, $41, $42, $43)
+ VALUES (1, $1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14, $15, $16, $17, $18, $19, $20, $21, $22, $23, $24, $25, $26, $27, $28, $29, $30, $31, $32, $33, $34, $35, $36, $37, $38, $39, $40, $41, $42, $43, $44)
ON CONFLICT (id) DO UPDATE SET
site_name = EXCLUDED.site_name,
site_logo = EXCLUDED.site_logo,
@@ -1234,7 +1244,8 @@ func (db *DB) UpdateSystemSettings(ctx context.Context, s *SystemSettings) error
usage_log_flush_interval_seconds = EXCLUDED.usage_log_flush_interval_seconds,
stream_flush_policy = EXCLUDED.stream_flush_policy,
stream_flush_interval_ms = EXCLUDED.stream_flush_interval_ms,
- image_storage_config = EXCLUDED.image_storage_config
+ image_storage_config = EXCLUDED.image_storage_config,
+ scheduler_mode = EXCLUDED.scheduler_mode
`, NormalizeSiteName(s.SiteName), strings.TrimSpace(s.SiteLogo),
s.MaxConcurrency, s.GlobalRPM, s.TestModel, s.TestConcurrency, s.ProxyURL, s.PgMaxConns, s.RedisPoolSize,
s.AutoCleanUnauthorized, s.AutoCleanRateLimited, s.AdminSecret, s.AutoCleanFullUsage, s.ProxyPoolEnabled,
@@ -1245,7 +1256,7 @@ func (db *DB) UpdateSystemSettings(ctx context.Context, s *SystemSettings) error
s.PromptFilterSensitiveWords, s.PromptFilterCustomPatterns, s.PromptFilterDisabledPatterns,
s.ClientCompatMode, s.CodexMinCLIVersion, s.UsageLogMode, s.UsageLogBatchSize,
s.UsageLogFlushIntervalSeconds, s.StreamFlushPolicy, s.StreamFlushIntervalMS,
- s.ImageStorageConfig)
+ s.ImageStorageConfig, s.SchedulerMode)
return err
}
@@ -2928,12 +2939,21 @@ func (db *DB) GetAccountTimeRangeUsage(ctx context.Context, since time.Time) (ma
return result, rows.Err()
}
+// GetAccountBilledSince 返回指定时间戳以来 account_billed 的总和
+func (db *DB) GetAccountBilledSince(ctx context.Context, accountID int64, since time.Time) (float64, error) {
+ var billed float64
+ err := db.conn.QueryRowContext(ctx,
+ `SELECT COALESCE(SUM(account_billed), 0) FROM usage_logs WHERE account_id = $1 AND created_at >= $2 AND status_code <> 499`,
+ accountID, db.timeArg(since)).Scan(&billed)
+ return billed, err
+}
+
// ==================== Accounts ====================
// ListActive 获取所有未删除账号。
func (db *DB) ListActive(ctx context.Context) ([]*AccountRow, error) {
query := `
- SELECT id, name, platform, type, credentials, proxy_url, status, cooldown_reason, cooldown_until, error_message, COALESCE(enabled, true), COALESCE(locked, false), score_bias_override, base_concurrency_override, COALESCE(tags, '[]'), created_at, updated_at
+ SELECT id, name, platform, type, credentials, proxy_url, status, cooldown_reason, cooldown_until, error_message, COALESCE(enabled, true), COALESCE(locked, false), COALESCE(credit_enabled, false), COALESCE(credit_skip_usage_window, false), score_bias_override, base_concurrency_override, COALESCE(tags, '[]'), created_at, updated_at
FROM accounts
WHERE status <> 'deleted' AND COALESCE(error_message, '') <> 'deleted'
ORDER BY id
@@ -2965,6 +2985,8 @@ func (db *DB) ListActive(ctx context.Context) ([]*AccountRow, error) {
&a.ErrorMessage,
&a.Enabled,
&a.Locked,
+ &a.CreditEnabled,
+ &a.CreditSkipUsageWindow,
&a.ScoreBiasOverride,
&a.BaseConcurrencyOverride,
&tagsRaw,
@@ -3074,7 +3096,7 @@ func (db *DB) ClearExpiredModelCooldowns(ctx context.Context) error {
// GetAccountByID 获取未删除账号的完整数据库行。
func (db *DB) GetAccountByID(ctx context.Context, id int64) (*AccountRow, error) {
query := `
- SELECT id, name, platform, type, credentials, proxy_url, status, cooldown_reason, cooldown_until, error_message, COALESCE(enabled, true), COALESCE(locked, false), score_bias_override, base_concurrency_override, COALESCE(tags, '[]'), created_at, updated_at
+ SELECT id, name, platform, type, credentials, proxy_url, status, cooldown_reason, cooldown_until, error_message, COALESCE(enabled, true), COALESCE(locked, false), COALESCE(credit_enabled, false), COALESCE(credit_skip_usage_window, false), score_bias_override, base_concurrency_override, COALESCE(tags, '[]'), created_at, updated_at
FROM accounts
WHERE id = $1 AND status <> 'deleted' AND COALESCE(error_message, '') <> 'deleted'
LIMIT 1
@@ -3098,6 +3120,8 @@ func (db *DB) GetAccountByID(ctx context.Context, id int64) (*AccountRow, error)
&a.ErrorMessage,
&a.Enabled,
&a.Locked,
+ &a.CreditEnabled,
+ &a.CreditSkipUsageWindow,
&a.ScoreBiasOverride,
&a.BaseConcurrencyOverride,
&tagsRaw,
@@ -3340,6 +3364,46 @@ func (db *DB) SetAccountLocked(ctx context.Context, id int64, locked bool) error
return err
}
+// UpdateAccountCredit 更新账号的信用设置(credit_enabled / credit_skip_usage_window)
+// 传入 nil 表示不修改该字段,仅 SET 非 nil 的列。
+func (db *DB) UpdateAccountCredit(ctx context.Context, id int64, creditEnabled, creditSkipUsageWindow *bool) error {
+ var setClauses []string
+ var args []interface{}
+ argIdx := 1
+
+ if creditEnabled != nil {
+ setClauses = append(setClauses, fmt.Sprintf("credit_enabled = $%d", argIdx))
+ args = append(args, *creditEnabled)
+ argIdx++
+ }
+ if creditSkipUsageWindow != nil {
+ setClauses = append(setClauses, fmt.Sprintf("credit_skip_usage_window = $%d", argIdx))
+ args = append(args, *creditSkipUsageWindow)
+ argIdx++
+ }
+
+ if len(setClauses) == 0 {
+ return nil // 没有要更新的字段
+ }
+
+ setClauses = append(setClauses, "updated_at = CURRENT_TIMESTAMP")
+ query := "UPDATE accounts SET " + strings.Join(setClauses, ", ") + fmt.Sprintf(" WHERE id = $%d", argIdx)
+ args = append(args, id)
+
+ res, err := db.conn.ExecContext(ctx, query, args...)
+ if err != nil {
+ return err
+ }
+ affected, err := res.RowsAffected()
+ if err != nil {
+ return err
+ }
+ if affected == 0 {
+ return sql.ErrNoRows
+ }
+ return nil
+}
+
// UpdateCredentials 原子合并更新账号的 credentials(JSONB || 运算符,不覆盖已有字段)
// 解决并发刷新时一个进程覆盖另一个进程写入的字段的问题
func (db *DB) UpdateCredentials(ctx context.Context, id int64, credentials map[string]interface{}) error {
diff --git a/database/sqlite.go b/database/sqlite.go
index 5609e81f..a10aad78 100644
--- a/database/sqlite.go
+++ b/database/sqlite.go
@@ -138,7 +138,8 @@ func (db *DB) migrateSQLite(ctx context.Context) error {
usage_log_flush_interval_seconds INTEGER DEFAULT 5,
stream_flush_policy TEXT DEFAULT 'immediate',
stream_flush_interval_ms INTEGER DEFAULT 20,
- image_storage_config TEXT DEFAULT '{}'
+ image_storage_config TEXT DEFAULT '{}',
+ scheduler_mode TEXT DEFAULT 'round_robin'
);`,
`CREATE TABLE IF NOT EXISTS model_registry (
id TEXT PRIMARY KEY,
@@ -330,8 +331,11 @@ func (db *DB) migrateSQLite(ctx context.Context) error {
{"system_settings", "stream_flush_policy", "TEXT DEFAULT 'immediate'"},
{"system_settings", "stream_flush_interval_ms", "INTEGER DEFAULT 20"},
{"system_settings", "image_storage_config", "TEXT DEFAULT '{}'"},
+ {"system_settings", "scheduler_mode", "TEXT DEFAULT 'round_robin'"},
{"accounts", "enabled", "INTEGER DEFAULT 1"},
{"accounts", "locked", "INTEGER DEFAULT 0"},
+ {"accounts", "credit_enabled", "INTEGER DEFAULT 0"},
+ {"accounts", "credit_skip_usage_window", "INTEGER DEFAULT 0"},
{"accounts", "image_quota_remaining", "INTEGER NULL"},
{"accounts", "image_quota_total", "INTEGER NULL"},
{"accounts", "today_used_count", "INTEGER DEFAULT 0"},
diff --git a/docker-compose.sqlite.local.yml b/docker-compose.sqlite.local.yml
index cac25167..87d74567 100644
--- a/docker-compose.sqlite.local.yml
+++ b/docker-compose.sqlite.local.yml
@@ -5,7 +5,9 @@ services:
build: .
container_name: codex2api-sqlite-local
ports:
- - "${BIND_HOST:-0.0.0.0}:${CODEX_PORT:-8080}:${CODEX_PORT:-8080}"
+ # Bind to localhost only by default for security.
+ # To expose on all interfaces, set BIND_HOST=0.0.0.0 in .env or override here.
+ - "${BIND_HOST:-127.0.0.1}:${CODEX_PORT:-8080}:${CODEX_PORT:-8080}"
env_file:
- .env
volumes:
diff --git a/docker-compose.sqlite.yml b/docker-compose.sqlite.yml
index b20a791d..2833de11 100644
--- a/docker-compose.sqlite.yml
+++ b/docker-compose.sqlite.yml
@@ -5,7 +5,9 @@ services:
image: ghcr.io/james-6-23/codex2api:latest
container_name: codex2api-sqlite
ports:
- - "${BIND_HOST:-0.0.0.0}:${CODEX_PORT:-8080}:${CODEX_PORT:-8080}"
+ # Bind to localhost only by default for security.
+ # To expose on all interfaces, set BIND_HOST=0.0.0.0 in .env or override here.
+ - "${BIND_HOST:-127.0.0.1}:${CODEX_PORT:-8080}:${CODEX_PORT:-8080}"
env_file:
- .env
volumes:
diff --git a/docs/API.md b/docs/API.md
index 1c93ebae..6273cea3 100644
--- a/docs/API.md
+++ b/docs/API.md
@@ -21,6 +21,7 @@
- [代理池管理](#代理池管理)
- [运维监控](#运维监控)
- [模型管理](#模型管理)
+ - [生图工作台](#生图工作台) — 文生图、图生图任务与图库管理
- [OAuth 授权](#oauth-授权) — PKCE 流程获取 Token
- [支持模型](#支持模型)
- [错误码](#错误码)
@@ -287,7 +288,7 @@ data: [DONE]
"object": "list",
"data": [
{ "id": "gpt-5.5", "object": "model", "owned_by": "openai" },
- { "id": "gpt-5.5", "object": "model", "owned_by": "openai" },
+ { "id": "gpt-5.4", "object": "model", "owned_by": "openai" },
{ "id": "gpt-5.4-mini", "object": "model", "owned_by": "openai" },
{ "id": "gpt-5.3-codex", "object": "model", "owned_by": "openai" },
{ "id": "gpt-5.3-codex-spark", "object": "model", "owned_by": "openai" },
@@ -384,6 +385,10 @@ data: [DONE]
"last_used_at": "2024-01-01T11:00:00Z",
"success_requests": 95,
"error_requests": 5,
+ "credit_enabled": false,
+ "credit_skip_usage_window": false,
+ "billed_5h": 0.25,
+ "billed_7d": 3.50,
"usage_percent_7d": 45.2,
"usage_percent_5h": 12.5,
"reset_5h_at": "2024-01-01T17:00:00Z",
@@ -416,6 +421,10 @@ data: [DONE]
| base_concurrency_override | integer/null | 手工配置的基础并发覆盖值,`null` 表示跟随全局 `max_concurrency` |
| base_concurrency_effective | integer | 当前生效的基础并发值 |
| allowed_api_key_ids | integer[] | 允许调用该账号的 API Key ID 列表;空数组表示所有 API Key 均可调用 |
+| credit_enabled | bool | 是否为信用计费模式账号 |
+| credit_skip_usage_window | bool | 是否跳过 7 天/5 小时用量窗口惩罚 |
+| billed_5h | number/null | 过去 5 小时窗口内的累计计费金额(USD) |
+| billed_7d | number/null | 过去 7 天窗口内的累计计费金额(USD) |
#### PATCH /api/admin/accounts/:id/scheduler
@@ -457,6 +466,36 @@ data: [DONE]
}
```
+#### PATCH /api/admin/accounts/:id/credit
+
+更新账号信用设置。
+
+**请求:**
+
+```json
+{
+ "credit_enabled": true,
+ "credit_skip_usage_window": true
+}
+```
+
+**参数说明:**
+
+| 参数 | 类型 | 必填 | 说明 |
+| -------------------------- | ----- | ---- | ---------------------------------------- |
+| credit_enabled | bool | 否 | 标记账号为信用计费模式,省略时保持原值 |
+| credit_skip_usage_window | bool | 否 | 跳过 7 天/5 小时用量窗口惩罚(仅在 `credit_enabled=true` 时生效),省略时保持原值 |
+
+**响应:**
+
+```json
+{
+ "message": "信用设置已更新",
+ "credit_enabled": true,
+ "credit_skip_usage_window": true
+}
+```
+
#### POST /api/admin/accounts
添加 Refresh Token 账号(支持批量)。
@@ -1152,6 +1191,8 @@ curl -X DELETE "http://localhost:8080/api/admin/account-groups/1?force=true" \
"proxy_pool_enabled": false,
"fast_scheduler_enabled": false,
"max_retries": 3,
+ "max_rate_limit_retries": 2,
+ "scheduler_mode": "round_robin",
"allow_remote_migration": false,
"database_driver": "postgres",
"database_label": "PostgreSQL",
@@ -1177,7 +1218,9 @@ curl -X DELETE "http://localhost:8080/api/admin/account-groups/1?force=true" \
"proxy_url": "http://proxy.example.com:8080",
"auto_clean_unauthorized": true,
"auto_clean_rate_limited": false,
- "fast_scheduler_enabled": true
+ "fast_scheduler_enabled": true,
+ "scheduler_mode": "remaining_quota",
+ "max_rate_limit_retries": 2
}
```
@@ -1413,6 +1456,127 @@ curl -X DELETE "http://localhost:8080/api/admin/account-groups/1?force=true" \
}
```
+### 生图工作台
+
+管理后台生图工作台的 API,支持文生图(`/images/jobs`)和图生图(`/images/edit-jobs`)任务,以及图库管理。
+
+#### POST /api/admin/images/jobs
+
+创建文生图任务。
+
+**请求:**
+
+```json
+{
+ "prompt": "A small orange cat sitting on a cloud",
+ "model": "gpt-image-2",
+ "size": "1024x1024",
+ "quality": "high",
+ "output_format": "png",
+ "style": "photorealistic",
+ "api_key_id": 0
+}
+```
+
+**参数说明:**
+
+| 参数 | 类型 | 必填 | 说明 |
+| ------------- | ------ | ---- | ----------------------------- |
+| prompt | string | 是 | 提示词,最长 8000 字符 |
+| model | string | 否 | 模型,默认 `gpt-image-2` |
+| size | string | 否 | 输出尺寸 |
+| quality | string | 否 | 质量等级 |
+| output_format | string | 否 | 输出格式,默认 `png` |
+| style | string | 否 | 风格说明 |
+| upscale | string | 否 | 超分选项 |
+| api_key_id | int | 否 | 指定 API Key ID |
+
+**响应:**
+
+```json
+{
+ "id": 1,
+ "status": "pending"
+}
+```
+
+#### POST /api/admin/images/edit-jobs
+
+创建图生图(image-to-image)任务。与文生图参数类似,但需要额外提供参考图片。
+
+**请求:**
+
+```json
+{
+ "prompt": "Replace the background with a sunset scene",
+ "model": "gpt-image-2",
+ "size": "1024x1024",
+ "output_format": "png",
+ "input_images": [
+ "https://example.com/source.png",
+ "data:image/png;base64,iVBORw0KGgo..."
+ ]
+}
+```
+
+**参数说明:**
+
+| 参数 | 类型 | 必填 | 说明 |
+| ------------- | -------- | ---- | ------------------------------------ |
+| prompt | string | 是 | 编辑提示词,最长 8000 字符 |
+| model | string | 否 | 模型,默认 `gpt-image-2` |
+| input_images | string[] | 是 | 参考图片 URL 或 data URI 列表 |
+| size | string | 否 | 输出尺寸 |
+| quality | string | 否 | 质量等级 |
+| output_format | string | 否 | 输出格式,默认 `png` |
+| api_key_id | int | 否 | 指定 API Key ID |
+
+#### GET /api/admin/images/jobs
+
+获取生图任务列表。支持分页和状态过滤。
+
+**响应:**
+
+```json
+{
+ "jobs": [
+ {
+ "id": 1,
+ "prompt": "A small orange cat",
+ "model": "gpt-image-2",
+ "status": "completed",
+ "created_at": "2024-01-01T12:00:00Z"
+ }
+ ],
+ "total": 50
+}
+```
+
+#### GET /api/admin/images/jobs/:id
+
+获取单个生图任务详情及结果。
+
+#### DELETE /api/admin/images/jobs/:id
+
+删除一个生图任务及其关联的所有图库资产。
+
+```bash
+curl -X DELETE http://localhost:8080/api/admin/images/jobs/1 \
+ -H "X-Admin-Key: your-secret"
+```
+
+#### GET /api/admin/images/assets
+
+获取图库资产列表。
+
+#### GET /api/admin/images/assets/:id/file
+
+获取单个图库资产文件(返回图片二进制或签名 URL)。
+
+#### DELETE /api/admin/images/assets/:id
+
+删除单个图库资产。
+
### OAuth 授权
通过 OAuth PKCE 流程授权获取 Codex 账号的 Refresh Token,适用于无法手动获取 RT 的场景。
@@ -1491,10 +1655,10 @@ curl -X DELETE "http://localhost:8080/api/admin/account-groups/1?force=true" \
## 支持模型
-| 模型 | 说明 |
-| ------------------- | --------------------------------------- |
-| gpt-5.5 | 最新旗舰模型 |
-| gpt-5.4 | 旗舰模型 |
+| 模型 | 说明 |
+| ------------------- | ----------------------------------------------------------- |
+| gpt-5.5 | 最新旗舰模型。计费:$5.00/M 输入 / $30.00/M 输出(标准),priority 分别为 $12.50/M / $75.00/M |
+| gpt-5.4 | 旗舰模型 |
| gpt-5.4-mini | 轻量版 |
| gpt-5.3-codex | 较新版本 |
| gpt-5.3-codex-spark | Codex Spark 模型,仅 Pro 订阅账号可调用 |
diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
new file mode 100644
index 00000000..c13b8a3a
--- /dev/null
+++ b/docs/CHANGELOG.md
@@ -0,0 +1,39 @@
+# Changelog — iteration/may-2026-v2
+
+Dates: 2026-05-13 to 2026-05-20. 17 commits.
+
+## Features
+
+- **Credit quota support (#141).** Added `credit_enabled` and `credit_skip_usage_window` flags to the accounts table. Credit-marked accounts skip usage-window penalties in the scheduler. Managed via `PATCH /api/admin/accounts/:id/credit`.
+
+- **Scheduler mode (#133).** Added `scheduler_mode` system setting with two modes: `round_robin` (default, weighted by dispatch score) and `remaining_quota` (prioritize accounts with lowest usage percent). Configurable from Admin Settings page.
+
+- **5h/7d windowed USD cost display.** Replaced the single total-cost column with a windowed billing view. Each account now shows `billed_5h` and `billed_7d` fields aligned with the account's usage-reset boundaries. This reflects actual spending, not estimated token costs.
+
+- **Image-to-image in Image Studio (#135, #136).** The admin Image Studio now supports image-to-image generation via `POST /api/admin/images/edit-jobs`, accepting reference image URLs or data URIs. Added text-to-image and image-to-image tabs in the frontend.
+
+- **Billing model expansion.** Added pricing for gpt-5.5-pro and gpt-5.4-pro families. Implemented long context (>272K tokens) premium pricing for gpt-5.5, gpt-5.5-pro, gpt-5.4, gpt-5.4-pro with automatic detection. Fixed gpt-4o and gpt-4o-mini cache-read pricing.
+
+## Fixes
+
+- **GPT-5.5 pricing corrected.** Updated standard-tier billing from old values to $5.00/M input / $30.00/M output (priority: $12.50/M / $75.00/M), matching current official pricing.
+
+- **SSE stream isolation.** Prevented SSE response mixing when retrying across accounts, using `c.Writer.Written()` as the retry guard instead of a package-level flag.
+
+- **Usage logging for image errors.** Added usage-log emits for read-error paths in image generation, ensuring billing records are not silently dropped on stream failures.
+
+- **Model mapping initialization.** Restored `modelMapping` init that was accidentally removed during the scheduler_mode refactor.
+
+- **Credit field Scan order.** Fixed PostgreSQL `Scan` argument ordering for credit fields that was causing silent zero-values.
+
+- **Round 2 review fixes.** Addressed Haiku review findings including api.ts syntax cleanup, billing test corrections, and several CRITICAL/HIGH issues from automated review.
+
+## Security
+
+- **SQLite default binds to localhost.** SQLite compose files (`docker-compose.sqlite.yml`, `docker-compose.sqlite.local.yml`) now bind ports to `127.0.0.1` by default. Previously they bound to `0.0.0.0`, exposing the service on all interfaces. Standard (PostgreSQL) compose files retain the `0.0.0.0` default.
+
+- **BIND_HOST env var.** Added `BIND_HOST` environment variable support to control the HTTP listen address across all deployment modes. Documented in `.env.example`, `.env.sqlite.example`, and `CONFIGURATION.md`.
+
+## Breaking Changes
+
+- **SQLite compose port binding.** SQLite deployments upgrading from a previous version that relied on external access via the default compose configuration must now explicitly set `BIND_HOST=0.0.0.0` in `.env` or override the port binding in the compose file. All other behavior remains backwards-compatible.
diff --git a/docs/CONFIGURATION.md b/docs/CONFIGURATION.md
index b57635ff..af2ad25f 100644
--- a/docs/CONFIGURATION.md
+++ b/docs/CONFIGURATION.md
@@ -43,7 +43,10 @@ Codex2API 采用三层配置架构:
| 变量 | 必填 | 默认值 | 说明 |
|------|------|--------|------|
| `CODEX_PORT` | 否 | 8080 | HTTP 服务端口 |
+| `BIND_HOST` | 否 | `127.0.0.1`(SQLite)/ `0.0.0.0`(PostgreSQL) | Docker 端口发布绑定地址(非进程监听地址,由 `CODEX_BIND` 控制)。SQLite compose 默认 `127.0.0.1` 仅本机访问;标准 compose 默认 `0.0.0.0` 所有网络接口 |
| `ADMIN_SECRET` | 否 | - | 管理后台登录密钥 |
+| `CODEX_ALLOW_ANONYMOUS` | 否 | `false` | 设为 `true` 时,未配置任何对外 API Key 也允许 `/v1/*` 直接调用(仅限内网测试场景) |
+| `FAST_SCHEDULER_ENABLED` | 否 | `false` | 通过环境变量启用快速调度器(也可在管理后台运行时开启) |
| `TZ` | 否 | UTC | 时区,如 `Asia/Shanghai` |
### Codex 上游稳定性配置
@@ -51,6 +54,7 @@ Codex2API 采用三层配置架构:
| 变量 | 必填 | 默认值 | 说明 |
|------|------|--------|------|
| `CODEX_UPSTREAM_TRANSPORT` | 否 | `http` | Codex 上游协议:`http` / `auto` / `ws`。HTTP 入站在 `auto` 下仍走 HTTP 上游 |
+| `CODEX_PROXY_URL` | 否 | - | 全局代理 URL,适用于需要为所有 Codex 上游请求统一配置代理的场景 |
| `USE_WEBSOCKET` | 否 | `false` | 旧版开关;未设置 `CODEX_UPSTREAM_TRANSPORT` 时,`true` 等价于 `CODEX_UPSTREAM_TRANSPORT=ws` |
| `CODEX_TRANSPORT_MODE` | 否 | `standard` | Codex HTTP transport:默认标准 Go TLS;`utls_chrome` 可回滚旧 Chrome uTLS 行为 |
| `CODEX_WS_SEND_USER_AGENT` | 否 | `false` | WS 握手是否发送 `User-Agent`/`Version`;默认关闭 |
@@ -128,7 +132,9 @@ Codex2API 采用三层配置架构:
| `MaxConcurrency` | int | 2 | 1-50 | 单账号最大并发请求数 |
| `GlobalRPM` | int | 0 | 0-∞ | 全局每分钟请求限制,0 表示不限 |
| `MaxRetries` | int | 3 | 0-10 | 请求失败最大重试次数 |
+| `MaxRateLimitRetries` | int | 2 | 0-10 | 遇到 429 限流时的最大额外重试次数 |
| `FastSchedulerEnabled` | bool | false | - | 启用快速调度器 |
+| `SchedulerMode` | string | `round_robin` | - | 调度模式:`round_robin`(轮询,按调度分权重排序)或 `remaining_quota`(优先使用用量少的账号) |
### 测试配置
@@ -144,6 +150,17 @@ Codex2API 采用三层配置架构:
| `ProxyURL` | string | "" | 全局代理 URL |
| `ProxyPoolEnabled` | bool | false | 启用代理池 |
+### 账号级设置(单账号)
+
+以下字段存储在 `accounts` 表中,可通过管理后台账号详情或 API 按账号单独设置:
+
+| 字段 | 类型 | 默认值 | 说明 |
+|------|------|--------|------|
+| `credit_enabled` | bool | false | 标记账号为信用计费模式 |
+| `credit_skip_usage_window` | bool | false | 跳过 7 天/5 小时用量窗口惩罚(适用于信用账号) |
+| `score_bias_override` | int/null | null | 手工覆盖调度权重分,`null` 跟随套餐默认 |
+| `base_concurrency_override` | int/null | null | 手工覆盖基础并发值,`null` 跟随全局默认 |
+
### 连接池配置
| 字段 | 类型 | 默认值 | 范围 | 说明 |
diff --git a/frontend/src/api.ts b/frontend/src/api.ts
index e77229c3..cdba8615 100644
--- a/frontend/src/api.ts
+++ b/frontend/src/api.ts
@@ -222,6 +222,8 @@ export const api = {
request<{ message: string; success: number; failed: number }>('/accounts/batch-reset-status', { method: 'POST', body: JSON.stringify({ ids }) }),
getAccountUsage: (id: number) =>
request