feat(tensorzero): 添加 NewAPI 配置支持并优化容器配置

mudssky · mudssky · commit 5e38d48df1d8 · 2026-04-16T02:09:37.000+08:00
- 新增 TENSORZERO_CONFIG_FILE 环境变量用于动态切换配置文件
- 添加 newapi.toml 配置文件作为 NewAPI 的替代配置选项
- 在 .env.example 和 .env.production.example 中添加 NEWAPI_API_KEY
- 修改 compose.yaml 使容器能够通过环境变量动态加载配置文件
- 将配置文件挂载方式从单文件改为目录挂载以支持多配置
- 更新 tensorzero.md 文档说明 NewAPI 配置使用方法
- 为 NewAPI 配置添加基础的重试和超时保护机制
diff --git a/ai/gateway/tensorzero/.env.example b/ai/gateway/tensorzero/.env.example
@@ -1,10 +1,12 @@
+TENSORZERO_CONFIG_FILE=tensorzero.toml
 TENSORZERO_GATEWAY_IMAGE=tensorzero/gateway
 TENSORZERO_UI_IMAGE=tensorzero/ui
 TENSORZERO_CLICKHOUSE_IMAGE=clickhouse/clickhouse-server:lts
 TENSORZERO_VALKEY_IMAGE=valkey/valkey:8-alpine
 TENSORZERO_GATEWAY_PORT=34400
 TENSORZERO_UI_PORT=34401
 DASHSCOPE_API_KEY=sk-xxxx
+NEWAPI_API_KEY=sk-xxxx
 TENSORZERO_CLICKHOUSE_DB=tensorzero
 TENSORZERO_CLICKHOUSE_USER=tensorzero
 TENSORZERO_CLICKHOUSE_PASSWORD=change-me
diff --git a/ai/gateway/tensorzero/.env.production.example b/ai/gateway/tensorzero/.env.production.example
@@ -1,3 +1,4 @@
+TENSORZERO_CONFIG_FILE=tensorzero.toml
 # 生产环境建议固定镜像版本，替换 ClickHouse 凭据，并通过 HTTPS 反向代理暴露 gateway / UI。
 TENSORZERO_GATEWAY_IMAGE=tensorzero/gateway
 TENSORZERO_UI_IMAGE=tensorzero/ui
@@ -6,6 +7,7 @@ TENSORZERO_VALKEY_IMAGE=valkey/valkey:8-alpine
 TENSORZERO_GATEWAY_PORT=34400
 TENSORZERO_UI_PORT=34401
 DASHSCOPE_API_KEY=sk-prod-xxxx
+NEWAPI_API_KEY=sk-prod-xxxx
 TENSORZERO_CLICKHOUSE_DB=tensorzero
 TENSORZERO_CLICKHOUSE_USER=tensorzero
 TENSORZERO_CLICKHOUSE_PASSWORD=change-me-now
diff --git a/ai/gateway/tensorzero/compose.yaml b/ai/gateway/tensorzero/compose.yaml
@@ -45,8 +45,9 @@ services:
       valkey:
         condition: service_healthy
     command:
+      # 通过环境变量切换 tensorzero.toml / newapi.toml，避免每次切换都改 compose。
       - "--config-file"
-      - "/app/config/tensorzero.toml"
+      - "/app/config/${TENSORZERO_CONFIG_FILE:-tensorzero.toml}"
     environment:
       # TensorZero 通过 OpenAI 兼容提供方接入百炼，这里直接注入百炼 API Key。
       DASHSCOPE_API_KEY: ${DASHSCOPE_API_KEY:-}
@@ -55,7 +56,7 @@ services:
       # Valkey URL 负责自定义限流状态存储。
       TENSORZERO_VALKEY_URL: ${TENSORZERO_VALKEY_URL:-redis://valkey:6379/0}
     volumes:
-      - ./tensorzero.toml:/app/config/tensorzero.toml:ro
+      - ./:/app/config:ro
     ports:
       - "${TENSORZERO_GATEWAY_PORT:-34400}:3000"
     extra_hosts:
@@ -78,7 +79,7 @@ services:
       TENSORZERO_GATEWAY_URL: ${TENSORZERO_GATEWAY_URL:-http://tensorzero-gateway:3000}
       TENSORZERO_CLICKHOUSE_URL: ${TENSORZERO_CLICKHOUSE_URL:-http://tensorzero:change-me@clickhouse:8123/tensorzero}
     volumes:
-      - ./tensorzero.toml:/app/config/tensorzero.toml:ro
+      - ./:/app/config:ro
     ports:
       - "${TENSORZERO_UI_PORT:-34401}:4000"
 
diff --git a/ai/gateway/tensorzero/newapi.toml b/ai/gateway/tensorzero/newapi.toml
@@ -0,0 +1,27 @@
+[gateway]
+# Disable pseudonymous usage analytics and cap the global outbound HTTP timeout so a flaky NewAPI upstream cannot hold connections for long.
+disable_pseudonymous_usage_analytics = true
+global_outbound_http_timeout_ms = 30_000
+
+# TensorZero's OpenAI-compatible provider needs an explicit `model_name`, so one TOML file cannot do true `*` model passthrough like LiteLLM.
+# Minimal template: change `api_base` and `model_name` below to your NewAPI endpoint and model.
+[models.newapi_chat]
+routing = ["newapi"]
+
+[models.newapi_chat.providers.newapi]
+type = "openai"
+api_base = "https://newapi.example.com/v1"
+model_name = "qwen-plus"
+api_key_location = "env::NEWAPI_API_KEY"
+# Timeouts for requests sent to this provider; kept below the gateway-wide outbound timeout above.
+timeouts = { non_streaming.total_ms = 15_000, streaming.ttft_ms = 3_000 }
+
+[functions.chat_simple]
+# Minimal chat function with no fallback or complex routing; a simple starting template for NewAPI.
+type = "chat"
+
+[functions.chat_simple.variants.newapi]
+type = "chat_completion"
+model = "newapi_chat"
+# `retries` is a variant-level setting in TensorZero: one light retry so an occasional 429 / 5xx is not passed straight to the caller.
+retries = { num_retries = 1, max_delay_s = 4 }
diff --git a/ai/gateway/tensorzero/tensorzero.md b/ai/gateway/tensorzero/tensorzero.md
@@ -14,20 +14,23 @@
 - `.env.example`：开发环境变量示例。
 - `.env.production.example`：生产环境变量示例。
 - `tensorzero.toml`：生产级网关配置示例，包含百炼接入、降级、重试和限流规则。
+- `newapi.toml`：面向 NewAPI 的精简替代配置，只保留单模型以及轻量的重试和超时保护，不包含降级和限流规则。
 - `.env.local`：本地私有环境变量覆盖文件，可选。
 
 ## 环境变量
 
 建议先在 `ai/gateway/tensorzero/.env.local` 中配置以下值：
 
 ```dotenv
+TENSORZERO_CONFIG_FILE=tensorzero.toml
 TENSORZERO_GATEWAY_IMAGE=tensorzero/gateway
 TENSORZERO_UI_IMAGE=tensorzero/ui
 TENSORZERO_CLICKHOUSE_IMAGE=clickhouse/clickhouse-server:lts
 TENSORZERO_VALKEY_IMAGE=valkey/valkey:8-alpine
 TENSORZERO_GATEWAY_PORT=34400
 TENSORZERO_UI_PORT=34401
 DASHSCOPE_API_KEY=sk-xxxx
+NEWAPI_API_KEY=sk-xxxx
 TENSORZERO_CLICKHOUSE_DB=tensorzero
 TENSORZERO_CLICKHOUSE_USER=tensorzero
 TENSORZERO_CLICKHOUSE_PASSWORD=change-me
@@ -38,7 +41,9 @@ TENSORZERO_VALKEY_URL=redis://valkey:6379/0
 
 其中：
 
+- `TENSORZERO_CONFIG_FILE`：要加载的配置文件名，默认 `tensorzero.toml`；如果要切到 NewAPI 版，改成 `newapi.toml`。
 - `DASHSCOPE_API_KEY`：阿里百炼 API Key，供 `tensorzero.toml` 里的 OpenAI 兼容提供方配置使用。
+- `NEWAPI_API_KEY`：供 `newapi.toml` 使用的 NewAPI 密钥。
 - `TENSORZERO_GATEWAY_PORT`：宿主机访问 TensorZero Gateway 的端口，默认 `34400`。
 - `TENSORZERO_UI_PORT`：宿主机访问 TensorZero UI 的端口，默认 `34401`。
 - `TENSORZERO_CLICKHOUSE_*`：ClickHouse 初始化账号、密码和数据库。
@@ -47,6 +52,27 @@ TENSORZERO_VALKEY_URL=redis://valkey:6379/0
 
 如果你要准备生产环境，可以复制 `./.env.production.example` 再按实际环境改值。
 
+如果你想切到 NewAPI 版配置，只需要在 `.env.local` 里设置：
+
+```dotenv
+TENSORZERO_CONFIG_FILE=newapi.toml
+```
+
+然后补好：
+
+```dotenv
+NEWAPI_API_KEY=sk-xxxx
+```
+
+同时直接编辑 `newapi.toml` 里的：
+
+```toml
+api_base = "https://newapi.example.com/v1"
+model_name = "qwen-plus"
+```
+
+把它改成你的 NewAPI 地址和默认模型。
+
 ## 启动方式
 
 推荐直接使用：
@@ -94,14 +120,25 @@ curl http://127.0.0.1:34400/inference `
 
 ## 配置说明
 
-`tensorzero.toml` 当前提供的是一套生产向示例：
+`tensorzero.toml` 当前提供的是一套面向阿里百炼的生产向示例：
 
 - `qwen_plus_prod`：主模型，直连阿里百炼 `qwen-plus`
 - `qwen_flash_fallback`：降级模型，直连阿里百炼 `qwen-flash`
 - `chat_prod`：默认生产聊天函数，优先主模型，失败后顺序降级
 - `retries` + `timeouts`：在 provider 和 variant 两层分别约束重试和超时
 - `rate_limiting.rules`：示例化地展示全局总量保护与按 `tenant_id` 的细粒度限流
 
+`newapi.toml` 则是一个更简洁的 NewAPI 版本：
+
+- `newapi_chat`：单模型模板，通过 `api_base` 指向你的 NewAPI
+- `chat_simple`：最小聊天函数，不带降级和复杂实验路由
+- `retries` + `timeouts`：只保留一层轻量保护，避免配置过重
+
+使用 `newapi.toml` 时需要注意：
+
+- TensorZero 的 OpenAI-compatible provider 需要显式配置 `model_name`，所以 `newapi.toml` 不能像 LiteLLM 一样用单个配置文件真正做任意模型 `*` 透传。
+- 如果你要切到另一个 NewAPI 模型，最简单的做法是直接改 `newapi.toml` 的 `model_name`；如果你要同时支持多个模型，就复制一份 `models.*` 和 `functions.*` 配置块。
+
 注意：
 
 - `tokens_per_hour` 规则要求请求中显式传 `max_tokens`，否则无法对 token 做保守预估。