mudssky
diff --git a/‎ai/gateway/litellm/.env.example‎
Lines changed: 4 additions & 2 deletions b/‎ai/gateway/litellm/.env.example‎
Lines changed: 4 additions & 2 deletions
diff --git a/‎ai/gateway/litellm/.env.production.example‎
Lines changed: 7 additions & 0 deletions b/‎ai/gateway/litellm/.env.production.example‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎ai/gateway/litellm/compose.yaml‎
Lines changed: 6 additions & 6 deletions b/‎ai/gateway/litellm/compose.yaml‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎ai/gateway/litellm/litellm.md‎
Lines changed: 25 additions & 15 deletions b/‎ai/gateway/litellm/litellm.md‎
Lines changed: 25 additions & 15 deletions
diff --git a/‎ai/gateway/litellm/qwen.yaml‎
Lines changed: 44 additions & 0 deletions b/‎ai/gateway/litellm/qwen.yaml‎
Lines changed: 44 additions & 0 deletions
diff --git a/‎ai/gateway/litellm/start.ps1‎
Lines changed: 1 addition & 1 deletion b/‎ai/gateway/litellm/start.ps1‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎ai/gateway/portkey/.env.example‎
Lines changed: 4 additions & 0 deletions b/‎ai/gateway/portkey/.env.example‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎ai/gateway/portkey/.env.production.example‎
Lines changed: 5 additions & 0 deletions b/‎ai/gateway/portkey/.env.production.example‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎ai/gateway/portkey/compose.yaml‎
Lines changed: 7 additions & 0 deletions b/‎ai/gateway/portkey/compose.yaml‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎ai/gateway/portkey/newapi.config.example.json‎
Lines changed: 34 additions & 0 deletions b/‎ai/gateway/portkey/newapi.config.example.json‎
Lines changed: 34 additions & 0 deletions
@@ -1,4 +1,6 @@
-NEWAPI_KEY=sk-xxxx
-NEWAPI_API_BASE=https://example.com
+LITELLM_IMAGE=docker.litellm.ai/berriai/litellm:main-latest
+LITELLM_HOST_PORT=34000
+DASHSCOPE_API_KEY=sk-xxxx
+DASHSCOPE_API_BASE=https://dashscope.aliyuncs.com/compatible-mode/v1
 LITELLM_MASTER_KEY=sk-litellm-123456
 DATABASE_URL=postgresql://postgres:12345678@host.docker.internal:5432/litellm
@@ -0,0 +1,7 @@
+# Production: pin LITELLM_IMAGE to a verified tag instead of main-latest, and expose HTTPS through a reverse proxy.
+LITELLM_IMAGE=docker.litellm.ai/berriai/litellm:main-latest
+LITELLM_HOST_PORT=34000
+DASHSCOPE_API_KEY=sk-prod-xxxx
+DASHSCOPE_API_BASE=https://dashscope.aliyuncs.com/compatible-mode/v1
+LITELLM_MASTER_KEY=sk-litellm-prod-change-me
+DATABASE_URL=postgresql://litellm:change-me@postgres.internal:5432/litellm
@@ -1,21 +1,21 @@
 services:
   litellm:
     container_name: litellm
-    image: docker.litellm.ai/berriai/litellm:main-latest
+    image: ${LITELLM_IMAGE:-docker.litellm.ai/berriai/litellm:main-latest}
     restart: unless-stopped
     ports:
-      - "34000:4000"
+      - "${LITELLM_HOST_PORT:-34000}:4000"
     environment:
       # LiteLLM 官方镜像会把 PORT 透传给内部 uvicorn 启动参数，未显式设置时会报错。
       PORT: "4000"
       # 通过 compose 插值保留当前默认数据库地址；start.ps1 会在存在 .env.local 时传入 --env-file 以支持本地覆盖。
       DATABASE_URL: ${DATABASE_URL:-postgresql://postgres:12345678@host.docker.internal:5432/litellm}
-      # LiteLLM 运行时会从容器环境变量读取这些值，再由 newapi.yaml 中的 os.environ/... 引用。
-      NEWAPI_KEY: ${NEWAPI_KEY:-}
-      NEWAPI_API_BASE: ${NEWAPI_API_BASE:-}
+      # LiteLLM reads these values from the container environment at runtime; qwen.yaml references them via os.environ/... entries.
+      DASHSCOPE_API_KEY: ${DASHSCOPE_API_KEY:-}
+      DASHSCOPE_API_BASE: ${DASHSCOPE_API_BASE:-https://dashscope.aliyuncs.com/compatible-mode/v1}
       LITELLM_MASTER_KEY: ${LITELLM_MASTER_KEY:-}
     volumes:
-      - ./newapi.yaml:/app/config.yaml:ro
+      - ./qwen.yaml:/app/config.yaml:ro
     extra_hosts:
       # 在 Linux Docker Engine 场景补齐宿主机别名，保持默认 DATABASE_URL 可用。
       - "host.docker.internal:host-gateway"
@@ -1,32 +1,40 @@
 # LiteLLM 网关说明
 
-这个目录用于启动一个基于 LiteLLM Proxy 的统一模型网关，当前配置会把客户端传入的模型名透传到 NewAPI。
+这个目录用于启动一个基于 LiteLLM Proxy 的生产级 Qwen 网关，默认直连阿里百炼的 OpenAI 兼容接口。
 
 相关文件职责如下：
 
-- `newapi.yaml`：LiteLLM 代理配置，定义模型透传和 `master_key`。
+- `qwen.yaml`：LiteLLM 生产配置，定义主模型、降级模型、限流、重试和超时策略。
 - `compose.yaml`：LiteLLM 容器模板，定义镜像、端口、挂载和默认环境变量。
 - `start.ps1`：统一入口，封装常用 `docker compose` 操作。
-- `../.env.local`：本地私有环境变量，保存 `NEWAPI_KEY`、`NEWAPI_API_BASE`、`LITELLM_MASTER_KEY`、可选 `DATABASE_URL`。
+- `.env.example`：开发环境变量示例。
+- `.env.production.example`：生产环境变量示例。
+- `.env.local`：本地私有环境变量，保存 `DASHSCOPE_API_KEY`、`DASHSCOPE_API_BASE`、`LITELLM_MASTER_KEY`、可选 `DATABASE_URL`。
 
 ## 环境变量
 
-建议先在 `ai/gateway/.env.local` 中配置以下值：
+建议先在 `ai/gateway/litellm/.env.local` 中配置以下值：
 
 ```dotenv
-NEWAPI_KEY=sk-xxxx
-NEWAPI_API_BASE=https://example.com
+LITELLM_IMAGE=docker.litellm.ai/berriai/litellm:main-latest
+LITELLM_HOST_PORT=34000
+DASHSCOPE_API_KEY=sk-xxxx
+DASHSCOPE_API_BASE=https://dashscope.aliyuncs.com/compatible-mode/v1
 LITELLM_MASTER_KEY=sk-litellm-123456
 DATABASE_URL=postgresql://postgres:12345678@host.docker.internal:5432/litellm
 ```
 
 说明：
 
-- `NEWAPI_KEY`：NewAPI 的访问密钥。
-- `NEWAPI_API_BASE`：NewAPI 的 OpenAI 兼容接口地址。
+- `LITELLM_IMAGE`：LiteLLM 镜像，可用于生产环境固定到经过验证的标签。
+- `LITELLM_HOST_PORT`：宿主机暴露端口，默认 `34000`。
+- `DASHSCOPE_API_KEY`：阿里百炼 API Key。
+- `DASHSCOPE_API_BASE`：阿里百炼 OpenAI 兼容接口地址；中国内地默认可用 `https://dashscope.aliyuncs.com/compatible-mode/v1`。
 - `LITELLM_MASTER_KEY`：LiteLLM Proxy 对外暴露的网关密钥。
 - `DATABASE_URL`：LiteLLM 的数据库连接串；如果未配置，会回退到默认的宿主机 PostgreSQL 地址。
 
+如果你想准备生产环境变量，可将 `./.env.production.example` 复制为 `.env.local`（`start.ps1` 默认读取该文件），再按实际环境改值。
+
 ## 启动方式
 
 推荐直接使用 `start.ps1`：
@@ -38,7 +46,7 @@ DATABASE_URL=postgresql://postgres:12345678@host.docker.internal:5432/litellm
 默认等价于：
 
 ```powershell
-docker compose --env-file ai/gateway/.env.local `
+docker compose --env-file ai/gateway/litellm/.env.local `
   -f ai/gateway/litellm/compose.yaml `
   --project-directory ai/gateway/litellm `
   up -d
@@ -70,7 +78,7 @@ docker compose --env-file ai/gateway/.env.local `
 如果你想直接执行 `docker compose`，建议保持和脚本一致的参数：
 
 ```powershell
-docker compose --env-file ai/gateway/.env.local `
+docker compose --env-file ai/gateway/litellm/.env.local `
   -f ai/gateway/litellm/compose.yaml `
   --project-directory ai/gateway/litellm `
   logs -f litellm
@@ -92,13 +100,15 @@ OpenAI 兼容接口示例：
 curl http://127.0.0.1:34000/v1/chat/completions `
   -H "Content-Type: application/json" `
   -H "Authorization: Bearer sk-litellm-123456" `
-  -d "{\"model\":\"gpt-4o\",\"messages\":[{\"role\":\"user\",\"content\":\"你好\"}]}"
+  -d "{\"model\":\"qwen-chat\",\"messages\":[{\"role\":\"user\",\"content\":\"你好\"}]}"
 ```
 
 ## 配置说明
 
-当前 `newapi.yaml` 的关键点：
+当前 `qwen.yaml` 的关键点：
 
-- `model_name: "*"` 允许客户端传入任意模型名。
-- `model: "openai/{{ model }}"` 会把客户端的模型名透传给下游 NewAPI。
-- `api_base`、`api_key`、`master_key` 都从容器环境变量读取，便于本地通过 `.env.local` 管理敏感值。
+- `qwen-chat`：主模型，默认走百炼 `qwen-plus`。
+- `qwen-chat-fallback`：降级模型，默认走百炼 `qwen-flash`。
+- `rpm`：在部署层做第一道限流保护，避免上游配额被瞬时打爆。
+- `num_retries` + `timeout`：对临时失败、超时和抖动做统一收敛。
+- `fallbacks` + `cooldown_time`：主模型异常时自动切到降级模型，并对异常部署做短暂冷却。
@@ -0,0 +1,44 @@
+model_list:
+  - model_name: qwen-chat
+    litellm_params:
+      # Primary model: routes to qwen-plus on Alibaba Cloud Bailian (DashScope), prioritizing output quality.
+      model: "openai/qwen-plus"
+      api_base: "os.environ/DASHSCOPE_API_BASE"
+      api_key: "os.environ/DASHSCOPE_API_KEY"
+      # Requests-per-minute cap used as the gateway's first layer of rate limiting; tune it to the actual Bailian quota.
+      rpm: 300
+      timeout: 20
+
+  - model_name: qwen-chat-fallback
+    litellm_params:
+      # Fallback model: qwen-flash absorbs traffic when the primary model is rate limited or times out.
+      model: "openai/qwen-flash"
+      api_base: "os.environ/DASHSCOPE_API_BASE"
+      api_key: "os.environ/DASHSCOPE_API_KEY"
+      rpm: 600
+      timeout: 15
+
+litellm_settings:
+  # Telemetry is disabled for production; retries and timeouts are controlled centrally at the gateway.
+  telemetry: false
+  drop_params: true
+  num_retries: 2
+  request_timeout: 25
+
+router_settings:
+  # Filter out unsuitable deployments before routing. NOTE(review): LiteLLM documents this as a context-window check — confirm scope.
+  enable_pre_call_checks: true
+  # Switch to the fallback model automatically when the primary model fails.
+  fallbacks:
+    - qwen-chat:
+        - qwen-chat-fallback
+  # Cool the failing deployment down briefly after repeated failures so an upstream outage is not amplified.
+  allowed_fails: 2
+  cooldown_time: 30
+  num_retries: 2
+  timeout: 20  # seconds (was 20000, i.e. ~5.5 h, a milliseconds mix-up); matches the primary model's timeout
+
+general_settings:
+  master_key: "os.environ/LITELLM_MASTER_KEY"
+  proxy_batch_write_at: 60
+  database_connection_pool_limit: 10
@@ -13,7 +13,7 @@ $ErrorActionPreference = 'Stop'
 
 $scriptDir = Split-Path -Parent $MyInvocation.MyCommand.Path
 $composeFile = Join-Path $scriptDir 'compose.yaml'
-$envFile = '.env.local'
+$envFile = Join-Path $scriptDir '.env.local'
 
 function Show-Usage {
     <#
 
@@ -0,0 +1,4 @@
+PORTKEY_IMAGE=portkeyai/gateway:latest
+PORTKEY_HOST_PORT=34200
+NEWAPI_API_BASE=https://newapi.example.com/v1
+NEWAPI_API_KEY=sk-xxxx
@@ -0,0 +1,5 @@
+# Production: pin PORTKEY_IMAGE to a verified tag instead of latest, and put the public entry point behind a reverse proxy or API gateway.
+PORTKEY_IMAGE=portkeyai/gateway:latest
+PORTKEY_HOST_PORT=34200
+NEWAPI_API_BASE=https://newapi.example.com/v1
+NEWAPI_API_KEY=sk-prod-xxxx
@@ -0,0 +1,7 @@
+services:
+  portkey:  # Portkey gateway container definition
+    container_name: portkey
+    image: ${PORTKEY_IMAGE:-portkeyai/gateway:latest}  # overridable via PORTKEY_IMAGE; falls back to the latest tag
+    restart: unless-stopped
+    ports:
+      - "${PORTKEY_HOST_PORT:-34200}:8787"  # host port (default 34200) mapped to container port 8787
@@ -0,0 +1,34 @@
+{
+  "strategy": {
+    "mode": "fallback",
+    "on_status_codes": [408, 429, 500, 502, 503, 504]
+  },
+  "request_timeout": 15000,
+  "retry": {
+    "attempts": 2,
+    "on_status_codes": [408, 429, 500, 502, 503, 504],
+    "use_retry_after_headers": true
+  },
+  "targets": [
+    {
+      "provider": "openai",
+      "api_key": "sk-your-newapi-key",
+      "custom_host": "https://newapi-primary.example.com/v1",
+      "override_params": {
+        "model": "qwen-plus",
+        "temperature": 0.3,
+        "max_tokens": 4096
+      }
+    },
+    {
+      "provider": "openai",
+      "api_key": "sk-your-newapi-key",
+      "custom_host": "https://newapi-primary.example.com/v1",
+      "override_params": {
+        "model": "qwen-flash",
+        "temperature": 0.3,
+        "max_tokens": 4096
+      }
+    }
+  ]
+}