cli-proxy-api-plus/config.example.yaml at main · anschmieg/cli-proxy-api-plus · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
# Server host/interface to bind to. Default is empty ("") to bind all interfaces (IPv4 + IPv6).
# Use "127.0.0.1" or "localhost" to restrict access to local machine only.
host: ""

# Server port
port: 8317

# TLS settings for HTTPS. When enabled, the server listens with the provided certificate and key.
tls:
  enable: false
  cert: ""
  key: ""

# Management API settings
remote-management:
  # Whether to allow remote (non-localhost) management access.
  # When false, only localhost can access management endpoints (a key is still required).
  allow-remote: false

  # Management key. If a plaintext value is provided here, it will be hashed on startup.
  # All management requests (even from localhost) require this key.
  # Leave empty to disable the Management API entirely (404 for all /v0/management routes).
  secret-key: ""

  # Disable the bundled management control panel asset download and HTTP route when true.
  disable-control-panel: false

  # GitHub repository for the management control panel. Accepts a repository URL or releases API URL.
  panel-github-repository: "https://github.com/router-for-me/Cli-Proxy-API-Management-Center"

# Authentication directory (supports ~ for home directory)
auth-dir: "~/.cli-proxy-api"

# API keys for authentication
api-keys:
  - "your-api-key-1"
  - "your-api-key-2"
  - "your-api-key-3"

# Enable debug logging
debug: false

# When true, disable high-overhead HTTP middleware features to reduce per-request memory usage under high concurrency.
commercial-mode: false

# Open OAuth URLs in incognito/private browser mode.
# Useful when you want to login with a different account without logging out from your current session.
# Default: false (but Kiro auth defaults to true for multi-account support)
incognito-browser: true

# When true, write application logs to rotating files instead of stdout
logging-to-file: false

# Maximum total size (MB) of log files under the logs directory. When exceeded, the oldest log
# files are deleted until within the limit. Set to 0 to disable.
logs-max-total-size-mb: 0

# When false, disable in-memory usage statistics aggregation
usage-statistics-enabled: false

# Proxy URL. Supports socks5/http/https protocols. Example: socks5://user:pass@192.168.1.1:1080/
proxy-url: ""

# When true, unprefixed model requests only use credentials without a prefix (except when prefix == model name).
force-model-prefix: false

# Number of times to retry a request. Retries will occur if the HTTP response code is 403, 408, 500, 502, 503, or 504.
request-retry: 3

# Maximum wait time in seconds for a cooled-down credential before triggering a retry.
max-retry-interval: 30

# Quota exceeded behavior
quota-exceeded:
  switch-project: true # Whether to automatically switch to another project when a quota is exceeded
  switch-preview-model: true # Whether to automatically switch to a preview model when a quota is exceeded

# Routing strategy for selecting credentials when multiple match.
routing:
  strategy: "round-robin" # round-robin (default), fill-first

# Knowledge base (RAG) settings.
# knowledge:
#   enabled: true
#   qdrant:
#     url: "http://localhost:6333"
#     collection: "ai_gateway_knowledge"
#     vector-size: 1536
#     distance: "Cosine"
#     auto-create: true
#     timeout-seconds: 10
#   embeddings:
#     base-url: "https://api.openai.com/v1"
#     api-key: "sk-your-embedding-key"
#     api-key-header: "Authorization"
#     api-key-prefix: "Bearer"
#     model: "text-embedding-3-small"
#     timeout-seconds: 10
#   search:
#     limit: 5
#     min-score: 0.65
#     max-context-chars: 2500

# Profiles (system prompts, tool policy, knowledge base, MCP allowlist).
# profiles:
#   - id: "rag-bot"
#     name: "RAG Bot"
#     description: "Uses project knowledge base and a custom system prompt"
#     default-model: "gemini-2.5-flash"
#     system-prompt: "You are a helpful assistant."
#     knowledge-base: "project-foo"
#     tools:
#       enabled: true
#       allowlist:
#         - "web_search"
#     mcp-servers:
#       - "local-tools"

# MCP servers (local spawn or remote SSE).
# mcp-servers:
#   - id: "local-tools"
#     name: "Local Tools"
#     type: "local" # local | sse
#     command: "/usr/local/bin/mcp-server"
#     args:
#       - "--port=9010"
#     env:
#       MCP_ENV: "prod"
#     enabled: true
#   - id: "remote-tools"
#     name: "Remote Tools"
#     type: "sse"
#     url: "https://mcp.example.com/sse"
#     enabled: true

# Tool mappings (Anthropic tool types or tool names -> MCP tool).
# tool-mappings:
#   - anthropic-tool-type: "web_search_20250305"
#     tool-name: "web_search"
#     mcp-server-id: "remote-tools"
#     mcp-tool-name: "search"

# When true, enable authentication for the WebSocket API (/v1/ws).
ws-auth: false

# When > 0, emit blank lines every N seconds for non-streaming responses to prevent idle timeouts.
nonstream-keepalive-interval: 0

# Streaming behavior (SSE keep-alives + safe bootstrap retries).
# streaming:
#   keepalive-seconds: 15   # Default: 0 (disabled). <= 0 disables keep-alives.
#   bootstrap-retries: 1    # Default: 0 (disabled). Retries before first byte is sent.

# When true, enable official Codex instructions injection for Codex API requests.
# When false (default), CodexInstructionsForModel returns immediately without modification.
codex-instructions-enabled: false

# Gemini API keys
# gemini-api-key:
#   - api-key: "AIzaSy...01"
#     prefix: "test" # optional: require calls like "test/gemini-3-pro-preview" to target this credential
#     base-url: "https://generativelanguage.googleapis.com"
#     headers:
#       X-Custom-Header: "custom-value"
#     proxy-url: "socks5://proxy.example.com:1080"
#     models:
#       - name: "gemini-2.5-flash" # upstream model name
#         alias: "gemini-flash"    # client alias mapped to the upstream model
#     excluded-models:
#       - "gemini-2.5-pro"     # exclude specific models from this provider (exact match)
#       - "gemini-2.5-*"       # wildcard matching prefix (e.g. gemini-2.5-flash, gemini-2.5-pro)
#       - "*-preview"          # wildcard matching suffix (e.g. gemini-3-pro-preview)
#       - "*flash*"            # wildcard matching substring (e.g. gemini-2.5-flash-lite)
#   - api-key: "AIzaSy...02"

# Codex API keys
# codex-api-key:
#   - api-key: "sk-atSM..."
#     prefix: "test" # optional: require calls like "test/gpt-5-codex" to target this credential
#     base-url: "https://www.example.com" # use the custom codex API endpoint
#     headers:
#       X-Custom-Header: "custom-value"
#     proxy-url: "socks5://proxy.example.com:1080" # optional: per-key proxy override
#     models:
#       - name: "gpt-5-codex"   # upstream model name
#         alias: "codex-latest" # client alias mapped to the upstream model
#     excluded-models:
#       - "gpt-5.1"         # exclude specific models (exact match)
#       - "gpt-5-*"         # wildcard matching prefix (e.g. gpt-5-medium, gpt-5-codex)
#       - "*-mini"          # wildcard matching suffix (e.g. gpt-5-codex-mini)
#       - "*codex*"         # wildcard matching substring (e.g. gpt-5-codex-low)

# Claude API keys
# claude-api-key:
#   - api-key: "sk-atSM..." # use the official claude API key, no need to set the base url
#   - api-key: "sk-atSM..."
#     prefix: "test" # optional: require calls like "test/claude-sonnet-latest" to target this credential
#     base-url: "https://www.example.com" # use the custom claude API endpoint
#     headers:
#       X-Custom-Header: "custom-value"
#     proxy-url: "socks5://proxy.example.com:1080" # optional: per-key proxy override
#     models:
#       - name: "claude-3-5-sonnet-20241022" # upstream model name
#         alias: "claude-sonnet-latest"      # client alias mapped to the upstream model
#     excluded-models:
#       - "claude-opus-4-5-20251101" # exclude specific models (exact match)
#       - "claude-3-*"               # wildcard matching prefix (e.g. claude-3-7-sonnet-20250219)
#       - "*-thinking"               # wildcard matching suffix (e.g. claude-opus-4-5-thinking)
#       - "*haiku*"                  # wildcard matching substring (e.g. claude-3-5-haiku-20241022)
#     cloak:                         # optional: request cloaking for non-Claude-Code clients
#       mode: "auto"                 # "auto" (default): cloak only when client is not Claude Code
#                                    # "always": always apply cloaking
#                                    # "never": never apply cloaking
#       strict-mode: false           # false (default): prepend Claude Code prompt to user system messages
#                                    # true: strip all user system messages, keep only Claude Code prompt
#       sensitive-words:             # optional: words to obfuscate with zero-width characters
#         - "API"
#         - "proxy"

# Kiro (AWS CodeWhisperer) configuration
# Note: Kiro API currently only operates in us-east-1 region
#kiro:
#  - token-file: "~/.aws/sso/cache/kiro-auth-token.json" # path to Kiro token file
#    agent-task-type: "" # optional: "vibe" or empty (API default)
#  - access-token: "aoaAAAAA..." # or provide tokens directly
#    refresh-token: "aorAAAAA..."
#    profile-arn: "arn:aws:codewhisperer:us-east-1:..."
#    proxy-url: "socks5://proxy.example.com:1080" # optional: proxy override

# OpenAI compatibility providers
# openai-compatibility:
#   - name: "openrouter" # The name of the provider; it will be used in the user agent and other places.
#     prefix: "test" # optional: require calls like "test/kimi-k2" to target this provider's credentials
#     base-url: "https://openrouter.ai/api/v1" # The base URL of the provider.
#     headers:
#       X-Custom-Header: "custom-value"
#     api-key-entries:
#       - api-key: "sk-or-v1-...b780"
#         proxy-url: "socks5://proxy.example.com:1080" # optional: per-key proxy override
#       - api-key: "sk-or-v1-...b781" # without proxy-url
#     models: # The models supported by the provider.
#       - name: "moonshotai/kimi-k2:free" # The actual model name.
#         alias: "kimi-k2" # The alias used in the API.

# Vertex API keys (Vertex-compatible endpoints, use API key + base URL)
# vertex-api-key:
#   - api-key: "vk-123..."                        # x-goog-api-key header
#     prefix: "test"                              # optional: require calls like "test/vertex-pro" to target this credential
#     base-url: "https://example.com/api"         # e.g. https://zenmux.ai/api
#     proxy-url: "socks5://proxy.example.com:1080" # optional per-key proxy override
#     headers:
#       X-Custom-Header: "custom-value"
#     models:                                     # optional: map aliases to upstream model names
#       - name: "gemini-2.5-flash"                # upstream model name
#         alias: "vertex-flash"                   # client-visible alias
#       - name: "gemini-2.5-pro"
#         alias: "vertex-pro"

# Amp Integration
# ampcode:
#   # Configure upstream URL for Amp CLI OAuth and management features
#   upstream-url: "https://ampcode.com"
#   # Optional: Override API key for Amp upstream (otherwise uses env or file)
#   upstream-api-key: ""
#   # Per-client upstream API key mapping
#   # Maps client API keys (from top-level api-keys) to different Amp upstream API keys.
#   # Useful when different clients need to use different Amp accounts/quotas.
#   # If a client key isn't mapped, falls back to upstream-api-key (default behavior).
#   upstream-api-keys:
#     - upstream-api-key: "amp_key_for_team_a"    # Upstream key to use for these clients
#       api-keys:                                 # Client keys that use this upstream key
#         - "your-api-key-1"
#         - "your-api-key-2"
#     - upstream-api-key: "amp_key_for_team_b"
#       api-keys:
#         - "your-api-key-3"
#   # Restrict Amp management routes (/api/auth, /api/user, etc.) to localhost only (default: false)
#   restrict-management-to-localhost: false
#   # Force model mappings to run before checking local API keys (default: false)
#   force-model-mappings: false
#   # Amp Model Mappings
#   # Route unavailable Amp models to alternative models available in your local proxy.
#   # Useful when Amp CLI requests models you don't have access to (e.g., Claude Opus 4.5)
#   # but you have a similar model available (e.g., Claude Sonnet 4).
#   model-mappings:
#     - from: "claude-opus-4-5-20251101"          # Model requested by Amp CLI
#       to: "gemini-claude-opus-4-5-thinking"     # Route to this available model instead
#     - from: "claude-sonnet-4-5-20250929"
#       to: "gemini-claude-sonnet-4-5-thinking"
#     - from: "claude-haiku-4-5-20251001"
#       to: "gemini-2.5-flash"

# Global OAuth model name aliases (per channel)
# These aliases rename model IDs for both model listing and request routing.
# Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot.
# NOTE: Aliases do not apply to gemini-api-key, codex-api-key, claude-api-key, openai-compatibility, vertex-api-key, or ampcode.
# You can repeat the same name with different aliases to expose multiple client model names.
oauth-model-alias:
  # Expose official Anthropic model IDs while routing to Kiro-backed models.
  kiro:
    # Official non-dated IDs
    - name: "kiro-claude-sonnet-4-5"
      alias: "claude-sonnet-4.5"
      fork: true
    - name: "kiro-claude-sonnet-4-5"
      alias: "claude-sonnet-4-5"
      fork: true
    - name: "kiro-claude-haiku-4-5"
      alias: "claude-haiku-4.5"
      fork: true
    - name: "kiro-claude-haiku-4-5"
      alias: "claude-haiku-4-5"
      fork: true
    - name: "kiro-claude-sonnet-4-5"
      alias: "claude-3-5-sonnet"
      fork: true
    - name: "kiro-claude-haiku-4-5"
      alias: "claude-3-5-haiku"
      fork: true
    - name: "kiro-claude-sonnet-4-5"
      alias: "claude-3-5-sonnet-20241022"
      fork: true
    - name: "kiro-claude-haiku-4-5"
      alias: "claude-3-5-haiku-20241022"
      fork: true

# OAuth provider excluded models
# Supported channels: gemini-cli, vertex, aistudio, antigravity, claude, codex, qwen, iflow, kiro, github-copilot.
# oauth-excluded-models:
#   gemini-cli:
#     - "gemini-2.5-pro"     # exclude specific models (exact match)
#     - "gemini-2.5-*"       # wildcard matching prefix (e.g. gemini-2.5-flash, gemini-2.5-pro)
#     - "*-preview"          # wildcard matching suffix (e.g. gemini-3-pro-preview)
#     - "*flash*"            # wildcard matching substring (e.g. gemini-2.5-flash-lite)
#   vertex:
#     - "gemini-3-pro-preview"
#   aistudio:
#     - "gemini-3-pro-preview"
#   antigravity:
#     - "gemini-3-pro-preview"
#   claude:
#     - "claude-3-5-haiku-20241022"
#   codex:
#     - "gpt-5-codex-mini"
#   qwen:
#     - "vision-model"
#   iflow:
#     - "tstars2.0"
#   kiro:
#     - "kiro-claude-haiku-4-5"
#   github-copilot:
#     - "raptor-mini"

# Optional payload configuration
# payload:
#   default: # Default rules only set parameters when they are missing in the payload.
#     - models:
#         - name: "gemini-2.5-pro" # Supports wildcards (e.g., "gemini-*")
#           protocol: "gemini" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex
#       params: # JSON path (gjson/sjson syntax) -> value
#         "generationConfig.thinkingConfig.thinkingBudget": 32768
#   default-raw: # Default raw rules set parameters using raw JSON when missing (must be valid JSON).
#     - models:
#         - name: "gemini-2.5-pro" # Supports wildcards (e.g., "gemini-*")
#           protocol: "gemini" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex
#       params: # JSON path (gjson/sjson syntax) -> raw JSON value (strings are used as-is, must be valid JSON)
#         "generationConfig.responseJsonSchema": "{\"type\":\"object\",\"properties\":{\"answer\":{\"type\":\"string\"}}}"
#   override: # Override rules always set parameters, overwriting any existing values.
#     - models:
#         - name: "gpt-*" # Supports wildcards (e.g., "gpt-*")
#           protocol: "codex" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex
#       params: # JSON path (gjson/sjson syntax) -> value
#         "reasoning.effort": "high"
#   override-raw: # Override raw rules always set parameters using raw JSON (must be valid JSON).
#     - models:
#         - name: "gpt-*" # Supports wildcards (e.g., "gpt-*")
#           protocol: "codex" # restricts the rule to a specific protocol, options: openai, gemini, claude, codex
#       params: # JSON path (gjson/sjson syntax) -> raw JSON value (strings are used as-is, must be valid JSON)
#         "response_format": "{\"type\":\"json_schema\",\"json_schema\":{\"name\":\"answer\",\"schema\":{\"type\":\"object\"}}}"

# Model Pooling & Clustering
# Two-level system for aggregating models across providers under virtual model IDs.
#
# Level 1 — Pools: Route the same logical model from multiple providers under one virtual ID.
#   Useful when you have the same model available via different subscriptions/providers.
#   The gateway tries members in priority order and fails over on 429/503 errors.
#
# Level 2 — Clusters: Group multiple pools (or raw model IDs) under a semantic name.
#   Use this to expose intent-based model IDs like "coding-high", "coding-fast", etc.
#
# Strategies:
#   - "round-robin" (default for pools): rotate across members on each request
#   - "priority" (default for clusters): try members in order, fail over on exhaustion
#
# model-pools:
#   enabled: true
#   default-strategy: round-robin
#
#   # Level 1: Pools — same model, multiple providers
#   pools:
#     - id: "claude-sonnet-4"           # Virtual model ID exposed to clients
#       strategy: "priority"            # Override default strategy for this pool
#       members:
#         - model: "kiro-claude-sonnet-4-5"        # Actual model name sent to provider
#           provider: "kiro"                        # Provider type
#           priority: 0                             # Lower = preferred (used with "priority" strategy)
#           weight: 2                               # Higher = more requests (used with "round-robin")
#         - model: "claude-sonnet-4-20250514"
#           provider: "claude"
#           priority: 1
#
#     - id: "gpt-4.1"
#       members:
#         - model: "gpt-4.1"
#           provider: "codex"
#
#   # Level 2: Clusters — semantic names grouping multiple pools
#   clusters:
#     - id: "coding-high"               # Semantic model ID exposed to clients
#       description: "Best available model for complex coding tasks"
#       strategy: "priority"
#       members:
#         - pool: "claude-sonnet-4"     # Reference a Level 1 pool by ID
#           priority: 0
#         - pool: "gpt-4.1"
#           priority: 1
#
#     - id: "coding-fast"
#       description: "Fast, cost-efficient model for simple coding tasks"
#       members:
#         - model: "kiro-claude-haiku-4-5"    # Can reference raw models directly
#         - model: "gpt-4.1-mini"
#
#     - id: "chat"
#       description: "General-purpose conversational model"
#       members:
#         - pool: "claude-sonnet-4"
#
#     - id: "reasoning"
#       description: "Models optimized for complex reasoning and analysis"
#       members:
#         - model: "claude-sonnet-4-20250514(64000)"   # Supports thinking suffixes
#         - model: "gpt-4.1"