Skip to content

Commit 70d86ce

Browse files
authored
feat(ai-proxy): add per-protocol request_body override and rename max_tokens mapping to llm_options (#13269)
1 parent 546518b commit 70d86ce

10 files changed

Lines changed: 494 additions & 103 deletions

File tree

apisix/plugins/ai-protocols/init.lua

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@
2222

2323
local converters = require("apisix.plugins.ai-protocols.converters")
2424
local ipairs = ipairs
25+
local pairs = pairs
26+
local table = table
2527

2628
local _M = {}
2729

@@ -66,6 +68,18 @@ end
6668

6769

6870

71+
--- Get the list of all registered protocol names.
-- @return table Sorted array of protocol name strings
function _M.names()
    local result = {}
    local count = 0
    for proto_name in pairs(registered) do
        count = count + 1
        result[count] = proto_name
    end
    -- Sort so callers (e.g. schema generation) get a stable order
    -- regardless of hash-table iteration order.
    table.sort(result)
    return result
end
81+
82+
6983
--- Find a converter that can bridge from client_protocol to a protocol
7084
-- supported by the driver. Delegates to the converters registry.
7185
-- @param client_protocol string The detected client protocol

apisix/plugins/ai-providers/base.lua

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ local transport_http = require("apisix.plugins.ai-transport.http")
3636
local transport_auth = require("apisix.plugins.ai-transport.auth")
3737
local log_sanitize = require("apisix.utils.log-sanitize")
3838
local protocols = require("apisix.plugins.ai-protocols")
39+
local deep_merge = require("apisix.plugins.ai-proxy.merge").deep_merge
3940
local ngx = ngx
4041
local ngx_now = ngx.now
4142
local tonumber = tonumber
@@ -183,12 +184,21 @@ function _M.build_request(self, ctx, conf, request_body, opts)
183184
end
184185
end
185186

186-
-- Apply request body override via provider capability hook
187-
if opts.override_request_body then
187+
-- Apply llm_options via provider capability hook (always force-overwrites)
188+
if opts.override_llm_options then
188189
local cap = self.capabilities and self.capabilities[ctx.ai_target_protocol]
189190
if cap and cap.rewrite_request_body then
190-
cap.rewrite_request_body(request_body, opts.override_request_body,
191-
opts.request_body_force_override)
191+
cap.rewrite_request_body(request_body, opts.override_llm_options, true)
192+
end
193+
end
194+
195+
-- Apply per-target-protocol request body override (deep merge)
196+
if opts.request_body_override_map then
197+
local patch = opts.request_body_override_map[ctx.ai_target_protocol]
198+
if patch then
199+
core.log.info("applying request_body override for target protocol '",
200+
ctx.ai_target_protocol, "'")
201+
request_body = deep_merge(request_body, patch, opts.request_body_force_override)
192202
end
193203
end
194204
params.body = request_body

apisix/plugins/ai-proxy/base.lua

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,9 @@ function _M.before_proxy(conf, ctx, on_error)
125125
model_options = ai_instance.options,
126126
conf = ai_instance.provider_conf or {},
127127
auth = ai_instance.auth,
128-
override_request_body =
128+
override_llm_options =
129+
core.table.try_read_attr(ai_instance, "override", "llm_options"),
130+
request_body_override_map =
129131
core.table.try_read_attr(ai_instance, "override", "request_body"),
130132
request_body_force_override =
131133
core.table.try_read_attr(ai_instance, "override", "request_body_force_override"),

apisix/plugins/ai-proxy/merge.lua

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
--
2+
-- Licensed to the Apache Software Foundation (ASF) under one or more
3+
-- contributor license agreements. See the NOTICE file distributed with
4+
-- this work for additional information regarding copyright ownership.
5+
-- The ASF licenses this file to You under the Apache License, Version 2.0
6+
-- (the "License"); you may not use this file except in compliance with
7+
-- the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing, software
12+
-- distributed under the License is distributed on an "AS IS" BASIS,
13+
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
-- See the License for the specific language governing permissions and
15+
-- limitations under the License.
16+
--
17+
18+
--- Deep-merge helper for ai-proxy request body overrides.
19+
-- Semantics:
20+
-- * Both sides are plain objects (string-keyed tables) -> recursive merge.
21+
-- * Otherwise (scalar, array, type mismatch, cjson.empty_array/empty_object)
22+
-- -> patch value replaces target value wholesale.
23+
-- This matches RFC 7396 JSON Merge Patch minus null-deletion.
24+
25+
local core = require("apisix.core")
26+
local pairs = pairs
27+
local next = next
28+
local type = type
29+
local getmetatable = getmetatable
30+
31+
local _M = {}
32+
33+
34+
-- Decide whether tbl is a mergeable "plain object": a table whose keys
-- (if any) are strings. cjson-decoded arrays (tagged with array_mt) and
-- non-table values are excluded — those are replaced wholesale rather
-- than merged. An empty table counts as a plain object.
local function is_plain_object(tbl)
    if type(tbl) ~= "table" or getmetatable(tbl) == core.json.array_mt then
        return false
    end
    local first_key = next(tbl)
    -- Empty table, or first key is a string: treat as an object.
    -- NOTE(review): only the first key is inspected; a mixed
    -- string/integer-keyed table is classified by whichever key
    -- next() happens to return first — same as the original.
    return first_key == nil or type(first_key) == "string"
end
51+
52+
53+
--- Deep-merge patch into target (RFC 7396 JSON Merge Patch semantics,
-- minus null-deletion). Mutates target in place when both sides are
-- plain objects and returns it; otherwise returns patch unchanged.
-- @param target any Value being patched (usually the decoded request body)
-- @param patch any Partial override value
-- @param force boolean When true, scalar/array patch values overwrite
--        existing target fields; when false they only fill in fields
--        the target does not set. Object-vs-object pairs always recurse.
-- NOTE(review): when target is a non-object but patch is an object,
-- patch is returned regardless of force — callers are expected to
-- guard the nil/non-table target case themselves. Also note the
-- returned patch is shared by reference, not copied.
local function deep_merge(target, patch, force)
    -- A non-object patch (scalar, array, sentinel) replaces wholesale.
    if not is_plain_object(patch) then
        return patch
    end

    -- Object patch over a non-object target: the patch takes over.
    if not is_plain_object(target) then
        return patch
    end

    for key, patch_val in pairs(patch) do
        local target_val = target[key]
        if is_plain_object(patch_val) and is_plain_object(target_val) then
            -- Both sides are objects: recurse regardless of force,
            -- so force only governs behavior at the leaves.
            deep_merge(target_val, patch_val, force)
        elseif force or target_val == nil then
            target[key] = patch_val
        end
    end

    return target
end
_M.deep_merge = deep_merge
73+
74+
75+
return _M

apisix/plugins/ai-proxy/schema.lua

Lines changed: 44 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
--
1717
local schema_def = require("apisix.schema_def")
1818
local ai_providers_schema = require("apisix.plugins.ai-providers.schema")
19+
local protocols = require("apisix.plugins.ai-protocols")
20+
local ipairs = ipairs
1921

2022
local _M = {}
2123

@@ -72,33 +74,59 @@ local model_options_schema = {
7274
additionalProperties = true,
7375
}
7476

77+
-- Build per-target-protocol request body override schema.
78+
-- Each registered protocol gets an optional "any-shape object" entry.
79+
local request_body_override_properties = {}
80+
for _, proto_name in ipairs(protocols.names()) do
81+
request_body_override_properties[proto_name] = {
82+
type = "object",
83+
description = "Deep-merged into the outgoing request body when the "
84+
.. "target protocol is '" .. proto_name .. "'.",
85+
additionalProperties = true,
86+
}
87+
end
88+
89+
local request_body_override_schema = {
90+
type = "object",
91+
description = "Per target-protocol request body overrides. Keys are target "
92+
.. "protocol names; values are partial request bodies that are "
93+
.. "deep-merged into the outgoing body (objects merged recursively, "
94+
.. "arrays and scalars replaced wholesale).",
95+
properties = request_body_override_properties,
96+
additionalProperties = false,
97+
}
98+
99+
local llm_options_schema = {
100+
type = "object",
101+
properties = {
102+
max_tokens = {
103+
type = "integer",
104+
minimum = 1,
105+
description = "Maximum number of output tokens. APISIX automatically "
106+
.. "maps this to the correct field name for the target provider "
107+
.. "(e.g. max_completion_tokens for OpenAI, max_output_tokens "
108+
.. "for Responses API). Always force-overwrites the client value.",
109+
},
110+
},
111+
additionalProperties = false,
112+
}
113+
75114
local override_schema = {
76115
type = "object",
77116
properties = {
78117
endpoint = {
79118
type = "string",
80119
description = "To be specified to override the endpoint of the AI Instance",
81120
},
82-
request_body = {
83-
type = "object",
84-
properties = {
85-
max_tokens = {
86-
type = "integer",
87-
minimum = 1,
88-
description = "Maximum number of output tokens. APISIX automatically "
89-
.. "maps this to the correct field name for the target provider "
90-
.. "(e.g. max_completion_tokens for OpenAI, max_output_tokens "
91-
.. "for Responses API).",
92-
},
93-
},
94-
additionalProperties = false,
95-
},
121+
llm_options = llm_options_schema,
122+
request_body = request_body_override_schema,
96123
request_body_force_override = {
97124
type = "boolean",
98125
default = false,
99126
description = "When false (default), client request body fields take "
100-
.. "priority and override values only fill in missing fields. "
101-
.. "When true, override values forcefully overwrite client fields.",
127+
.. "priority and request_body override values only fill in "
128+
.. "missing fields. When true, request_body override values "
129+
.. "forcefully overwrite client fields.",
102130
},
103131
},
104132
}

docs/en/latest/plugins/ai-proxy-multi.md

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -81,9 +81,10 @@ In addition, the Plugin also supports logging LLM request information in the acc
8181
| logging.payloads | boolean | False | false | | If true, log request and response payload. |
8282
| instances.override | object | False | | | Override setting. |
8383
| instances.override.endpoint | string | False | | | LLM provider endpoint to replace the default endpoint with. If not configured, the Plugin uses the default OpenAI endpoint `https://api.openai.com/v1/chat/completions`. |
84-
| instances.override.request_body | object | False | | | Request body overrides. See [Provider-aware `max_tokens` mapping](./ai-proxy.md#provider-aware-max_tokens-mapping) in the `ai-proxy` documentation for how the contained fields are forwarded to each provider. |
85-
| instances.override.request_body.max_tokens | integer | False | | ≥ 1 | Maximum number of output tokens. APISIX automatically maps this to the provider-specific field name (e.g. `max_completion_tokens` for OpenAI Chat Completions, `max_output_tokens` for OpenAI Responses API, `max_tokens` for most other providers). By default, client request fields take priority and the override value only fills in when the client did not set it; set `instances.override.request_body_force_override` to `true` to forcefully overwrite the client value. |
86-
| instances.override.request_body_force_override | boolean | False | false | | When `false` (default), client request body fields take priority and `instances.override.request_body` values only fill in missing fields. When `true`, `instances.override.request_body` values forcefully overwrite client request body fields. |
84+
| instances.override.llm_options | object | False | | | Provider-aware LLM options. See [Provider-aware `max_tokens` mapping](./ai-proxy.md#provider-aware-max_tokens-mapping) in the `ai-proxy` documentation. |
85+
| instances.override.llm_options.max_tokens | integer | False | | ≥ 1 | Maximum number of output tokens. APISIX automatically maps this to the provider-specific field name. Always force-overwrites the client value. |
86+
| instances.override.request_body | object | False | | | Per target-protocol request body overrides. See [Per-protocol request body override](./ai-proxy.md#per-protocol-request-body-override) in the `ai-proxy` documentation. |
87+
| instances.override.request_body_force_override | boolean | False | false | | When `false` (default), client request body fields take priority and `instances.override.request_body` values only fill in missing fields. When `true`, `instances.override.request_body` values forcefully overwrite client fields. Does not affect `instances.override.llm_options`. |
8788
| instances.checks | object | False | | | Health check configurations. Note that at the moment, OpenAI, DeepSeek, and AIMLAPI do not provide an official health check endpoint. Other LLM services that you can configure under `openai-compatible` provider may have available health check endpoints. |
8889
| instances.checks.active | object | True | | | Active health check configurations. |
8990
| instances.checks.active.type | string | False | http | [http, https, tcp] | Type of health check connection. |

docs/en/latest/plugins/ai-proxy.md

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,10 @@ In addition, the Plugin also supports logging LLM request information in the acc
6666
| options.model | string | False | | | Name of the LLM model, such as `gpt-4` or `gpt-3.5`. Refer to the LLM provider's API documentation for available models. |
6767
| override | object | False | | | Override setting. |
6868
| override.endpoint | string | False | | | Custom LLM provider endpoint, required when `provider` is `openai-compatible`. |
69-
| override.request_body | object | False | | | Request body overrides. See [Provider-aware `max_tokens` mapping](#provider-aware-max_tokens-mapping) for how the contained fields are forwarded to each provider. |
70-
| override.request_body.max_tokens | integer | False | | ≥ 1 | Maximum number of output tokens. APISIX automatically maps this to the provider-specific field name (e.g. `max_completion_tokens` for OpenAI Chat Completions, `max_output_tokens` for OpenAI Responses API, `max_tokens` for most other providers). By default, client request fields take priority and the override value only fills in when the client did not set it; set `override.request_body_force_override` to `true` to forcefully overwrite the client value. |
71-
| override.request_body_force_override | boolean | False | false | | When `false` (default), client request body fields take priority and `override.request_body` values only fill in missing fields. When `true`, `override.request_body` values forcefully overwrite client request body fields. |
69+
| override.llm_options | object | False | | | Provider-aware LLM options. See [Provider-aware `max_tokens` mapping](#provider-aware-max_tokens-mapping). |
70+
| override.llm_options.max_tokens | integer | False | | ≥ 1 | Maximum number of output tokens. APISIX automatically maps this to the provider-specific field name (e.g. `max_completion_tokens` for OpenAI Chat Completions, `max_output_tokens` for OpenAI Responses API, `max_tokens` for most other providers). Always force-overwrites the client value. |
71+
| override.request_body | object | False | | | Per target-protocol request body overrides. Keys are target protocol names (`openai-chat`, `openai-responses`, `openai-embeddings`, `anthropic-messages`); values are partial request bodies that are deep-merged into the outgoing body (objects merged recursively, arrays and scalars replaced wholesale). See [Per-protocol request body override](#per-protocol-request-body-override). |
72+
| override.request_body_force_override | boolean | False | false | | When `false` (default), client request body fields take priority and `override.request_body` values only fill in missing fields. When `true`, `override.request_body` values forcefully overwrite client fields. Does not affect `override.llm_options`, which always force-overwrites. |
7273
| logging | object | False | | | Logging configurations. Does not affect `error.log`. |
7374
| logging.summaries | boolean | False | false | | If true, logs request LLM model, duration, request, and response tokens. |
7475
| logging.payloads | boolean | False | false | | If true, logs request and response payload. |
@@ -82,7 +83,7 @@ In addition, the Plugin also supports logging LLM request information in the acc
8283

8384
## Provider-aware `max_tokens` mapping
8485

85-
LLM providers and API endpoints disagree on the field name used to cap the number of output tokens. Configuring `override.request_body.max_tokens` lets you set a single value in APISIX and have it forwarded under the field name expected by each provider/endpoint.
86+
LLM providers and API endpoints disagree on the field name used to cap the number of output tokens. Configuring `override.llm_options.max_tokens` lets you set a single value in APISIX and have it forwarded under the field name expected by each provider/endpoint. `llm_options` always force-overwrites the client value.
8687

8788
The table below shows, for each `provider` and target API endpoint, the upstream field name APISIX rewrites `max_tokens` to. A `—` means the provider does not expose that endpoint.
8889

@@ -100,10 +101,21 @@ The table below shows, for each `provider` and target API endpoint, the upstream
100101

101102
¹ When `provider` is `openai` and the target is the Chat Completions endpoint, APISIX always rewrites to `max_completion_tokens` and removes any `max_tokens` field from the request body — `max_tokens` has been deprecated in favor of `max_completion_tokens` by OpenAI.
102103

104+
## Per-protocol request body override
105+
106+
`override.request_body` provides fine-grained, per-protocol control over the outgoing request body. Keys are target protocol names (`openai-chat`, `openai-responses`, `openai-embeddings`, `anthropic-messages`); values are partial JSON objects that are deep-merged into the outgoing body after protocol conversion.
107+
108+
Merge semantics:
109+
110+
- Both sides are plain objects (string-keyed) → recursive merge.
111+
- Otherwise (scalar, array, type mismatch) → patch value replaces target value wholesale.
112+
103113
Priority between client request and override is controlled by `override.request_body_force_override`:
104114

105-
- `false` (default): if the client request body already sets the corresponding field, it is preserved; the override value only fills in when the field is missing.
106-
- `true`: the override value forcefully overwrites the field in the client request body.
115+
- `false` (default): if the client request body already sets the field, it is preserved; the override value only fills in when the field is missing.
116+
- `true`: the override value forcefully overwrites the client field.
117+
118+
When both `llm_options` and `request_body` are configured, `llm_options` is applied first (always force), then `request_body` deep-merges on top. This means `request_body` can override fields set by `llm_options`.
107119

108120
## Examples
109121

0 commit comments

Comments
 (0)