Skip to content

Commit 4775bfc

Browse files
committed
feat(ai-cache): enhance error handling for unsupported requests and improve cache key generation
1 parent 6f15de7 commit 4775bfc

6 files changed

Lines changed: 215 additions & 7 deletions

File tree

apisix/plugins/ai-cache.lua

Lines changed: 18 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
local core = require("apisix.core")
1919
local schema = require("apisix.plugins.ai-cache.schema")
2020
local key_mod = require("apisix.plugins.ai-cache.key")
21+
local binding = require("apisix.plugins.ai-protocols.binding")
2122
local redis_util = require("apisix.utils.redis")
2223

2324
local ngx = ngx
@@ -65,6 +66,18 @@ end
6566

6667

6768
function _M.access(conf, ctx)
69+
if not ctx.picked_ai_instance then
70+
local handled, code, body = binding.on_unsupported(
71+
conf.fail_mode, _M.name, ctx,
72+
"no ai instance picked (request did not pass through ai-proxy/ai-proxy-multi)",
73+
500, "ai-cache must be used with the ai-proxy or ai-proxy-multi plugin")
74+
if handled then
75+
return code, body
76+
end
77+
ctx.ai_cache_status = "BYPASS"
78+
return
79+
end
80+
6881
-- Streaming responses are not cached in PR-1 (SSE replay is a later
6982
-- increment). ai-proxy (higher priority) has already classified the
7083
-- request, so bypass before doing any work.
@@ -89,8 +102,8 @@ function _M.access(conf, ctx)
89102
return
90103
end
91104

92-
ctx.ai_cache_key = "ai-cache:l1:" .. key_mod.scope(conf, ctx)
93-
.. ":" .. key_mod.fingerprint(ctx, body)
105+
ctx.ai_cache_fingerprint = key_mod.fingerprint(ctx, body)
106+
ctx.ai_cache_key = key_mod.build(conf, ctx, ctx.ai_cache_fingerprint)
94107

95108
local red
96109
red, err = redis_util.new(conf)
@@ -180,7 +193,7 @@ end
180193

181194

182195
function _M.log(conf, ctx)
183-
if ctx.ai_cache_status ~= "MISS" or not ctx.ai_cache_key then
196+
if ctx.ai_cache_status ~= "MISS" or not ctx.ai_cache_fingerprint then
184197
return
185198
end
186199
if ngx.status ~= 200 then
@@ -192,7 +205,8 @@ function _M.log(conf, ctx)
192205
end
193206
local response_body = concat(buf, "", 1, buf.n)
194207

195-
local ok, err = ngx.timer.at(0, write_to_cache, conf, ctx.ai_cache_key, response_body)
208+
local cache_key = key_mod.build(conf, ctx, ctx.ai_cache_fingerprint)
209+
local ok, err = ngx.timer.at(0, write_to_cache, conf, cache_key, response_body)
196210
if not ok then
197211
core.log.warn("ai-cache: failed to schedule cache write: ", err)
198212
end

apisix/plugins/ai-cache/key.lua

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,8 @@ local ipairs = ipairs
2424
local pairs = pairs
2525
local concat = table.concat
2626

27+
local KEY_PREFIX = "ai-cache:l1:"
28+
2729
local _M = {}
2830

2931

@@ -52,10 +54,13 @@ function _M.fingerprint(ctx, body)
5254
end
5355

5456

55-
function _M.scope(conf, ctx)
57+
local function scope(conf, ctx)
5658
local ck = conf.cache_key or {}
5759

5860
local parts = {}
61+
if ctx.picked_ai_instance_name then
62+
parts[#parts + 1] = "instance=" .. ctx.picked_ai_instance_name
63+
end
5964
if not ck.share_across_routes then
6065
parts[#parts + 1] = "route=" .. (ctx.var.route_id or "")
6166
end
@@ -75,4 +80,9 @@ function _M.scope(conf, ctx)
7580
end
7681

7782

83+
-- Build the full cache key string: KEY_PREFIX, followed by the scope string
-- returned by scope(conf, ctx), a ":" separator, and the given fingerprint.
-- The fingerprint is concatenated as-is, so callers must pass a non-nil value.
function _M.build(conf, ctx, fingerprint)
84+
return KEY_PREFIX .. scope(conf, ctx) .. ":" .. fingerprint
85+
end
86+
87+
7888
return _M

apisix/plugins/ai-cache/schema.lua

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
local core = require("apisix.core")
1919
local redis_schema = require("apisix.utils.redis-schema")
20+
local binding = require("apisix.plugins.ai-protocols.binding")
2021

2122
local policy_to_additional_properties = core.table.deepcopy(redis_schema.schema)
2223

@@ -53,6 +54,8 @@ local _M = {
5354
type = "boolean", default = true,
5455
},
5556

57+
fail_mode = binding.schema_property("skip"),
58+
5659
bypass_on = {
5760
type = "array",
5861
minItems = 1,

docs/en/latest/plugins/ai-cache.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ The `ai-cache` Plugin must be used with the [`ai-proxy`](./ai-proxy.md) or [`ai-
4848

4949
By default the cache is isolated per route, so two routes never serve each other's entries even when they see the same protocol, model and messages. Set `cache_key.share_across_routes` to `true` to share one cache space across routes.
5050

51-
The cache key uses the **requested** model, not the model a route may rewrite to server-side (`ai-proxy` `options.model` or `ai-proxy-multi` instance selection). When sharing across routes, isolate routes that rewrite to different upstream models with separate Redis instances or with `cache_key.include_vars`.
51+
Even with `cache_key.share_across_routes` enabled, responses from different upstream models or providers are kept in separate cache entries, so one model's response is never served for another.
5252

5353
:::
5454

@@ -62,6 +62,7 @@ The cache key uses the **requested** model, not the model a route may rewrite to
6262
| cache_key.include_vars | array[string] | False | [] | | NGINX variables added to the cache scope (for example `["http_x_tenant"]`), isolating entries by their values. |
6363
| max_cache_body_size | integer | False | 1048576 | >= 0 | Maximum response body size, in bytes, to cache. Larger responses are not cached. |
6464
| cache_headers | boolean | False | true | | If true, add the `X-AI-Cache-Status` response header (and `X-AI-Cache-Age`, the entry age in seconds, on a hit). |
65+
| fail_mode | string | False | `"skip"` | `skip`, `warn`, `error` | Behavior when the request is not a recognized AI request that this Plugin can cache (for example, a request that did not pass through `ai-proxy` or `ai-proxy-multi`). `skip`: let the request pass through uncached; `warn`: pass through uncached and log a warning; `error`: reject the request. |
6566
| bypass_on | array[object] | False | | | Rules that skip the cache entirely (no lookup, no write-back) when any rule matches. |
6667
| bypass_on[].header | string | True | | | Request header name to match. |
6768
| bypass_on[].equals | string | True | | | Bypass when the request header's value exactly equals this string. |

docs/zh/latest/plugins/ai-cache.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ import TabItem from '@theme/TabItem';
4848

4949
默认情况下缓存按路由隔离,因此即使两个路由看到相同的协议、模型与消息,也不会相互返回对方的缓存条目。将 `cache_key.share_across_routes` 设为 `true` 可让多个路由共享同一个缓存空间。
5050

51-
缓存键使用**请求中**的模型,而非路由在服务端改写后的模型(`ai-proxy` 的 `options.model` 或 `ai-proxy-multi` 的实例选择)。在跨路由共享时,如果不同路由改写到不同的上游模型,请使用独立的 Redis 实例,或通过 `cache_key.include_vars` 将它们隔离。
51+
即使开启 `cache_key.share_across_routes`,来自不同上游模型或 provider 的响应也会分别存储在各自的缓存条目中,因此某个模型的响应绝不会被返回给另一个模型。
5252

5353
:::
5454

@@ -62,6 +62,7 @@ import TabItem from '@theme/TabItem';
6262
| cache_key.include_vars | array[string] | 否 | [] | | 加入缓存作用域的 NGINX 变量(例如 `["http_x_tenant"]`),按其取值隔离缓存条目。 |
6363
| max_cache_body_size | integer | 否 | 1048576 | >= 0 | 允许缓存的最大响应体大小,单位为字节。超过该大小的响应不会被缓存。 |
6464
| cache_headers | boolean | 否 | true | | 如果为 true,则添加 `X-AI-Cache-Status` 响应头(命中时还会添加 `X-AI-Cache-Age`,表示缓存条目的存在时长,单位为秒)。 |
65+
| fail_mode | string | 否 | `"skip"` | `skip`、`warn`、`error` | 当请求不是该插件可缓存的 AI 请求时的处理行为(例如未经过 `ai-proxy` 或 `ai-proxy-multi` 的请求)。`skip`:放行请求且不缓存;`warn`:放行不缓存并记录 warning 日志;`error`:拒绝请求。 |
6566
| bypass_on | array[object] | 否 | | | 当任一规则匹配时,完全跳过缓存(不查询、不回写)的规则列表。 |
6667
| bypass_on[].header | string | 是 | | | 要匹配的请求头名称。 |
6768
| bypass_on[].equals | string | 是 | | | 当该请求头的值与此字符串完全相等时,绕过缓存。 |

t/plugin/ai-cache.t

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -949,3 +949,182 @@ POST /cache-route-b
949949
--- error_code: 200
950950
--- response_headers
951951
X-AI-Cache-Status: HIT
952+
953+
954+
955+
=== TEST 42: route with ai-cache but NO ai-proxy in front
956+
--- config
957+
location /t {
958+
content_by_lua_block {
959+
local t = require("lib.test_admin").test
960+
local code, body = t('/apisix/admin/routes/1',
961+
ngx.HTTP_PUT,
962+
[[{
963+
"uri": "/v1/chat/completions",
964+
"upstream": {
965+
"type": "roundrobin",
966+
"nodes": { "127.0.0.1:1980": 1 }
967+
},
968+
"plugins": {
969+
"ai-cache": {
970+
"redis_host": "127.0.0.1",
971+
"redis_port": 6379
972+
}
973+
}
974+
}]]
975+
)
976+
if code >= 300 then
977+
ngx.status = code
978+
end
979+
ngx.say(body)
980+
}
981+
}
982+
--- response_body
983+
passed
984+
985+
986+
987+
=== TEST 43: a request that never passed through ai-proxy is bypassed, not cached
988+
--- request
989+
POST /v1/chat/completions
990+
{"model":"gpt-4o","messages":[{"role":"user","content":"no ai-proxy guard test"}]}
991+
--- more_headers
992+
X-AI-Fixture: openai/chat-basic.json
993+
--- error_code: 200
994+
--- response_headers
995+
X-AI-Cache-Status: BYPASS
996+
997+
998+
999+
=== TEST 44: route with ai-cache fail_mode=error and NO ai-proxy
1000+
--- config
1001+
location /t {
1002+
content_by_lua_block {
1003+
local t = require("lib.test_admin").test
1004+
local code, body = t('/apisix/admin/routes/1',
1005+
ngx.HTTP_PUT,
1006+
[[{
1007+
"uri": "/v1/chat/completions",
1008+
"upstream": {
1009+
"type": "roundrobin",
1010+
"nodes": { "127.0.0.1:1980": 1 }
1011+
},
1012+
"plugins": {
1013+
"ai-cache": {
1014+
"redis_host": "127.0.0.1",
1015+
"redis_port": 6379,
1016+
"fail_mode": "error"
1017+
}
1018+
}
1019+
}]]
1020+
)
1021+
if code >= 300 then
1022+
ngx.status = code
1023+
end
1024+
ngx.say(body)
1025+
}
1026+
}
1027+
--- response_body
1028+
passed
1029+
1030+
1031+
1032+
=== TEST 45: fail_mode=error rejects a request that bypassed the AI proxy
1033+
--- request
1034+
POST /v1/chat/completions
1035+
{"model":"gpt-4o","messages":[{"role":"user","content":"fail_mode error guard test"}]}
1036+
--- more_headers
1037+
X-AI-Fixture: openai/chat-basic.json
1038+
--- error_code: 500
1039+
--- response_body_like eval
1040+
qr/must be used with the ai-proxy/
1041+
1042+
1043+
1044+
=== TEST 46: flush redis, then set one ai-proxy-multi route with two instances
1045+
--- extra_yaml_config
1046+
plugins:
1047+
- ai-proxy-multi
1048+
- ai-cache
1049+
--- config
1050+
location /t {
1051+
content_by_lua_block {
1052+
local redis = require("resty.redis")
1053+
local red = redis:new()
1054+
red:set_timeout(1000)
1055+
local ok, rerr = red:connect("127.0.0.1", 6379)
1056+
if not ok then
1057+
ngx.say("redis connect failed: ", rerr)
1058+
return
1059+
end
1060+
local fok, ferr = red:flushall()
1061+
if not fok then
1062+
ngx.say("redis flushall failed: ", ferr)
1063+
return
1064+
end
1065+
1066+
local t = require("lib.test_admin").test
1067+
local code, body = t('/apisix/admin/routes/1',
1068+
ngx.HTTP_PUT,
1069+
[[{
1070+
"uri": "/multi",
1071+
"plugins": {
1072+
"ai-proxy-multi": {
1073+
"instances": [
1074+
{
1075+
"name": "instance-gpt4o",
1076+
"provider": "openai",
1077+
"weight": 1,
1078+
"auth": { "header": { "Authorization": "Bearer test-key" } },
1079+
"options": { "model": "gpt-4o" },
1080+
"override": { "endpoint": "http://127.0.0.1:1980" }
1081+
},
1082+
{
1083+
"name": "instance-gpt4o-mini",
1084+
"provider": "openai",
1085+
"weight": 1,
1086+
"auth": { "header": { "Authorization": "Bearer test-key" } },
1087+
"options": { "model": "gpt-4o-mini" },
1088+
"override": { "endpoint": "http://127.0.0.1:1980" }
1089+
}
1090+
]
1091+
},
1092+
"ai-cache": {
1093+
"redis_host": "127.0.0.1",
1094+
"redis_port": 6379
1095+
}
1096+
}
1097+
}]]
1098+
)
1099+
if code >= 300 then
1100+
ngx.status = code
1101+
end
1102+
ngx.say(body)
1103+
}
1104+
}
1105+
--- response_body
1106+
passed
1107+
1108+
1109+
1110+
=== TEST 47: round-robin alternates instances, so each one caches independently
1111+
--- extra_yaml_config
1112+
plugins:
1113+
- ai-proxy-multi
1114+
- ai-cache
1115+
--- pipelined_requests eval
1116+
[
1117+
"POST /multi\n" . '{"model":"gpt-4o","messages":[{"role":"user","content":"multi-instance isolation"}]}',
1118+
"POST /multi\n" . '{"model":"gpt-4o","messages":[{"role":"user","content":"multi-instance isolation"}]}',
1119+
"POST /multi\n" . '{"model":"gpt-4o","messages":[{"role":"user","content":"multi-instance isolation"}]}',
1120+
"POST /multi\n" . '{"model":"gpt-4o","messages":[{"role":"user","content":"multi-instance isolation"}]}',
1121+
]
1122+
--- more_headers
1123+
X-AI-Fixture: openai/chat-basic.json
1124+
--- response_headers eval
1125+
[
1126+
"X-AI-Cache-Status: MISS",
1127+
"X-AI-Cache-Status: MISS",
1128+
"X-AI-Cache-Status: HIT",
1129+
"X-AI-Cache-Status: HIT",
1130+
]

0 commit comments

Comments
 (0)