feat(ai-cache): enhance error handling for unsupported requests and improve cache key generation

janiussyafiq · janiussyafiq · commit 4775bfc1fd82 · 2026-06-24T15:53:11.000+08:00
diff --git a/apisix/plugins/ai-cache.lua b/apisix/plugins/ai-cache.lua
@@ -18,6 +18,7 @@
 local core       = require("apisix.core")
 local schema     = require("apisix.plugins.ai-cache.schema")
 local key_mod    = require("apisix.plugins.ai-cache.key")
+local binding    = require("apisix.plugins.ai-protocols.binding")
 local redis_util = require("apisix.utils.redis")
 
 local ngx        = ngx
@@ -65,6 +66,18 @@ end
 
 
 function _M.access(conf, ctx)
+    if not ctx.picked_ai_instance then
+        local handled, code, body = binding.on_unsupported(
+            conf.fail_mode, _M.name, ctx,
+            "no ai instance picked (request did not pass through ai-proxy/ai-proxy-multi)",
+            500, "ai-cache must be used with the ai-proxy or ai-proxy-multi plugin")
+        if handled then
+            return code, body
+        end
+        ctx.ai_cache_status = "BYPASS"
+        return
+    end
+
     -- Streaming responses are not cached in PR-1 (SSE replay is a later
     -- increment). ai-proxy (higher priority) has already classified the
     -- request, so bypass before doing any work.
@@ -89,8 +102,8 @@ function _M.access(conf, ctx)
         return
     end
 
-    ctx.ai_cache_key = "ai-cache:l1:" .. key_mod.scope(conf, ctx)
-                       .. ":" .. key_mod.fingerprint(ctx, body)
+    ctx.ai_cache_fingerprint = key_mod.fingerprint(ctx, body)
+    ctx.ai_cache_key = key_mod.build(conf, ctx, ctx.ai_cache_fingerprint)
 
     local red
     red, err = redis_util.new(conf)
@@ -180,7 +193,7 @@ end
 
 
 function _M.log(conf, ctx)
-    if ctx.ai_cache_status ~= "MISS" or not ctx.ai_cache_key then
+    if ctx.ai_cache_status ~= "MISS" or not ctx.ai_cache_fingerprint then
         return
     end
     if ngx.status ~= 200 then
@@ -192,7 +205,8 @@ function _M.log(conf, ctx)
     end
     local response_body = concat(buf, "", 1, buf.n)
 
-    local ok, err = ngx.timer.at(0, write_to_cache, conf, ctx.ai_cache_key, response_body)
+    local cache_key = key_mod.build(conf, ctx, ctx.ai_cache_fingerprint)
+    local ok, err = ngx.timer.at(0, write_to_cache, conf, cache_key, response_body)
     if not ok then
         core.log.warn("ai-cache: failed to schedule cache write: ", err)
     end
diff --git a/apisix/plugins/ai-cache/key.lua b/apisix/plugins/ai-cache/key.lua
@@ -24,6 +24,8 @@ local ipairs = ipairs
 local pairs  = pairs
 local concat = table.concat
 
+local KEY_PREFIX = "ai-cache:l1:"
+
 local _M = {}
 
 
@@ -52,10 +54,13 @@ function _M.fingerprint(ctx, body)
 end
 
 
-function _M.scope(conf, ctx)
+local function scope(conf, ctx)
     local ck = conf.cache_key or {}
 
     local parts = {}
+    if ctx.picked_ai_instance_name then
+        parts[#parts + 1] = "instance=" .. ctx.picked_ai_instance_name
+    end
     if not ck.share_across_routes then
         parts[#parts + 1] = "route=" .. (ctx.var.route_id or "")
     end
@@ -75,4 +80,9 @@ function _M.scope(conf, ctx)
 end
 
 
+function _M.build(conf, ctx, fingerprint)  -- returns KEY_PREFIX .. scope .. ":" .. fingerprint
+    return KEY_PREFIX .. scope(conf, ctx) .. ":" .. fingerprint
+end
+
+
 return _M
diff --git a/apisix/plugins/ai-cache/schema.lua b/apisix/plugins/ai-cache/schema.lua
@@ -17,6 +17,7 @@
 
 local core         = require("apisix.core")
 local redis_schema = require("apisix.utils.redis-schema")
+local binding      = require("apisix.plugins.ai-protocols.binding")
 
 local policy_to_additional_properties = core.table.deepcopy(redis_schema.schema)
 
@@ -53,6 +54,8 @@ local _M = {
             type = "boolean", default = true,
         },
 
+        fail_mode = binding.schema_property("skip"),
+
         bypass_on = {
             type = "array",
             minItems = 1,
diff --git a/docs/en/latest/plugins/ai-cache.md b/docs/en/latest/plugins/ai-cache.md
@@ -48,7 +48,7 @@ The `ai-cache` Plugin must be used with the [`ai-proxy`](./ai-proxy.md) or [`ai-
 
 By default the cache is isolated per route, so two routes never serve each other's entries even when they see the same protocol, model and messages. Set `cache_key.share_across_routes` to `true` to share one cache space across routes.
 
-The cache key uses the **requested** model, not the model a route may rewrite to server-side (`ai-proxy` `options.model` or `ai-proxy-multi` instance selection). When sharing across routes, isolate routes that rewrite to different upstream models with separate Redis instances or with `cache_key.include_vars`.
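+Even with `cache_key.share_across_routes` enabled, the cache key includes the name of the AI instance selected for the request, so responses from differently named instances (for example, `ai-proxy-multi` instances that target different upstream models or providers) are kept in separate cache entries and are never served for one another.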
 
 :::
 
@@ -62,6 +62,7 @@ The cache key uses the **requested** model, not the model a route may rewrite to
 | cache_key.include_vars | array[string] | False | [] | | NGINX variables added to the cache scope (for example `["http_x_tenant"]`), isolating entries by their values. |
 | max_cache_body_size | integer | False | 1048576 | >= 0 | Maximum response body size, in bytes, to cache. Larger responses are not cached. |
 | cache_headers | boolean | False | true | | If true, add the `X-AI-Cache-Status` response header (and `X-AI-Cache-Age`, the entry age in seconds, on a hit). |
+| fail_mode | string | False | `"skip"` | `skip`, `warn`, `error` | Behavior when the request is not a recognized AI request that this Plugin can cache (for example, a request that did not pass through `ai-proxy` or `ai-proxy-multi`). `skip`: let the request pass through uncached; `warn`: pass through uncached and log a warning; `error`: reject the request. |
 | bypass_on | array[object] | False | | | Rules that skip the cache entirely (no lookup, no write-back) when any rule matches. |
 | bypass_on[].header | string | True | | | Request header name to match. |
 | bypass_on[].equals | string | True | | | Bypass when the request header's value exactly equals this string. |
diff --git a/docs/zh/latest/plugins/ai-cache.md b/docs/zh/latest/plugins/ai-cache.md
@@ -48,7 +48,7 @@ import TabItem from '@theme/TabItem';
 
 默认情况下缓存按路由隔离，因此即使两个路由看到相同的协议、模型与消息，也不会相互返回对方的缓存条目。将 `cache_key.share_across_routes` 设为 `true` 可让多个路由共享同一个缓存空间。
 
-缓存键使用**请求中**的模型，而非路由在服务端改写后的模型（`ai-proxy` 的 `options.model` 或 `ai-proxy-multi` 的实例选择）。在跨路由共享时，如果不同路由改写到不同的上游模型，请使用独立的 Redis 实例，或通过 `cache_key.include_vars` 将它们隔离。
+即使开启 `cache_key.share_across_routes`，缓存键中也会包含为该请求选中的 AI 实例名称，因此不同名称的实例（例如 `ai-proxy-multi` 中指向不同上游模型或 provider 的实例）的响应会分别存储在各自的缓存条目中，绝不会相互返回。
 
 :::
 
@@ -62,6 +62,7 @@ import TabItem from '@theme/TabItem';
 | cache_key.include_vars | array[string] | 否 | [] | | 加入缓存作用域的 NGINX 变量（例如 `["http_x_tenant"]`），按其取值隔离缓存条目。 |
 | max_cache_body_size | integer | 否 | 1048576 | >= 0 | 允许缓存的最大响应体大小，单位为字节。超过该大小的响应不会被缓存。 |
 | cache_headers | boolean | 否 | true | | 如果为 true，则添加 `X-AI-Cache-Status` 响应头（命中时还会添加 `X-AI-Cache-Age`，表示缓存条目的存在时长，单位为秒）。 |
+| fail_mode | string | 否 | `"skip"` | `skip`、`warn`、`error` | 当请求不是该插件可缓存的 AI 请求时的处理行为（例如未经过 `ai-proxy` 或 `ai-proxy-multi` 的请求）。`skip`：放行请求且不缓存；`warn`：放行不缓存并记录 warning 日志；`error`：拒绝请求。 |
 | bypass_on | array[object] | 否 | | | 当任一规则匹配时，完全跳过缓存（不查询、不回写）的规则列表。 |
 | bypass_on[].header | string | 是 | | | 要匹配的请求头名称。 |
 | bypass_on[].equals | string | 是 | | | 当该请求头的值与此字符串完全相等时，绕过缓存。 |
diff --git a/t/plugin/ai-cache.t b/t/plugin/ai-cache.t
@@ -949,3 +949,182 @@ POST /cache-route-b
 --- error_code: 200
 --- response_headers
 X-AI-Cache-Status: HIT
+
+
+
+=== TEST 42: route with ai-cache but NO ai-proxy in front
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/v1/chat/completions",
+                    "upstream": {
+                        "type": "roundrobin",
+                        "nodes": { "127.0.0.1:1980": 1 }
+                    },
+                    "plugins": {
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 43: a request that never passed through ai-proxy is bypassed, not cached
+--- request
+POST /v1/chat/completions
+{"model":"gpt-4o","messages":[{"role":"user","content":"no ai-proxy guard test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- error_code: 200
+--- response_headers
+X-AI-Cache-Status: BYPASS
+
+
+
+=== TEST 44: route with ai-cache fail_mode=error and NO ai-proxy
+--- config
+    location /t {
+        content_by_lua_block {
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/v1/chat/completions",
+                    "upstream": {
+                        "type": "roundrobin",
+                        "nodes": { "127.0.0.1:1980": 1 }
+                    },
+                    "plugins": {
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379,
+                            "fail_mode": "error"
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 45: fail_mode=error rejects a request that bypassed the AI proxy
+--- request
+POST /v1/chat/completions
+{"model":"gpt-4o","messages":[{"role":"user","content":"fail_mode error guard test"}]}
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- error_code: 500
+--- response_body_like eval
+qr/must be used with the ai-proxy/
+
+
+
+=== TEST 46: flush redis, then set one ai-proxy-multi route with two instances
+--- extra_yaml_config
+plugins:
+  - ai-proxy-multi
+  - ai-cache
+--- config
+    location /t {
+        content_by_lua_block {
+            local redis = require("resty.redis")
+            local red = redis:new()
+            red:set_timeout(1000)
+            local ok, rerr = red:connect("127.0.0.1", 6379)
+            if not ok then
+                ngx.say("redis connect failed: ", rerr)
+                return
+            end
+            local fok, ferr = red:flushall()
+            if not fok then
+                ngx.say("redis flushall failed: ", ferr)
+                return
+            end
+
+            local t = require("lib.test_admin").test
+            local code, body = t('/apisix/admin/routes/1',
+                ngx.HTTP_PUT,
+                [[{
+                    "uri": "/multi",
+                    "plugins": {
+                        "ai-proxy-multi": {
+                            "instances": [
+                                {
+                                    "name": "instance-gpt4o",
+                                    "provider": "openai",
+                                    "weight": 1,
+                                    "auth": { "header": { "Authorization": "Bearer test-key" } },
+                                    "options": { "model": "gpt-4o" },
+                                    "override": { "endpoint": "http://127.0.0.1:1980" }
+                                },
+                                {
+                                    "name": "instance-gpt4o-mini",
+                                    "provider": "openai",
+                                    "weight": 1,
+                                    "auth": { "header": { "Authorization": "Bearer test-key" } },
+                                    "options": { "model": "gpt-4o-mini" },
+                                    "override": { "endpoint": "http://127.0.0.1:1980" }
+                                }
+                            ]
+                        },
+                        "ai-cache": {
+                            "redis_host": "127.0.0.1",
+                            "redis_port": 6379
+                        }
+                    }
+                }]]
+            )
+            if code >= 300 then
+                ngx.status = code
+            end
+            ngx.say(body)
+        }
+    }
+--- response_body
+passed
+
+
+
+=== TEST 47: round-robin alternates instances, so each one caches independently
+--- extra_yaml_config
+plugins:
+  - ai-proxy-multi
+  - ai-cache
+--- pipelined_requests eval
+[
+    "POST /multi\n" . '{"model":"gpt-4o","messages":[{"role":"user","content":"multi-instance isolation"}]}',
+    "POST /multi\n" . '{"model":"gpt-4o","messages":[{"role":"user","content":"multi-instance isolation"}]}',
+    "POST /multi\n" . '{"model":"gpt-4o","messages":[{"role":"user","content":"multi-instance isolation"}]}',
+    "POST /multi\n" . '{"model":"gpt-4o","messages":[{"role":"user","content":"multi-instance isolation"}]}',
+]
+--- more_headers
+X-AI-Fixture: openai/chat-basic.json
+--- response_headers eval
+[
+    "X-AI-Cache-Status: MISS",
+    "X-AI-Cache-Status: MISS",
+    "X-AI-Cache-Status: HIT",
+    "X-AI-Cache-Status: HIT",
+]