Skip to content

Commit d0451ff

Browse files
authored
feat: add ai-lakera-guard plugin (#13570)
1 parent c956152 commit d0451ff

15 files changed

Lines changed: 1953 additions & 0 deletions

File tree

Makefile

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -401,6 +401,9 @@ install: runtime
401401
$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai-rag/vector-search
402402
$(ENV_INSTALL) apisix/plugins/ai-rag/vector-search/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai-rag/vector-search
403403

404+
$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/ai-lakera-guard
405+
$(ENV_INSTALL) apisix/plugins/ai-lakera-guard/*.lua $(ENV_INST_LUADIR)/apisix/plugins/ai-lakera-guard
406+
404407
$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/mcp/broker
405408
$(ENV_INSTALL) -d $(ENV_INST_LUADIR)/apisix/plugins/mcp/transport
406409
$(ENV_INSTALL) apisix/plugins/mcp/*.lua $(ENV_INST_LUADIR)/apisix/plugins/mcp

apisix/cli/config.lua

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -246,6 +246,7 @@ local _M = {
246246
"ai-proxy",
247247
"ai-aws-content-moderation",
248248
"ai-aliyun-content-moderation",
249+
"ai-lakera-guard",
249250
"proxy-mirror",
250251
"graphql-proxy-cache",
251252
"proxy-rewrite",

apisix/plugins/ai-lakera-guard.lua

Lines changed: 209 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,209 @@
1+
--
2+
-- Licensed to the Apache Software Foundation (ASF) under one or more
3+
-- contributor license agreements. See the NOTICE file distributed with
4+
-- this work for additional information regarding copyright ownership.
5+
-- The ASF licenses this file to You under the Apache License, Version 2.0
6+
-- (the "License"); you may not use this file except in compliance with
7+
-- the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing, software
12+
-- distributed under the License is distributed on an "AS IS" BASIS,
13+
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
-- See the License for the specific language governing permissions and
15+
-- limitations under the License.
16+
--
17+
local core = require("apisix.core")
18+
local schema_mod = require("apisix.plugins.ai-lakera-guard.schema")
19+
local client = require("apisix.plugins.ai-lakera-guard.client")
20+
local protocols = require("apisix.plugins.ai-protocols")
21+
local binding = require("apisix.plugins.ai-protocols.binding")
22+
23+
local ipairs = ipairs
24+
local type = type
25+
local concat = table.concat
26+
27+
28+
-- Plugin descriptor for ai-lakera-guard: name, version, priority and the
-- configuration schema taken from the schema module.
local _M = {
    name = "ai-lakera-guard",
    version = 0.1,
    priority = 1028,
    schema = schema_mod.schema,
}


-- Validate a plugin configuration. The check is delegated unchanged to the
-- schema module and its results are returned as-is.
function _M.check_schema(conf)
    return schema_mod.check_schema(conf)
end
39+
40+
41+
-- Format only the detectors that actually fired (detected = true) for the
42+
-- client-facing reveal; the raw breakdown may also carry non-detected entries,
43+
-- which belong in the log but not in the deny message.
44+
-- Build the list of human-readable labels for the detectors that fired.
--
-- breakdown: array of per-detector entries (or nil); anything that is not a
--            table yields an empty list.
-- returns:   array of strings, "<detector_type>" or "<detector_type> (<result>)".
--
-- The breakdown comes from an external service response, so every field is
-- type-checked before it is concatenated: concatenating a table or boolean
-- raises a runtime error in Lua.
local function format_breakdown(breakdown)
    local parts = {}
    if type(breakdown) ~= "table" then
        return parts
    end

    for _, entry in ipairs(breakdown) do
        if type(entry) == "table" and entry.detected then
            local detector = entry.detector_type
            local detector_kind = type(detector)
            -- Strings and numbers are the only values ".." can join safely.
            if detector_kind == "string" or detector_kind == "number" then
                local part = detector
                local result = entry.result
                local result_kind = type(result)
                if (result_kind == "string" and result ~= "")
                   or result_kind == "number" then
                    part = part .. " (" .. result .. ")"
                end
                -- Force a string so the returned list is uniform even for a
                -- numeric detector_type without a result suffix.
                parts[#parts + 1] = tostring(part)
            end
        end
    end
    return parts
end
57+
58+
59+
-- Build the body returned to the client when a request is denied.
--
-- ctx:       request context; supplies the client protocol, usage and vars.
-- conf:      plugin configuration; reveal_failure_categories controls whether
--            the flagged detector list is appended to the message.
-- message:   base denial text.
-- breakdown: optional per-detector results used for the category list.
--
-- Returns the bare message when the client protocol is not registered,
-- otherwise whatever the protocol's build_deny_response produces.
local function deny_message(ctx, conf, message, breakdown)
    local protocol_name = ctx.ai_client_protocol
    local proto = protocols.get(protocol_name)
    if not proto then
        core.log.error("ai-lakera-guard: unsupported protocol: ",
                       protocol_name or "unknown")
        return message
    end

    local text = message
    if conf.reveal_failure_categories then
        local flagged = format_breakdown(breakdown)
        if #flagged > 0 then
            text = text .. ". Flagged categories: " .. concat(flagged, ", ")
        end
    end

    -- Prefer the usage already recorded on the context, then the protocol's
    -- own empty-usage value, and finally an all-zero usage table.
    local usage = ctx.llm_raw_usage
    if not usage and proto.empty_usage then
        usage = proto.empty_usage()
    end
    if not usage then
        usage = { prompt_tokens = 0, completion_tokens = 0, total_tokens = 0 }
    end

    local deny_opts = {
        text = text,
        model = ctx.var.request_llm_model,
        usage = usage,
        stream = ctx.var.request_type == "ai_stream",
    }
    return proto.build_deny_response(deny_opts)
end
83+
84+
85+
-- Normalize a protocol's canonical {role, content} messages into the shape
86+
-- Lakera /v2/guard accepts: role preserved, content coerced to a plain string.
87+
-- Some adapters (e.g. openai-chat) return body.messages verbatim, so a message's
88+
-- content can be a multimodal array or nil (tool-call turns); flatten the text
89+
-- parts and drop messages that carry no text.
90+
-- Reduce one message's content to plain text.
-- A string is returned unchanged; an array has the text of its
-- {type = "text", text = <string>} parts joined with single spaces; any other
-- value yields nil.
local function content_to_text(content)
    local kind = type(content)
    if kind == "string" then
        return content
    end
    if kind ~= "table" then
        return nil
    end

    local texts = {}
    for _, piece in ipairs(content) do
        if type(piece) == "table"
           and piece.type == "text"
           and type(piece.text) == "string" then
            core.table.insert(texts, piece.text)
        end
    end
    return concat(texts, " ")
end


-- Convert a list of messages into {role, content} pairs whose content is
-- always a non-empty string. Entries that are not tables, have a non-string
-- role, or carry no text are left out. A nil list yields an empty result.
local function normalize_messages(messages)
    local normalized = {}
    for _, msg in ipairs(messages or {}) do
        if type(msg) == "table" and type(msg.role) == "string" then
            local text = content_to_text(msg.content)
            if text and text ~= "" then
                core.table.insert(normalized, { role = msg.role, content = text })
            end
        end
    end
    return normalized
end
115+
116+
117+
-- Scan the messages with Lakera Guard and decide whether to deny the request.
--
-- Returns nothing when the request may proceed (no messages, not flagged,
-- scan error with fail_open, or action == "alert"); otherwise returns
-- conf.deny_code and the denial body.
local function request_content_moderation(ctx, conf, messages)
    if not messages or #messages == 0 then
        return
    end

    local verdict, scan_err = client.scan(conf, messages)
    if scan_err then
        if not conf.fail_open then
            core.log.error("ai-lakera-guard: ", scan_err, "; fail_open=false, blocking request")
            return conf.deny_code, deny_message(ctx, conf, conf.request_failure_message)
        end
        core.log.warn("ai-lakera-guard: ", scan_err, "; fail_open=true, allowing request")
        return
    end

    if not verdict.flagged then
        return
    end

    -- The whole breakdown is logged, including detectors that did not fire,
    -- so alert-only and blocked requests leave the same audit trail.
    local breakdown_json = core.json.encode(verdict.breakdown)
    core.log.warn("ai-lakera-guard: request flagged by Lakera Guard",
                  ", breakdown: ", breakdown_json,
                  ", request_uuid: ", verdict.request_uuid or "")

    -- In "alert" mode the request is only logged, never blocked.
    if conf.action ~= "alert" then
        return conf.deny_code,
               deny_message(ctx, conf, conf.request_failure_message, verdict.breakdown)
    end
end
148+
149+
150+
-- Report a request this plugin cannot inspect via binding.on_unsupported.
-- Returns the (code, body) pair when on_unsupported reports the case as
-- handled, and nothing otherwise.
local function reject_unsupported(conf, ctx, reason, status, client_msg)
    local handled, code, body = binding.on_unsupported(
        conf.fail_mode, _M.name, ctx, reason, status, client_msg)
    if handled then
        return code, body
    end
end


-- Access-phase handler: extract the conversation from the request body, have
-- it scanned, and return a denial (code, body) when the request is blocked.
-- Returns nothing when the request is allowed to continue.
function _M.access(conf, ctx)
    if not ctx.picked_ai_instance then
        return reject_unsupported(conf, ctx,
            "no ai instance picked (request did not pass through ai-proxy/ai-proxy-multi)",
            500, "no ai instance picked, ai-lakera-guard plugin must be used with "
                 .. "ai-proxy or ai-proxy-multi plugin")
    end

    local request_tab, err = core.request.get_json_request_body_table()
    if not request_tab then
        local reason = "failed to read request body: " .. (err or "unknown error")
        return reject_unsupported(conf, ctx, reason, 500, reason)
    end

    local proto = protocols.get(ctx.ai_client_protocol)
    if not proto or not proto.get_messages then
        local reason = "unsupported protocol: " .. (ctx.ai_client_protocol or "unknown")
        return reject_unsupported(conf, ctx, reason, 500, reason)
    end

    local raw_messages = proto.get_messages(request_tab)
    local messages = normalize_messages(raw_messages)
    if #messages == 0 and proto.extract_request_content then
        -- No role-tagged messages could be derived from this body: fall back
        -- to one user message built from the protocol's flat text extraction.
        local pieces = proto.extract_request_content(request_tab)
        local text = concat(pieces, " ")
        if text ~= "" then
            messages = { { role = "user", content = text } }
        end
    end

    local deny_code, deny_body = request_content_moderation(ctx, conf, messages)
    if not deny_code then
        return
    end

    -- The denial body is served with the content type of the request mode.
    local content_type = "application/json"
    if ctx.var.request_type == "ai_stream" then
        content_type = "text/event-stream"
    end
    core.response.set_header("Content-Type", content_type)
    return deny_code, deny_body
end
207+
208+
209+
return _M
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
--
2+
-- Licensed to the Apache Software Foundation (ASF) under one or more
3+
-- contributor license agreements. See the NOTICE file distributed with
4+
-- this work for additional information regarding copyright ownership.
5+
-- The ASF licenses this file to You under the Apache License, Version 2.0
6+
-- (the "License"); you may not use this file except in compliance with
7+
-- the License. You may obtain a copy of the License at
8+
--
9+
-- http://www.apache.org/licenses/LICENSE-2.0
10+
--
11+
-- Unless required by applicable law or agreed to in writing, software
12+
-- distributed under the License is distributed on an "AS IS" BASIS,
13+
-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14+
-- See the License for the specific language governing permissions and
15+
-- limitations under the License.
16+
--
17+
local core = require("apisix.core")
18+
local http = require("resty.http")
19+
20+
local type = type
21+
22+
local _M = {}
23+
24+
25+
-- Call Lakera Guard /v2/guard with the given messages.
26+
--
27+
-- `messages` is the role-tagged conversation in Lakera's {role, content} shape;
28+
-- it is forwarded verbatim so the system / user / assistant turns Lakera's
29+
-- message-based policy acts on are preserved, rather than being flattened into a
30+
-- single user message.
31+
--
32+
-- On success returns a result table; on the Lakera-unreachable path (timeout,
33+
-- connection error, non-200 status, decode failure) returns nil + an error string.
34+
--
35+
-- result fields:
36+
-- flagged (boolean) — Lakera's primary enforcement signal
37+
-- breakdown (array|nil) — Lakera's per-detector results, passed through
38+
-- verbatim and unfiltered (both detected and
39+
-- non-detected entries) so the full verdict can be
40+
-- logged exactly as Lakera returned it; selecting
41+
-- which detectors to surface is left to the caller
42+
-- request_uuid (string|nil) — Lakera trace id, when present
43+
function _M.scan(conf, messages)
    -- The per-detector breakdown is always requested so a flagged verdict can
    -- be logged in full; whether it is shown to the client is decided
    -- elsewhere via reveal_failure_categories.
    local payload = {
        messages = messages,
        breakdown = true,
    }
    local project_id = conf.project_id
    if project_id then
        payload.project_id = project_id
    end
    -- `payload.payload = true` is deliberately not set. A later PII-redaction
    -- phase could enable it to receive the matched PII / profanity / regex
    -- spans, but this phase does not use them and they may contain sensitive
    -- text that should not be pulled into the gateway unnecessarily.

    local req_headers = {
        ["Content-Type"] = "application/json",
    }
    local api_key = conf.api_key
    if api_key and api_key ~= "" then
        req_headers["Authorization"] = "Bearer " .. api_key
    end

    local httpc = http.new()
    httpc:set_timeout(conf.timeout)

    local request_opts = {
        method = "POST",
        body = core.json.encode(payload),
        headers = req_headers,
        ssl_verify = conf.ssl_verify,
    }
    local res, req_err = httpc:request_uri(conf.lakera_endpoint, request_opts)
    if not res then
        return nil, "failed to request Lakera Guard: " .. (req_err or "unknown error")
    end
    if res.status ~= 200 then
        return nil, "Lakera Guard returned status " .. res.status
    end

    local data, decode_err = core.json.decode(res.body, { null_as_nil = true })
    if not data then
        return nil, "failed to decode Lakera Guard response: "
                    .. (decode_err or "unknown error")
    end
    if type(data) ~= "table" then
        return nil, "unexpected Lakera Guard response: expected a JSON object"
    end

    -- Only a table-valued breakdown is passed on; anything else becomes nil.
    local breakdown = data.breakdown
    if type(breakdown) ~= "table" then
        breakdown = nil
    end

    -- request_uuid is taken from a table-valued metadata field; a missing or
    -- false value is reported as nil.
    local request_uuid
    local metadata = data.metadata
    if type(metadata) == "table" then
        local uuid = metadata.request_uuid
        if uuid then
            request_uuid = uuid
        end
    end

    return {
        flagged = data.flagged == true,
        breakdown = breakdown,
        request_uuid = request_uuid,
    }
end
97+
98+
99+
return _M

0 commit comments

Comments
 (0)