From 53d689354adeebf9bb5326a4c10702fb442f62b9 Mon Sep 17 00:00:00 2001 From: LB7666 Date: Sat, 4 Jul 2026 02:44:30 +0800 Subject: [PATCH 01/11] feat(llm-access): keyword-based session moderation gate Add a pre-upstream keyword moderation module for the Kiro and Codex gateways. When request content matches a configured keyword the session is banned in memory and this plus all subsequent requests are blocked; the full request body and (redacted) headers are captured once for admin review, and a reviewer can unban a session. Design highlights: - Phrase matching via Aho-Corasick over normalized text (lowercased, whitespace-collapsed); ASCII keywords require word boundaries while CJK phrases match freely. Only user-visible message content is scanned (system + messages[].content / instructions), not JSON structure noise. - Keywords import from plain-text (one phrase per line) or JSON. - Hot path never reads Postgres: the compiled automaton plus banned / allowlisted session-key sets live in process memory, refreshed on startup and a periodic interval. Already-banned sessions are rejected without a scan or a write; a new ban persists exactly once (JSONB body + headers) via a spawned task. - Admin API + Yew review console: manage keywords and review captured bans (inspect payload, keep or lift the ban). Storage: new AdminModerationStore trait, empty stub, Postgres impl, and migration 0036 (llm_moderation_keywords, llm_moderation_banned_sessions). Co-Authored-By: Claude Opus 4.8 --- Cargo.lock | 1 + Cargo.toml | 1 + crates/backend/src/routes.rs | 2 + crates/frontend/src/api.rs | 265 ++++++++ .../frontend/src/pages/admin_llm_gateway.rs | 1 + crates/frontend/src/pages/admin_moderation.rs | 590 ++++++++++++++++++ crates/frontend/src/pages/mod.rs | 1 + crates/frontend/src/router.rs | 10 + crates/frontend/src/seo.rs | 2 + crates/llm-access-core/Cargo.toml | 1 + crates/llm-access-core/src/lib.rs | 1 + crates/llm-access-core/src/moderation.rs | 416 ++++++++++++ crates/llm-access-core/src/store/empty.rs | 80 ++- crates/llm-access-core/src/store/mod.rs | 25 +- .../llm-access-core/src/store/moderation.rs | 156 +++++ crates/llm-access-core/src/store/traits.rs | 57 ++ .../postgres/0036_keyword_moderation.sql | 34 + crates/llm-access-migrations/src/lib.rs | 5 + crates/llm-access-store/src/postgres.rs | 1 + .../src/postgres/moderation.rs | 335 ++++++++++ crates/llm-access/src/admin.rs | 265 ++++++++ crates/llm-access/src/lib.rs | 40 +- crates/llm-access/src/moderation.rs | 450 +++++++++++++ crates/llm-access/src/provider.rs | 2 + .../provider/anthropic_upstream_dispatch.rs | 79 ++- .../llm-access/src/provider/codex_dispatch.rs | 68 +- .../llm-access/src/provider/kiro_dispatch.rs | 57 +- crates/llm-access/src/provider/state.rs | 8 + crates/llm-access/src/runtime.rs | 32 +- 29 files changed, 2956 insertions(+), 29 deletions(-) create mode 100644 crates/frontend/src/pages/admin_moderation.rs create mode 100644 crates/llm-access-core/src/moderation.rs create mode 100644 crates/llm-access-core/src/store/moderation.rs create mode 100644 crates/llm-access-migrations/migrations/postgres/0036_keyword_moderation.sql create mode 100644 crates/llm-access-store/src/postgres/moderation.rs create mode 100644 crates/llm-access/src/moderation.rs diff --git a/Cargo.lock b/Cargo.lock index 74203f3c..d65faa84 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5787,6 +5787,7 @@ dependencies = [ name = "llm-access-core" version = "0.1.0" dependencies = [ + "aho-corasick", "anyhow", "async-trait", "base64 0.22.1", diff --git a/Cargo.toml b/Cargo.toml index b6f1dd82..a2cceba8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -47,6 +47,7 @@ httparse = "1" lru = "0.16.3" once_cell = "1" ahash = "0.8.12" +aho-corasick = "1.1" # 日志 log = "0.4" diff --git a/crates/backend/src/routes.rs b/crates/backend/src/routes.rs index b9bf2105..c33e993c 100644 --- a/crates/backend/src/routes.rs +++ b/crates/backend/src/routes.rs @@ -516,8 +516,10 @@ pub fn create_router(state: AppState) -> Router { .route("/api/llm-access/*path", any(crate::llm_access_admin_proxy::proxy_public_request)) .route("/admin/llm-gateway", get(seo::seo_spa_shell)) .route("/admin/llm-gateway/monitor", get(seo::seo_spa_shell)) + .route("/admin/llm-gateway/moderation", get(seo::seo_spa_shell)) .route("/static_flow/admin/llm-gateway", get(seo::seo_spa_shell)) .route("/static_flow/admin/llm-gateway/monitor", get(seo::seo_spa_shell)) + .route("/static_flow/admin/llm-gateway/moderation", get(seo::seo_spa_shell)) .route("/admin/kiro-gateway", get(seo::seo_spa_shell)) .route("/admin/kiro-gateway/accounts", any(admin_kiro_accounts_entry)) .route("/admin/kiro-gateway/upstream-channels", get(seo::seo_spa_shell)) diff --git a/crates/frontend/src/api.rs b/crates/frontend/src/api.rs index f30e1b5f..7bd4fb78 100644 --- a/crates/frontend/src/api.rs +++ b/crates/frontend/src/api.rs @@ -10966,6 +10966,99 @@ pub struct TestAdminAnthropicUpstreamModelInput { pub model: String, } +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(default)] +pub struct ModerationKeywordView { + pub id: i64, + pub keyword: String, + pub note: Option, + pub source: String, + pub created_at_ms: i64, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(default)] +pub struct ModerationGateStatsView { + pub loaded: bool, + pub loaded_at_ms: Option, + pub keyword_count: usize, + pub banned_session_count: usize, + pub allowed_session_count: usize, + pub blocked_requests_total: u64, + pub persist_failures_total: u64, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(default)] +pub struct AdminModerationKeywordsResponse { + pub keywords: Vec, + pub total: usize, + pub stats: ModerationGateStatsView, + pub generated_at: i64, +} + +#[derive(Debug, Serialize, Clone, PartialEq, Default)] +pub struct AddAdminModerationKeywordsInput { + pub content: String, + pub format: Option, + pub note: Option, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(default)] +pub struct AddAdminModerationKeywordsResponse { + pub inserted: usize, + pub duplicates: usize, + pub parsed: usize, + pub generated_at: i64, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(default)] +pub struct ModerationBannedSessionView { + pub id: i64, + pub session_key: String, + pub provider: String, + pub key_id: String, + pub key_name: String, + pub session_id: String, + pub matched_keyword: String, + pub matched_context: String, + pub endpoint: String, + pub model: String, + pub client_ip: String, + pub status: String, + pub review_note: Option, + pub banned_at_ms: i64, + pub reviewed_at_ms: Option, + pub updated_at_ms: i64, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(default)] +pub struct ModerationBannedSessionDetailView { + pub session: ModerationBannedSessionView, + pub request_headers_json: String, + pub request_body_json: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] +#[serde(default)] +pub struct AdminModerationBannedSessionsResponse { + pub sessions: Vec, + pub total: usize, + pub limit: usize, + pub offset: usize, + pub has_more: bool, + pub generated_at: i64, +} + +#[derive(Debug, Serialize, Clone, PartialEq, Default)] +pub struct ReviewModerationBannedSessionInput { + pub banned: bool, + pub review_note: Option, +} + #[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Default)] #[serde(default)] pub struct KiroAccountView { @@ -12050,6 +12143,178 @@ pub async fn delete_admin_anthropic_upstream_channel(name: &str) -> Result<(), S } } +pub async fn fetch_admin_moderation_keywords() -> Result { + #[cfg(feature = "mock")] + { + Ok(AdminModerationKeywordsResponse::default()) + } + + #[cfg(not(feature = "mock"))] + { + let url = format!("{}/admin/llm-gateway/moderation/keywords", llm_access_admin_base()); + let response = api_get(&url) + .send() + .await + .map_err(|e| format!("Network error: {:?}", e))?; + if !response.ok() { + let text = response.text().await.unwrap_or_default(); + return Err(format!("Failed: {text}")); + } + response + .json() + .await + .map_err(|e| format!("Parse error: {:?}", e)) + } +} + +pub async fn add_admin_moderation_keywords( + input: &AddAdminModerationKeywordsInput, +) -> Result { + #[cfg(feature = "mock")] + { + let _ = input; + Ok(AddAdminModerationKeywordsResponse::default()) + } + + #[cfg(not(feature = "mock"))] + { + let url = format!("{}/admin/llm-gateway/moderation/keywords", llm_access_admin_base()); + let response = api_post(&url) + .json(input) + .map_err(|e| format!("Serialize error: {:?}", e))? + .send() + .await + .map_err(|e| format!("Network error: {:?}", e))?; + if !response.ok() { + let text = response.text().await.unwrap_or_default(); + return Err(format!("Failed: {text}")); + } + response + .json() + .await + .map_err(|e| format!("Parse error: {:?}", e)) + } +} + +pub async fn delete_admin_moderation_keyword(id: i64) -> Result<(), String> { + #[cfg(feature = "mock")] + { + let _ = id; + Ok(()) + } + + #[cfg(not(feature = "mock"))] + { + let url = format!("{}/admin/llm-gateway/moderation/keywords/{id}", llm_access_admin_base()); + let response = api_delete(&url) + .send() + .await + .map_err(|e| format!("Network error: {:?}", e))?; + if !response.ok() { + let text = response.text().await.unwrap_or_default(); + return Err(format!("Failed: {text}")); + } + Ok(()) + } +} + +pub async fn fetch_admin_moderation_banned_sessions( + status: &str, + limit: usize, + offset: usize, +) -> Result { + #[cfg(feature = "mock")] + { + let _ = (status, limit, offset); + Ok(AdminModerationBannedSessionsResponse::default()) + } + + #[cfg(not(feature = "mock"))] + { + let url = format!( + "{}/admin/llm-gateway/moderation/banned-sessions?status={}&limit={limit}&\ + offset={offset}", + llm_access_admin_base(), + urlencoding::encode(status) + ); + let response = api_get(&url) + .send() + .await + .map_err(|e| format!("Network error: {:?}", e))?; + if !response.ok() { + let text = response.text().await.unwrap_or_default(); + return Err(format!("Failed: {text}")); + } + response + .json() + .await + .map_err(|e| format!("Parse error: {:?}", e)) + } +} + +pub async fn fetch_admin_moderation_banned_session( + id: i64, +) -> Result { + #[cfg(feature = "mock")] + { + let _ = id; + Ok(ModerationBannedSessionDetailView::default()) + } + + #[cfg(not(feature = "mock"))] + { + let url = format!( + "{}/admin/llm-gateway/moderation/banned-sessions/{id}", + llm_access_admin_base() + ); + let response = api_get(&url) + .send() + .await + .map_err(|e| format!("Network error: {:?}", e))?; + if !response.ok() { + let text = response.text().await.unwrap_or_default(); + return Err(format!("Failed: {text}")); + } + response + .json() + .await + .map_err(|e| format!("Parse error: {:?}", e)) + } +} + +pub async fn review_admin_moderation_banned_session( + id: i64, + input: &ReviewModerationBannedSessionInput, +) -> Result { + #[cfg(feature = "mock")] + { + let _ = (id, input); + Ok(ModerationBannedSessionView::default()) + } + + #[cfg(not(feature = "mock"))] + { + let url = format!( + "{}/admin/llm-gateway/moderation/banned-sessions/{id}/review", + llm_access_admin_base() + ); + let response = api_post(&url) + .json(input) + .map_err(|e| format!("Serialize error: {:?}", e))? + .send() + .await + .map_err(|e| format!("Network error: {:?}", e))?; + if !response.ok() { + let text = response.text().await.unwrap_or_default(); + return Err(format!("Failed: {text}")); + } + response + .json() + .await + .map_err(|e| format!("Parse error: {:?}", e)) + } +} + pub async fn fetch_admin_kiro_accounts() -> Result { #[cfg(feature = "mock")] { diff --git a/crates/frontend/src/pages/admin_llm_gateway.rs b/crates/frontend/src/pages/admin_llm_gateway.rs index 5cd081c7..a04f85c1 100644 --- a/crates/frontend/src/pages/admin_llm_gateway.rs +++ b/crates/frontend/src/pages/admin_llm_gateway.rs @@ -6631,6 +6631,7 @@ pub fn admin_llm_gateway_page() -> Html {
to={Route::Admin} classes={classes!("btn-terminal")}>{ "Admin 首页" }> to={Route::AdminLlmGatewayMonitor} classes={classes!("btn-terminal")}>{ "监控页" }> + to={Route::AdminLlmGatewayModeration} classes={classes!("btn-terminal")}>{ "关键词审核" }> to={Route::LlmAccess} classes={classes!("btn-terminal", "btn-terminal-primary")}>{ "公共页" }>
diff --git a/crates/frontend/src/pages/admin_moderation.rs b/crates/frontend/src/pages/admin_moderation.rs new file mode 100644 index 00000000..1b14160c --- /dev/null +++ b/crates/frontend/src/pages/admin_moderation.rs @@ -0,0 +1,590 @@ +//! Keyword moderation review console. +//! +//! Two tabs: **Keywords** (import via txt/json, list, delete) and +//! **Banned sessions** (list captured bans, inspect the full request payload, +//! keep or lift a ban). Backed by the `/admin/llm-gateway/moderation/*` +//! endpoints on the cloud `llm-access` service. + +use web_sys::{HtmlInputElement, HtmlSelectElement, HtmlTextAreaElement}; +use yew::prelude::*; +use yew_router::prelude::Link; + +use crate::{ + api::{ + add_admin_moderation_keywords, delete_admin_moderation_keyword, + fetch_admin_moderation_banned_session, fetch_admin_moderation_banned_sessions, + fetch_admin_moderation_keywords, review_admin_moderation_banned_session, + AddAdminModerationKeywordsInput, AdminModerationBannedSessionsResponse, + AdminModerationKeywordsResponse, ModerationBannedSessionDetailView, + ReviewModerationBannedSessionInput, + }, + components::tab_bar::render_tab_bar, + pages::llm_access_shared::{confirm_destructive, format_timestamp_opt}, + router::Route, +}; + +const TAB_KEYWORDS: &str = "keywords"; +const TAB_SESSIONS: &str = "sessions"; + +fn provider_badge(provider: &str) -> Classes { + let base = classes!("rounded-full", "px-2", "py-1", "font-mono", "text-xs", "font-semibold"); + let color = if provider == "codex" { + classes!("bg-sky-500/10", "text-sky-700", "dark:text-sky-200") + } else { + classes!("bg-violet-500/10", "text-violet-700", "dark:text-violet-200") + }; + classes!(base, color) +} + +fn status_badge(status: &str) -> Classes { + let base = classes!("rounded-full", "px-2", "py-1", "font-mono", "text-xs"); + let color = if status == "banned" { + classes!("bg-red-500/10", "text-red-700", "dark:text-red-200") + } else { + classes!("bg-emerald-500/10", "text-emerald-700", "dark:text-emerald-200") + }; + classes!(base, color) +} + +fn pretty_json(raw: &str) -> String { + serde_json::from_str::(raw) + .ok() + .and_then(|value| serde_json::to_string_pretty(&value).ok()) + .unwrap_or_else(|| raw.to_string()) +} + +#[function_component(AdminModerationPage)] +pub fn admin_moderation_page() -> Html { + let active_tab = use_state(|| TAB_KEYWORDS.to_string()); + let error = use_state(|| None::); + let flash = use_state(|| None::); + let refresh_tick = use_state(|| 0u64); + + // Keyword tab state. + let keywords = use_state(AdminModerationKeywordsResponse::default); + let keywords_loading = use_state(|| true); + let import_content = use_state(String::new); + let import_format = use_state(|| "txt".to_string()); + let import_note = use_state(String::new); + let importing = use_state(|| false); + + // Banned session tab state. + let sessions = use_state(AdminModerationBannedSessionsResponse::default); + let sessions_loading = use_state(|| true); + let session_status = use_state(|| "banned".to_string()); + let selected_detail = use_state(|| None::); + let detail_loading = use_state(|| false); + + let notify = { + let flash = flash.clone(); + let error = error.clone(); + Callback::from(move |(message, is_error): (String, bool)| { + if is_error { + error.set(Some(message)); + flash.set(None); + } else { + flash.set(Some(message)); + error.set(None); + } + }) + }; + + let reload = { + let refresh_tick = refresh_tick.clone(); + Callback::from(move |_| refresh_tick.set((*refresh_tick).saturating_add(1))) + }; + + // Load keywords. + { + let keywords = keywords.clone(); + let keywords_loading = keywords_loading.clone(); + let error = error.clone(); + let tick = *refresh_tick; + use_effect_with(tick, move |_| { + keywords_loading.set(true); + wasm_bindgen_futures::spawn_local(async move { + match fetch_admin_moderation_keywords().await { + Ok(response) => keywords.set(response), + Err(message) => error.set(Some(message)), + } + keywords_loading.set(false); + }); + || () + }); + } + + // Load banned sessions (re-runs on tick or status filter change). + { + let sessions = sessions.clone(); + let sessions_loading = sessions_loading.clone(); + let error = error.clone(); + let status = (*session_status).clone(); + let deps = (*refresh_tick, status.clone()); + use_effect_with(deps, move |_| { + sessions_loading.set(true); + wasm_bindgen_futures::spawn_local(async move { + match fetch_admin_moderation_banned_sessions(&status, 100, 0).await { + Ok(response) => sessions.set(response), + Err(message) => error.set(Some(message)), + } + sessions_loading.set(false); + }); + || () + }); + } + + let on_tab_click = { + let active_tab = active_tab.clone(); + Callback::from(move |tab: String| active_tab.set(tab)) + }; + + let on_import = { + let import_content = import_content.clone(); + let import_format = import_format.clone(); + let import_note = import_note.clone(); + let importing = importing.clone(); + let notify = notify.clone(); + let reload = reload.clone(); + Callback::from(move |_| { + if *importing { + return; + } + let content = (*import_content).clone(); + if content.trim().is_empty() { + notify.emit(("Keyword content is empty".to_string(), true)); + return; + } + let note = (*import_note).clone(); + let input = AddAdminModerationKeywordsInput { + content, + format: Some((*import_format).clone()), + note: (!note.trim().is_empty()).then(|| note.trim().to_string()), + }; + let importing = importing.clone(); + let notify = notify.clone(); + let reload = reload.clone(); + let import_content = import_content.clone(); + importing.set(true); + wasm_bindgen_futures::spawn_local(async move { + match add_admin_moderation_keywords(&input).await { + Ok(outcome) => { + notify.emit(( + format!( + "Imported {} keyword(s), {} duplicate(s) skipped (parsed {})", + outcome.inserted, outcome.duplicates, outcome.parsed + ), + false, + )); + import_content.set(String::new()); + reload.emit(()); + }, + Err(message) => notify.emit((message, true)), + } + importing.set(false); + }); + }) + }; + + let keywords_view = { + let import_content = import_content.clone(); + let import_format = import_format.clone(); + let import_note = import_note.clone(); + let importing = importing.clone(); + let keywords = keywords.clone(); + let keywords_loading = keywords_loading.clone(); + let notify = notify.clone(); + let reload = reload.clone(); + + let on_content_input = { + let import_content = import_content.clone(); + Callback::from(move |e: InputEvent| { + let target: HtmlTextAreaElement = e.target_unchecked_into(); + import_content.set(target.value()); + }) + }; + let on_format_change = { + let import_format = import_format.clone(); + Callback::from(move |e: Event| { + let target: HtmlSelectElement = e.target_unchecked_into(); + import_format.set(target.value()); + }) + }; + let on_note_input = { + let import_note = import_note.clone(); + Callback::from(move |e: InputEvent| { + let target: HtmlInputElement = e.target_unchecked_into(); + import_note.set(target.value()); + }) + }; + + let stats = keywords.stats.clone(); + html! { +
+
+ { stat_card("Keywords", stats.keyword_count.to_string(), stats.loaded) } + { stat_card("Banned sessions", stats.banned_session_count.to_string(), stats.loaded) } + { stat_card("Allowlisted", stats.allowed_session_count.to_string(), stats.loaded) } + { stat_card("Blocked requests", stats.blocked_requests_total.to_string(), stats.loaded) } +
+ +
+

+ { "Import keywords" } +

+