Skip to content

Commit 3690598

Browse files
Roman Chernyakclaude
andcommitted
feat(discovery): surface quarantined tools in retrieve_tools (locked, name-only)
Quarantined tools — both server-level quarantine and tool-level pending/changed approvals — are intentionally absent from the BM25 search index so their untrusted descriptions cannot expose a Tool Poisoning Attack (TPA) payload to the agent. As a side effect, retrieve_tools could only answer "no such tool" for a capability a quarantined server provides, even though the correct remediation is "ask the user to approve it". Add an opt-in second pass to retrieve_tools(include_disabled=true) that enumerates quarantined tools from authoritative state and returns NAME-ONLY locked entries (description and schema withheld) with a status + remediation: - server-level quarantine -> new status `server_quarantined` - tool-level pending/changed -> existing `pending_approval` The pass is fully self-contained — it does NOT alter the shared callability or classification helpers, so visible-tool counts and other consumers are unchanged. Specifically it: - matches the query against the tool NAME only (quarantined tools aren't in the index, so they can't be BM25-ranked); short keywords (>=2 chars, e.g. "ui"/"qa") are retained; - applies the same agent-scope + profile filtering as the callable path, via a shared `serverDiscoverable` helper (de-duplicates what were three inline copies down to one for the discovery surface); - dedups against tools the index loop already handled (a `seen` set), so the brief post-quarantine reindex window can't list a tool as both callable and locked, nor double-count it in the zero-result nudge; - skips tools also denied by operator config (enabled_tools/disabled_tools), which approval cannot unlock — so the agent isn't sent down a dead-end remediation; - prepends its matches so the shared min(limit,10) cap can't truncate them in favor of index hits. Quarantined tools remain non-callable: the call path already blocks server-quarantine and pending/changed before execution (handleQuarantinedToolCall), so no change to isToolCallable is needed — discovery only makes them VISIBLE, never callable. Tests: pending tool surfaced by name with withheld description + remediation; server-level branch (tool-names source injected); query-scoped exclusion; config-denied skipped; dedup against seen; short-keyword tokens; zero-result nudge; quarantined tool still blocked at the call path; remediation present. Co-Authored-By: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent d578440 commit 3690598

3 files changed

Lines changed: 377 additions & 16 deletions

File tree

internal/contracts/types.go

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -223,16 +223,21 @@ type Tool struct {
223223
}
224224

225225
// DisabledToolStatus is the single machine-branchable reason a tool exists but
226-
// is not callable (Spec 049). Exactly one value per locked tool, assigned by
227-
// fixed first-match precedence (server-off → config → user → pending → unknown).
226+
// is not callable (Spec 049). Exactly one value per locked tool. The
227+
// classifier (classifyServerToolStatus) assigns the index-discoverable reasons
228+
// by fixed first-match precedence (server-off → config → user → pending →
229+
// unknown). DisabledStatusServerQuarantined is assigned separately by the
230+
// quarantined-tool discovery pass (quarantined tools are never in the index),
231+
// not by the classifier.
228232
type DisabledToolStatus = string
229233

230234
const (
231-
DisabledStatusServerDisabled DisabledToolStatus = "server_disabled"
232-
DisabledStatusByConfig DisabledToolStatus = "disabled_by_config"
233-
DisabledStatusByUser DisabledToolStatus = "disabled_by_user"
234-
DisabledStatusPendingApproval DisabledToolStatus = "pending_approval"
235-
DisabledStatusUnknown DisabledToolStatus = "disabled_unknown"
235+
DisabledStatusServerDisabled DisabledToolStatus = "server_disabled"
236+
DisabledStatusServerQuarantined DisabledToolStatus = "server_quarantined"
237+
DisabledStatusByConfig DisabledToolStatus = "disabled_by_config"
238+
DisabledStatusByUser DisabledToolStatus = "disabled_by_user"
239+
DisabledStatusPendingApproval DisabledToolStatus = "pending_approval"
240+
DisabledStatusUnknown DisabledToolStatus = "disabled_unknown"
236241
)
237242

238243
// LockedToolEntry is the lean discovery shape for a non-callable tool returned

internal/server/mcp.go

Lines changed: 174 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"sync"
1212
"sync/atomic"
1313
"time"
14+
"unicode"
1415

1516
"github.com/smart-mcp-proxy/mcpproxy-go/internal/auth"
1617
"github.com/smart-mcp-proxy/mcpproxy-go/internal/cache"
@@ -1160,6 +1161,17 @@ func (p *MCPProxyServer) handleRetrieveToolsWithMode(ctx context.Context, reques
11601161
args["include_disabled"] = true
11611162
}
11621163

1164+
// serverDiscoverable applies the agent-scope (Spec 049 FR-007) and profile
1165+
// (Spec 057) filters BEFORE classification so an agent never learns a tool
1166+
// exists on a server it cannot access. Shared by the callable-result loop
1167+
// and the quarantined-tool discovery pass below so the two never drift.
1168+
serverDiscoverable := func(serverName string) bool {
1169+
if enforceAgentScope && authCtx != nil && !authCtx.CanAccessServer(serverName) {
1170+
return false
1171+
}
1172+
return profileScope.Allows(serverName)
1173+
}
1174+
11631175
var callableResults []*config.SearchResult
11641176
var disabledEntries []contracts.LockedToolEntry
11651177
droppedCount := 0
@@ -1174,15 +1186,7 @@ func (p *MCPProxyServer) handleRetrieveToolsWithMode(ctx context.Context, reques
11741186
}
11751187
}
11761188

1177-
// Agent-scope filtering happens BEFORE classification (Spec 049 FR-007)
1178-
// so an agent never learns a locked tool exists on a server it cannot
1179-
// access.
1180-
if enforceAgentScope && !authCtx.CanAccessServer(serverName) {
1181-
continue
1182-
}
1183-
// Spec 057: profile filter — runs independently of agent-scope so that
1184-
// unauthenticated /mcp/p/<slug> connections (AdminContext) are still filtered.
1185-
if !profileScope.Allows(serverName) {
1189+
if !serverDiscoverable(serverName) {
11861190
continue
11871191
}
11881192

@@ -1203,6 +1207,34 @@ func (p *MCPProxyServer) handleRetrieveToolsWithMode(ctx context.Context, reques
12031207
}
12041208
results = callableResults
12051209

1210+
// Quarantined tools are deliberately absent from the search index: their
1211+
// untrusted descriptions/schemas are withheld so a Tool Poisoning Attack
1212+
// payload is never exposed to the agent. The loop above therefore can never
1213+
// surface them, and an agent searching for a capability a quarantined server
1214+
// provides would be told "no such tool". Enumerate those tools from
1215+
// authoritative state (server-level quarantine + tool-level pending/changed
1216+
// approvals) and prepend name-only locked entries (no description/schema) so
1217+
// the agent learns the capability EXISTS but needs the user's approval.
1218+
//
1219+
// `seen` dedupes against tools the loop already handled — primarily for the
1220+
// brief window after a runtime quarantine toggle when a tool may still
1221+
// linger in the index — so a tool is never both a callable result and a
1222+
// locked entry, and droppedCount isn't double-counted. Matches are PREPENDED
1223+
// so the shared min(limit,10) cap below can't truncate them away in favor of
1224+
// index hits. The count feeds the zero-result nudge even without the flag.
1225+
seen := make(map[string]bool, len(callableResults)+len(disabledEntries))
1226+
for _, r := range callableResults {
1227+
seen[r.Tool.Name] = true
1228+
}
1229+
for _, e := range disabledEntries {
1230+
seen[e.Name] = true
1231+
}
1232+
quarantinedMatches := p.collectQuarantinedToolMatches(query, serverDiscoverable, seen, p.serverToolNames)
1233+
droppedCount += len(quarantinedMatches)
1234+
if includeDisabled {
1235+
disabledEntries = append(quarantinedMatches, disabledEntries...)
1236+
}
1237+
12061238
// Spec 035 F4: Resolve annotations for each result and apply annotation-based filtering
12071239
// before building the MCP tool response. This allows agents to self-restrict discovery.
12081240
annotationFilterActive := readOnlyOnly || excludeDestructive || excludeOpenWorld
@@ -5205,6 +5237,9 @@ func disabledToolRemediation(status contracts.DisabledToolStatus) string {
52055237
switch status {
52065238
case contracts.DisabledStatusServerDisabled:
52075239
return "Its server is disabled. Ask the user to enable the server first."
5240+
case contracts.DisabledStatusServerQuarantined:
5241+
return "Its server is quarantined for security review. Its tools cannot be called " +
5242+
"until the user reviews and approves the server in the mcpproxy UI or system tray."
52085243
case contracts.DisabledStatusByConfig:
52095244
return "Locked by operator policy in mcp_config.json (enabled_tools/disabled_tools). " +
52105245
"The user cannot enable this from the UI; ask the operator to change the server config."
@@ -5218,6 +5253,136 @@ func disabledToolRemediation(status contracts.DisabledToolStatus) string {
52185253
}
52195254
}
52205255

5256+
// maxQuarantinedMatches bounds how many quarantined tools the discovery
5257+
// second-pass collects before stopping. The response itself is capped lower
5258+
// (min(limit,10)); this just bounds work on the opt-in path.
5259+
const maxQuarantinedMatches = 50
5260+
5261+
// collectQuarantinedToolMatches finds tools that exist but are quarantined and
5262+
// whose name matches the query, returning lean locked entries (no description
5263+
// or schema — those are withheld because a quarantined tool's description is a
5264+
// potential Tool Poisoning Attack payload). It covers both quarantine layers:
5265+
// - server-level quarantine: every tool on a Quarantined server is locked
5266+
// (status server_quarantined);
5267+
// - tool-level quarantine: pending/changed approval records on a trusted
5268+
// server (status pending_approval).
5269+
//
5270+
// Quarantined tools are not in the search index, so this is the only path that
5271+
// can surface them to discovery. allowServer mirrors the agent-scope/profile
5272+
// filtering applied to the index results so an agent never learns a tool
5273+
// exists on a server it cannot access.
5274+
//
5275+
// Inputs:
5276+
// - allowServer / seen: the discovery scope filter and the set of "server:tool"
5277+
// keys the index loop already handled (callable or locked), so a tool is
5278+
// never surfaced twice.
5279+
// - toolNamesFor: resolves a server's live tool names (injected for testing;
5280+
// production passes p.serverToolNames).
5281+
//
5282+
// A tool that is also denied by operator config (enabled_tools/disabled_tools)
5283+
// is skipped rather than advertised as merely "pending approval": approving it
5284+
// in the UI would not make it callable, so surfacing it would send the agent
5285+
// down a remediation that can never succeed.
5286+
func (p *MCPProxyServer) collectQuarantinedToolMatches(query string, allowServer func(string) bool, seen map[string]bool, toolNamesFor func(string) []string) []contracts.LockedToolEntry {
5287+
tokens := queryTokens(query)
5288+
if len(tokens) == 0 {
5289+
return nil
5290+
}
5291+
5292+
servers, err := p.storage.ListUpstreams()
5293+
if err != nil {
5294+
p.logger.Debug("collectQuarantinedToolMatches: failed to list upstreams", zap.Error(err))
5295+
return nil
5296+
}
5297+
5298+
var matches []contracts.LockedToolEntry
5299+
// add records a quarantined tool; returns false once the work cap is hit.
5300+
add := func(sc *config.ServerConfig, toolName string, status contracts.DisabledToolStatus) bool {
5301+
key := sc.Name + ":" + toolName
5302+
if seen[key] || !toolNameMatchesQuery(toolName, tokens) {
5303+
return true
5304+
}
5305+
if p.isToolConfigDenied(sc.Name, toolName, sc) {
5306+
return true // operator policy, not quarantine — don't advertise as approvable
5307+
}
5308+
seen[key] = true
5309+
matches = append(matches, contracts.LockedToolEntry{Name: key, Server: sc.Name, Status: status})
5310+
return len(matches) < maxQuarantinedMatches
5311+
}
5312+
5313+
for _, sc := range servers {
5314+
if sc == nil || !sc.Enabled || !allowServer(sc.Name) {
5315+
continue
5316+
}
5317+
5318+
if sc.Quarantined {
5319+
for _, toolName := range toolNamesFor(sc.Name) {
5320+
if !add(sc, toolName, contracts.DisabledStatusServerQuarantined) {
5321+
return matches
5322+
}
5323+
}
5324+
continue
5325+
}
5326+
5327+
approvals, aerr := p.storage.ListToolApprovals(sc.Name)
5328+
if aerr != nil {
5329+
continue
5330+
}
5331+
for _, a := range approvals {
5332+
if a == nil || a.Disabled {
5333+
continue
5334+
}
5335+
if a.Status != storage.ToolApprovalStatusPending && a.Status != storage.ToolApprovalStatusChanged {
5336+
continue
5337+
}
5338+
if !add(sc, a.ToolName, contracts.DisabledStatusPendingApproval) {
5339+
return matches
5340+
}
5341+
}
5342+
}
5343+
return matches
5344+
}
5345+
5346+
// queryTokens lowercases the query and splits it into alphanumeric tokens, used
5347+
// for a lightweight name match against quarantined tools (which are not in the
5348+
// BM25 index and so can't be ranked normally). Tokens of length >= 2 are kept so
5349+
// short-but-meaningful capability keywords ("ci", "ui", "qa", "db", "os") still
5350+
// match; if the query has only single-character fields they are kept as a
5351+
// fallback rather than returning nothing.
5352+
func queryTokens(query string) []string {
5353+
fields := strings.FieldsFunc(strings.ToLower(query), func(r rune) bool {
5354+
return !unicode.IsLetter(r) && !unicode.IsDigit(r)
5355+
})
5356+
tokens := make([]string, 0, len(fields))
5357+
for _, f := range fields {
5358+
if len(f) >= 2 {
5359+
tokens = append(tokens, f)
5360+
}
5361+
}
5362+
if len(tokens) == 0 {
5363+
for _, f := range fields {
5364+
if f != "" {
5365+
tokens = append(tokens, f)
5366+
}
5367+
}
5368+
}
5369+
return tokens
5370+
}
5371+
5372+
// toolNameMatchesQuery reports whether a quarantined tool's name is relevant to
5373+
// the query: any query token appears in the lowercased tool name. Tool names
5374+
// usually carry the capability keywords (e.g. "emulator_build_web"), so this is
5375+
// enough to surface "this exists but is quarantined" without the description.
5376+
func toolNameMatchesQuery(toolName string, tokens []string) bool {
5377+
lower := strings.ToLower(toolName)
5378+
for _, tok := range tokens {
5379+
if strings.Contains(lower, tok) {
5380+
return true
5381+
}
5382+
}
5383+
return false
5384+
}
5385+
52215386
func (p *MCPProxyServer) lookupToolAnnotations(serverName, toolName string) *config.ToolAnnotations {
52225387
if p.mainServer == nil || p.mainServer.runtime == nil {
52235388
return nil

0 commit comments

Comments
 (0)