Skip to content

Commit 9702260

Browse files
electrolobzikRoman ChernyakclaudeDumbris
authored
feat(discovery): surface quarantined tools in retrieve_tools (locked, name-only) (#778)
Quarantined tools — both server-level quarantine and tool-level pending/changed approvals — are intentionally absent from the BM25 search index so their untrusted descriptions cannot expose a Tool Poisoning Attack (TPA) payload to the agent. As a side effect, retrieve_tools could only answer "no such tool" for a capability a quarantined server provides, even though the correct remediation is "ask the user to approve it". Add an opt-in second pass to retrieve_tools(include_disabled=true) that enumerates quarantined tools from authoritative state and returns NAME-ONLY locked entries (description and schema withheld) with a status + remediation: - server-level quarantine -> new status `server_quarantined` - tool-level pending/changed -> existing `pending_approval` The pass is fully self-contained — it does NOT alter the shared callability or classification helpers, so visible-tool counts and other consumers are unchanged. Specifically it: - matches the query against the tool NAME only (quarantined tools aren't in the index, so they can't be BM25-ranked); short keywords (>=2 chars, e.g. "ui"/"qa") are retained; - applies the same agent-scope + profile filtering as the callable path, via a shared `serverDiscoverable` helper (de-duplicates what were three inline copies down to one for the discovery surface); - dedups against tools the index loop already handled (a `seen` set), so the brief post-quarantine reindex window can't list a tool as both callable and locked, nor double-count it in the zero-result nudge; - skips tools also denied by operator config (enabled_tools/disabled_tools), which approval cannot unlock — so the agent isn't sent down a dead-end remediation; - prepends its matches so the shared min(limit,10) cap can't truncate them in favor of index hits. Quarantined tools remain non-callable: the call path already blocks server-quarantine and pending/changed before execution (handleQuarantinedToolCall), so no change to isToolCallable is needed — discovery only makes them VISIBLE, never callable. Tests: pending tool surfaced by name with withheld description + remediation; server-level branch (tool-names source injected); query-scoped exclusion; config-denied skipped; dedup against seen; short-keyword tokens; zero-result nudge; quarantined tool still blocked at the call path; remediation present. Co-authored-by: Roman Chernyak <rchernyak@halocollar.com> Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com> Co-authored-by: Dumbris <a.dumbris@gmail.com>
1 parent 171249d commit 9702260

3 files changed

Lines changed: 377 additions & 16 deletions

File tree

internal/contracts/types.go

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -231,16 +231,21 @@ type Tool struct {
231231
}
232232

233233
// DisabledToolStatus is the single machine-branchable reason a tool exists but
234-
// is not callable (Spec 049). Exactly one value per locked tool, assigned by
235-
// fixed first-match precedence (server-off → config → user → pending → unknown).
234+
// is not callable (Spec 049). Exactly one value per locked tool. The
235+
// classifier (classifyServerToolStatus) assigns the index-discoverable reasons
236+
// by fixed first-match precedence (server-off → config → user → pending →
237+
// unknown). DisabledStatusServerQuarantined is assigned separately by the
238+
// quarantined-tool discovery pass (quarantined tools are never in the index),
239+
// not by the classifier.
236240
type DisabledToolStatus = string
237241

238242
const (
239-
DisabledStatusServerDisabled DisabledToolStatus = "server_disabled"
240-
DisabledStatusByConfig DisabledToolStatus = "disabled_by_config"
241-
DisabledStatusByUser DisabledToolStatus = "disabled_by_user"
242-
DisabledStatusPendingApproval DisabledToolStatus = "pending_approval"
243-
DisabledStatusUnknown DisabledToolStatus = "disabled_unknown"
243+
DisabledStatusServerDisabled DisabledToolStatus = "server_disabled"
244+
DisabledStatusServerQuarantined DisabledToolStatus = "server_quarantined"
245+
DisabledStatusByConfig DisabledToolStatus = "disabled_by_config"
246+
DisabledStatusByUser DisabledToolStatus = "disabled_by_user"
247+
DisabledStatusPendingApproval DisabledToolStatus = "pending_approval"
248+
DisabledStatusUnknown DisabledToolStatus = "disabled_unknown"
244249
)
245250

246251
// LockedToolEntry is the lean discovery shape for a non-callable tool returned

internal/server/mcp.go

Lines changed: 174 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ import (
1111
"sync"
1212
"sync/atomic"
1313
"time"
14+
"unicode"
1415

1516
"github.com/smart-mcp-proxy/mcpproxy-go/internal/auth"
1617
"github.com/smart-mcp-proxy/mcpproxy-go/internal/cache"
@@ -1220,6 +1221,17 @@ func (p *MCPProxyServer) handleRetrieveToolsWithMode(ctx context.Context, reques
12201221
args["include_disabled"] = true
12211222
}
12221223

1224+
// serverDiscoverable applies the agent-scope (Spec 049 FR-007) and profile
1225+
// (Spec 057) filters BEFORE classification so an agent never learns a tool
1226+
// exists on a server it cannot access. Shared by the callable-result loop
1227+
// and the quarantined-tool discovery pass below so the two never drift.
1228+
serverDiscoverable := func(serverName string) bool {
1229+
if enforceAgentScope && authCtx != nil && !authCtx.CanAccessServer(serverName) {
1230+
return false
1231+
}
1232+
return profileScope.Allows(serverName)
1233+
}
1234+
12231235
var callableResults []*config.SearchResult
12241236
var disabledEntries []contracts.LockedToolEntry
12251237
droppedCount := 0
@@ -1234,15 +1246,7 @@ func (p *MCPProxyServer) handleRetrieveToolsWithMode(ctx context.Context, reques
12341246
}
12351247
}
12361248

1237-
// Agent-scope filtering happens BEFORE classification (Spec 049 FR-007)
1238-
// so an agent never learns a locked tool exists on a server it cannot
1239-
// access.
1240-
if enforceAgentScope && !authCtx.CanAccessServer(serverName) {
1241-
continue
1242-
}
1243-
// Spec 057: profile filter — runs independently of agent-scope so that
1244-
// unauthenticated /mcp/p/<slug> connections (AdminContext) are still filtered.
1245-
if !profileScope.Allows(serverName) {
1249+
if !serverDiscoverable(serverName) {
12461250
continue
12471251
}
12481252

@@ -1263,6 +1267,34 @@ func (p *MCPProxyServer) handleRetrieveToolsWithMode(ctx context.Context, reques
12631267
}
12641268
results = callableResults
12651269

1270+
// Quarantined tools are deliberately absent from the search index: their
1271+
// untrusted descriptions/schemas are withheld so a Tool Poisoning Attack
1272+
// payload is never exposed to the agent. The loop above therefore can never
1273+
// surface them, and an agent searching for a capability a quarantined server
1274+
// provides would be told "no such tool". Enumerate those tools from
1275+
// authoritative state (server-level quarantine + tool-level pending/changed
1276+
// approvals) and prepend name-only locked entries (no description/schema) so
1277+
// the agent learns the capability EXISTS but needs the user's approval.
1278+
//
1279+
// `seen` dedupes against tools the loop already handled — primarily for the
1280+
// brief window after a runtime quarantine toggle when a tool may still
1281+
// linger in the index — so a tool is never both a callable result and a
1282+
// locked entry, and droppedCount isn't double-counted. Matches are PREPENDED
1283+
// so the shared min(limit,10) cap below can't truncate them away in favor of
1284+
// index hits. The count feeds the zero-result nudge even without the flag.
1285+
seen := make(map[string]bool, len(callableResults)+len(disabledEntries))
1286+
for _, r := range callableResults {
1287+
seen[r.Tool.Name] = true
1288+
}
1289+
for _, e := range disabledEntries {
1290+
seen[e.Name] = true
1291+
}
1292+
quarantinedMatches := p.collectQuarantinedToolMatches(query, serverDiscoverable, seen, p.serverToolNames)
1293+
droppedCount += len(quarantinedMatches)
1294+
if includeDisabled {
1295+
disabledEntries = append(quarantinedMatches, disabledEntries...)
1296+
}
1297+
12661298
// Spec 035 F4: Resolve annotations for each result and apply annotation-based filtering
12671299
// before building the MCP tool response. This allows agents to self-restrict discovery.
12681300
annotationFilterActive := readOnlyOnly || excludeDestructive || excludeOpenWorld
@@ -5295,6 +5327,9 @@ func disabledToolRemediation(status contracts.DisabledToolStatus) string {
52955327
switch status {
52965328
case contracts.DisabledStatusServerDisabled:
52975329
return "Its server is disabled. Ask the user to enable the server first."
5330+
case contracts.DisabledStatusServerQuarantined:
5331+
return "Its server is quarantined for security review. Its tools cannot be called " +
5332+
"until the user reviews and approves the server in the mcpproxy UI or system tray."
52985333
case contracts.DisabledStatusByConfig:
52995334
return "Locked by operator policy in mcp_config.json (enabled_tools/disabled_tools). " +
53005335
"The user cannot enable this from the UI; ask the operator to change the server config."
@@ -5308,6 +5343,136 @@ func disabledToolRemediation(status contracts.DisabledToolStatus) string {
53085343
}
53095344
}
53105345

5346+
// maxQuarantinedMatches bounds how many quarantined tools the discovery
5347+
// second-pass collects before stopping. The response itself is capped lower
5348+
// (min(limit,10)); this just bounds work on the opt-in path.
5349+
const maxQuarantinedMatches = 50
5350+
5351+
// collectQuarantinedToolMatches finds tools that exist but are quarantined and
5352+
// whose name matches the query, returning lean locked entries (no description
5353+
// or schema — those are withheld because a quarantined tool's description is a
5354+
// potential Tool Poisoning Attack payload). It covers both quarantine layers:
5355+
// - server-level quarantine: every tool on a Quarantined server is locked
5356+
// (status server_quarantined);
5357+
// - tool-level quarantine: pending/changed approval records on a trusted
5358+
// server (status pending_approval).
5359+
//
5360+
// Quarantined tools are not in the search index, so this is the only path that
5361+
// can surface them to discovery. allowServer mirrors the agent-scope/profile
5362+
// filtering applied to the index results so an agent never learns a tool
5363+
// exists on a server it cannot access.
5364+
//
5365+
// Inputs:
5366+
// - allowServer / seen: the discovery scope filter and the set of "server:tool"
5367+
// keys the index loop already handled (callable or locked), so a tool is
5368+
// never surfaced twice.
5369+
// - toolNamesFor: resolves a server's live tool names (injected for testing;
5370+
// production passes p.serverToolNames).
5371+
//
5372+
// A tool that is also denied by operator config (enabled_tools/disabled_tools)
5373+
// is skipped rather than advertised as merely "pending approval": approving it
5374+
// in the UI would not make it callable, so surfacing it would send the agent
5375+
// down a remediation that can never succeed.
5376+
func (p *MCPProxyServer) collectQuarantinedToolMatches(query string, allowServer func(string) bool, seen map[string]bool, toolNamesFor func(string) []string) []contracts.LockedToolEntry {
5377+
tokens := queryTokens(query)
5378+
if len(tokens) == 0 {
5379+
return nil
5380+
}
5381+
5382+
servers, err := p.storage.ListUpstreams()
5383+
if err != nil {
5384+
p.logger.Debug("collectQuarantinedToolMatches: failed to list upstreams", zap.Error(err))
5385+
return nil
5386+
}
5387+
5388+
var matches []contracts.LockedToolEntry
5389+
// add records a quarantined tool; returns false once the work cap is hit.
5390+
add := func(sc *config.ServerConfig, toolName string, status contracts.DisabledToolStatus) bool {
5391+
key := sc.Name + ":" + toolName
5392+
if seen[key] || !toolNameMatchesQuery(toolName, tokens) {
5393+
return true
5394+
}
5395+
if p.isToolConfigDenied(sc.Name, toolName, sc) {
5396+
return true // operator policy, not quarantine — don't advertise as approvable
5397+
}
5398+
seen[key] = true
5399+
matches = append(matches, contracts.LockedToolEntry{Name: key, Server: sc.Name, Status: status})
5400+
return len(matches) < maxQuarantinedMatches
5401+
}
5402+
5403+
for _, sc := range servers {
5404+
if sc == nil || !sc.Enabled || !allowServer(sc.Name) {
5405+
continue
5406+
}
5407+
5408+
if sc.Quarantined {
5409+
for _, toolName := range toolNamesFor(sc.Name) {
5410+
if !add(sc, toolName, contracts.DisabledStatusServerQuarantined) {
5411+
return matches
5412+
}
5413+
}
5414+
continue
5415+
}
5416+
5417+
approvals, aerr := p.storage.ListToolApprovals(sc.Name)
5418+
if aerr != nil {
5419+
continue
5420+
}
5421+
for _, a := range approvals {
5422+
if a == nil || a.Disabled {
5423+
continue
5424+
}
5425+
if a.Status != storage.ToolApprovalStatusPending && a.Status != storage.ToolApprovalStatusChanged {
5426+
continue
5427+
}
5428+
if !add(sc, a.ToolName, contracts.DisabledStatusPendingApproval) {
5429+
return matches
5430+
}
5431+
}
5432+
}
5433+
return matches
5434+
}
5435+
5436+
// queryTokens lowercases the query and splits it into alphanumeric tokens, used
5437+
// for a lightweight name match against quarantined tools (which are not in the
5438+
// BM25 index and so can't be ranked normally). Tokens of length >= 2 are kept so
5439+
// short-but-meaningful capability keywords ("ci", "ui", "qa", "db", "os") still
5440+
// match; if the query has only single-character fields they are kept as a
5441+
// fallback rather than returning nothing.
5442+
func queryTokens(query string) []string {
5443+
fields := strings.FieldsFunc(strings.ToLower(query), func(r rune) bool {
5444+
return !unicode.IsLetter(r) && !unicode.IsDigit(r)
5445+
})
5446+
tokens := make([]string, 0, len(fields))
5447+
for _, f := range fields {
5448+
if len(f) >= 2 {
5449+
tokens = append(tokens, f)
5450+
}
5451+
}
5452+
if len(tokens) == 0 {
5453+
for _, f := range fields {
5454+
if f != "" {
5455+
tokens = append(tokens, f)
5456+
}
5457+
}
5458+
}
5459+
return tokens
5460+
}
5461+
5462+
// toolNameMatchesQuery reports whether a quarantined tool's name is relevant to
5463+
// the query: any query token appears in the lowercased tool name. Tool names
5464+
// usually carry the capability keywords (e.g. "emulator_build_web"), so this is
5465+
// enough to surface "this exists but is quarantined" without the description.
5466+
func toolNameMatchesQuery(toolName string, tokens []string) bool {
5467+
lower := strings.ToLower(toolName)
5468+
for _, tok := range tokens {
5469+
if strings.Contains(lower, tok) {
5470+
return true
5471+
}
5472+
}
5473+
return false
5474+
}
5475+
53115476
func (p *MCPProxyServer) lookupToolAnnotations(serverName, toolName string) *config.ToolAnnotations {
53125477
if p.mainServer == nil || p.mainServer.runtime == nil {
53135478
return nil

0 commit comments

Comments
 (0)