smart-mcp-proxy
diff --git a/‎CLAUDE.md‎
Lines changed: 3 additions & 0 deletions b/‎CLAUDE.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎internal/security/detect/aggregate.go‎
Lines changed: 149 additions & 0 deletions b/‎internal/security/detect/aggregate.go‎
Lines changed: 149 additions & 0 deletions
diff --git a/‎internal/security/detect/aggregate_test.go‎
Lines changed: 103 additions & 0 deletions b/‎internal/security/detect/aggregate_test.go‎
Lines changed: 103 additions & 0 deletions
diff --git a/‎internal/security/detect/doc.go‎
Lines changed: 25 additions & 0 deletions b/‎internal/security/detect/doc.go‎
Lines changed: 25 additions & 0 deletions
diff --git a/‎internal/security/detect/engine.go‎
Lines changed: 106 additions & 0 deletions b/‎internal/security/detect/engine.go‎
Lines changed: 106 additions & 0 deletions
@@ -155,3 +155,6 @@ tail -f ~/Library/Logs/mcpproxy/main.log  # main log (macOS; Linux: ~/.mcpproxy/
 - Config changes: update both storage and file system; the file watcher hot-reloads.
 - **macOS tray dev** (build / replace / verify with `mcpproxy-ui-test`): [docs/development/macos-tray.md](docs/development/macos-tray.md).
 - **Windows installer**: [docs/github-actions-windows-wix-research.md](docs/github-actions-windows-wix-research.md). **Prerelease** (`next` branch + `v*-rc.*` tags, opt-in, off stable channels): [docs/prerelease-builds.md](docs/prerelease-builds.md).
+
+## Recent Changes
+- 076-deterministic-tool-scanner: Detection uses Go 1.24 and the standard library only (`unicode`, `unicode/utf8`, `encoding/base64`, `encoding/hex`, `regexp`), plus `golang.org/x/text/unicode/norm` (already an indirect dependency via x/text) for NFKC normalization, alongside the existing `internal/security/patterns/`, `internal/security/scanner/`, and `internal/runtime/tool_quarantine.go`. No new third-party dependency.
@@ -0,0 +1,149 @@
+package detect
+
+import "fmt"
+
+// Severity levels — string values mirror internal/security/scanner so a Finding
+// maps onto scanner.ScanFinding without translation (the scanner wiring copies
+// these strings verbatim). detect cannot import scanner (import cycle), so the
+// vocabulary is mirrored here, not aliased.
+const (
+	SeverityCritical = "critical" // hard signal with confidence ≥ criticalConfidence
+	SeverityHigh     = "high"     // any other hard signal, or ≥3 distinct soft checks
+	SeverityMedium   = "medium"   // exactly 2 distinct soft checks
+	SeverityLow      = "low"      // soft-only finding with fewer than 2 distinct soft checks
+	SeverityInfo     = "info"     // never assigned by aggregate
+)
+
+// Threat levels — user-facing severity, mirrors scanner.ThreatLevel*.
+const (
+	ThreatLevelDangerous = "dangerous" // any hard signal → auto-quarantine
+	ThreatLevelWarning   = "warning"   // soft-only → review
+	ThreatLevelInfo      = "info"      // never assigned by aggregate
+)
+
+// Threat types — the report vocabulary, mirrors scanner.Threat* plus the
+// exfiltration category from the Spec-076 data model.
+const (
+	ThreatToolPoisoning   = "tool_poisoning"
+	ThreatPromptInjection = "prompt_injection"
+	ThreatRugPull         = "rug_pull"
+	ThreatExfiltration    = "exfiltration"
+	ThreatMaliciousCode   = "malicious_code"
+	ThreatUncategorized   = "uncategorized"
+)
+
+// criticalConfidence is the hard-signal confidence at/above which a dangerous
+// finding is rated critical rather than high. Escalating checks (≥3 unicode
+// classes, decoded shell payloads) emit near-1.0 confidence.
+const criticalConfidence = 0.9 // compared against the highest clamped hard-signal confidence
+
+// Finding is the per-tool aggregation output. It is self-contained (no scanner
+// import) and converts 1:1 to scanner.ScanFinding in the scanner wiring (T012);
+// the additive Confidence/Signals fields already exist on ScanFinding (T004).
+type Finding struct {
+	RuleID      string   // "detect." + the primary signal's CheckID
+	Scanner     string   // scanner ID passed to aggregate
+	ThreatType  string   // the primary signal's ThreatType
+	ThreatLevel string   // dangerous if any hard signal was seen, otherwise warning
+	Severity    string   // critical/high for hard signals; low/medium/high for soft-only
+	Category    string   // same value as ThreatType
+	Title       string   // "<CheckID> flagged on <tool name>"
+	Description string   // the primary signal's Detail
+	Location    string   // "<tool.Server>:<tool.Name>"
+	Evidence    string   // the primary signal's Evidence
+	Confidence  float64  // clamped sum of every signal's clamped confidence
+	Signals     []string // distinct CheckIDs in first-seen order
+}
+
+// aggregate combines every signal emitted for one tool into a single Finding,
+// applying the Spec-076 tier and severity semantics (FR-005, FR-006, FR-010).
+// It returns ok=false when there are no signals. It is deterministic: output
+// depends only on the signal slice order.
+func aggregate(tool ToolView, signals []Signal, scannerID string) (Finding, bool) {
+	if len(signals) == 0 {
+		return Finding{}, false
+	}
+
+	// Distinct CheckIDs in first-seen order, plus combined confidence and the
+	// primary (highest-tier, first-seen) signal.
+	seen := make(map[string]struct{}, len(signals))
+	var ids []string
+	var confSum float64
+	var primary Signal
+	haveHard := false
+	maxHardConf := 0.0
+	distinctSoft := make(map[string]struct{})
+
+	for i, s := range signals {
+		confSum += ClampConfidence(s.Confidence)
+		if _, dup := seen[s.CheckID]; !dup {
+			seen[s.CheckID] = struct{}{}
+			ids = append(ids, s.CheckID)
+		}
+		switch s.Tier {
+		case TierHard:
+			if !haveHard {
+				primary = s // first hard signal wins as primary
+				haveHard = true
+			}
+			if c := ClampConfidence(s.Confidence); c > maxHardConf {
+				maxHardConf = c
+			}
+		case TierSoft:
+			distinctSoft[s.CheckID] = struct{}{}
+		}
+		if i == 0 && !haveHard {
+			primary = s // fall back to first signal until a hard one appears
+		}
+	}
+	if !haveHard {
+		primary = signals[0]
+	}
+
+	f := Finding{
+		RuleID:      "detect." + primary.CheckID,
+		Scanner:     scannerID,
+		ThreatType:  primary.ThreatType,
+		Category:    primary.ThreatType,
+		Location:    fmt.Sprintf("%s:%s", tool.Server, tool.Name),
+		Title:       findingTitle(primary, tool),
+		Description: primary.Detail,
+		Evidence:    primary.Evidence,
+		Confidence:  ClampConfidence(confSum),
+		Signals:     ids,
+	}
+
+	if haveHard {
+		f.ThreatLevel = ThreatLevelDangerous
+		if maxHardConf >= criticalConfidence {
+			f.Severity = SeverityCritical
+		} else {
+			f.Severity = SeverityHigh
+		}
+	} else {
+		f.ThreatLevel = ThreatLevelWarning
+		f.Severity = softSeverity(len(distinctSoft))
+	}
+	return f, true
+}
+
+// softSeverity converts the number of distinct soft CheckIDs into a severity:
+// three or more is high, exactly two is medium, and anything else is low.
+func softSeverity(distinct int) string {
+	if distinct >= 3 {
+		return SeverityHigh
+	}
+	if distinct == 2 {
+		return SeverityMedium
+	}
+	return SeverityLow
+}
+
+func findingTitle(primary Signal, tool ToolView) string {
+	name := tool.Name
+	if name == "" {
+		name = "tool"
+	}
+	return fmt.Sprintf("%s flagged on %s", primary.CheckID, name)
+}
@@ -0,0 +1,103 @@
+package detect
+
+import "testing"
+
+// soft builds a TierSoft tool-poisoning signal whose Detail echoes its CheckID.
+func soft(id string, conf float64) Signal {
+	return Signal{
+		CheckID:    id,
+		Tier:       TierSoft,
+		ThreatType: ThreatToolPoisoning,
+		Confidence: conf,
+		Detail:     id,
+	}
+}
+// hard builds a TierHard prompt-injection signal whose Detail echoes its
+// CheckID.
+func hard(id string, conf float64) Signal {
+	return Signal{
+		CheckID:    id,
+		Tier:       TierHard,
+		ThreatType: ThreatPromptInjection,
+		Confidence: conf,
+		Detail:     id,
+	}
+}
+
+// TestAggregateNoSignals checks that a nil signal slice produces no finding.
+func TestAggregateNoSignals(t *testing.T) {
+	_, ok := aggregate(ToolView{Name: "x"}, nil, "s")
+	if ok {
+		t.Fatal("no signals must yield ok=false")
+	}
+}
+
+// TestAggregateHardIsDangerous checks the finding produced by a single
+// high-confidence hard signal: dangerous/critical, with the threat type,
+// scanner, location and signal list taken from the inputs.
+func TestAggregateHardIsDangerous(t *testing.T) {
+	signals := []Signal{hard("unicode.hidden", 0.95)}
+	finding, ok := aggregate(ToolView{Server: "srv", Name: "calc"}, signals, "tpa-descriptions")
+	if !ok {
+		t.Fatal("expected a finding")
+	}
+
+	// Each row pairs an observed string field with its expected value and the
+	// message reported on mismatch.
+	expectations := []struct {
+		got, want, format string
+	}{
+		{finding.ThreatLevel, ThreatLevelDangerous, "ThreatLevel = %q, want dangerous"},
+		{finding.Severity, SeverityCritical, "Severity = %q, want critical (escalated hard)"},
+		{finding.ThreatType, ThreatPromptInjection, "ThreatType = %q, want prompt_injection"},
+		{finding.Scanner, "tpa-descriptions", "Scanner = %q"},
+		{finding.Location, "srv:calc", "Location = %q, want srv:calc"},
+	}
+	for _, e := range expectations {
+		if e.got != e.want {
+			t.Errorf(e.format, e.got)
+		}
+	}
+
+	if len(finding.Signals) != 1 || finding.Signals[0] != "unicode.hidden" {
+		t.Errorf("Signals = %v", finding.Signals)
+	}
+}
+
+// TestAggregateHardNonEscalatedIsHigh checks that a hard signal whose
+// confidence is below criticalConfidence is still dangerous but only rated
+// high.
+func TestAggregateHardNonEscalatedIsHigh(t *testing.T) {
+	f, ok := aggregate(ToolView{Name: "t"}, []Signal{hard("shadowing.cross_server", 0.6)}, "s")
+	if !ok {
+		// Stop here: the field checks below would only report confusing
+		// zero-value mismatches.
+		t.Fatal("expected a finding")
+	}
+	if f.Severity != SeverityHigh {
+		t.Errorf("Severity = %q, want high (non-escalated hard)", f.Severity)
+	}
+	if f.ThreatLevel != ThreatLevelDangerous {
+		t.Errorf("ThreatLevel = %q, want dangerous", f.ThreatLevel)
+	}
+}
+
+func TestAggregateSoftSeverityLadder(t *testing.T) {
+	cases := []struct {
+		name string
+		sigs []Signal
+		want string
+	}{
+		{"one→low", []Signal{soft("a", 0.4)}, SeverityLow},
+		{"two→medium", []Signal{soft("a", 0.4), soft("b", 0.3)}, SeverityMedium},
+		{"three→high", []Signal{soft("a", 0.3), soft("b", 0.3), soft("c", 0.3)}, SeverityHigh},
+		{"dupes count once", []Signal{soft("a", 0.2), soft("a", 0.2)}, SeverityLow},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			f, ok := aggregate(ToolView{Name: "t"}, tc.sigs, "s")
+			if !ok {
+				t.Fatal("expected finding")
+			}
+			if f.Severity != tc.want {
+				t.Errorf("Severity = %q, want %q", f.Severity, tc.want)
+			}
+			if f.ThreatLevel != ThreatLevelWarning {
+				t.Errorf("soft-only ThreatLevel = %q, want warning", f.ThreatLevel)
+			}
+		})
+	}
+}
+
+// TestAggregateConsensusRaisesConfidence checks that confidences of several
+// signals add up, and that the combined value is capped at 1.0.
+func TestAggregateConsensusRaisesConfidence(t *testing.T) {
+	// mustAggregate fails the test immediately when no finding is produced,
+	// instead of letting a zero-value Finding reach the comparisons below.
+	mustAggregate := func(sigs ...Signal) Finding {
+		t.Helper()
+		f, ok := aggregate(ToolView{Name: "t"}, sigs, "s")
+		if !ok {
+			t.Fatalf("expected a finding for %d signal(s)", len(sigs))
+		}
+		return f
+	}
+
+	single := mustAggregate(soft("a", 0.5))
+	double := mustAggregate(soft("a", 0.5), soft("b", 0.4))
+	if !(double.Confidence > single.Confidence) {
+		t.Errorf("consensus confidence %v not greater than single %v", double.Confidence, single.Confidence)
+	}
+	if single.Confidence != 0.5 {
+		t.Errorf("single confidence = %v, want 0.5", single.Confidence)
+	}
+	// Independent signals add, capped at 1.0.
+	capped := mustAggregate(soft("a", 0.7), soft("b", 0.8))
+	if capped.Confidence != 1.0 {
+		t.Errorf("capped confidence = %v, want 1.0", capped.Confidence)
+	}
+}
+
+// TestAggregateDistinctSignalsList checks that Finding.Signals lists each
+// CheckID once, in the order it was first seen.
+func TestAggregateDistinctSignalsList(t *testing.T) {
+	f, ok := aggregate(ToolView{Name: "t"}, []Signal{soft("b", 0.2), soft("a", 0.2), soft("b", 0.2)}, "s")
+	if !ok {
+		t.Fatal("expected a finding")
+	}
+	// First-seen order, deduped.
+	if len(f.Signals) != 2 || f.Signals[0] != "b" || f.Signals[1] != "a" {
+		t.Errorf("Signals = %v, want [b a]", f.Signals)
+	}
+}
@@ -0,0 +1,25 @@
+// Package detect implements the deterministic, offline MCP tool-scanner v2
+// (Spec 076).
+//
+// Contract (see specs/076-deterministic-tool-scanner/contracts/detect-engine.md):
+//
+//   - Offline: this package performs NO I/O. It imports no networking
+//     (net, net/http), no process execution (os/exec), no filesystem access
+//     (os), and no HTTP/Docker client. Detection runs purely over in-memory
+//     tool definitions supplied by the caller. The offline guarantee is
+//     enforced by the standing import-guard test (imports_test.go) and backs
+//     FR-001.
+//
+//   - Deterministic: identical input (a RegistryView) yields byte-identical
+//     output, including finding and signal ordering. No maps are iterated for
+//     output ordering; no clocks or randomness are consulted.
+//
+//   - Total: every registered Check.Inspect call is run under recover(). A
+//     check that panics or errors is isolated, counted in Coverage, and never
+//     aborts the scan. A degraded scan still returns its other findings, the
+//     same way the existing scanner surfaces scanners_failed.
+//
+// The engine aggregates per-tool Signals into a Finding, which the scanner
+// wiring converts 1:1 to the existing internal/security/scanner.ScanFinding
+// type (now additively carrying Confidence and Signals), so all CLI/REST/MCP
+// entry points keep their shapes.
+package detect
@@ -0,0 +1,106 @@
+package detect
+
+import "sort"
+
+// defaultScannerID is the bundled in-process scanner findings are attributed to when
+// Options.ScannerID is empty, matching the existing tpa-descriptions analyzer it replaces.
+const defaultScannerID = "tpa-descriptions"
+
+// Options configures an Engine.
+type Options struct {
+	// Checks are run, in this order, against every tool. Order is part of the
+	// determinism contract.
+	Checks []Check // kept by NewEngine as given; the slice is not copied
+	// ScannerID is stamped onto every finding's Scanner field. Defaults to
+	// "tpa-descriptions" when empty.
+	ScannerID string
+}
+
+// Engine runs all registered checks over a registry snapshot and aggregates
+// per-tool signals into findings. Pure aside from the recover() isolation that
+// keeps a misbehaving check from aborting the scan.
+type Engine struct {
+	checks    []Check // run in slice order against every tool
+	scannerID string  // passed to aggregate as each finding's Scanner
+}
+
+// NewEngine returns an Engine that runs opts.Checks and stamps opts.ScannerID
+// on its findings, using defaultScannerID when no scanner ID is given.
+func NewEngine(opts Options) *Engine {
+	engine := &Engine{checks: opts.Checks, scannerID: opts.ScannerID}
+	if engine.scannerID == "" {
+		engine.scannerID = defaultScannerID
+	}
+	return engine
+}
+
+// Coverage records how complete a scan was: a check whose Inspect panicked or
+// errored is recovered, counted here, and never aborts the scan — mirroring the
+// existing scanners_failed degradation path.
+type Coverage struct {
+	ChecksRun      int      // registered checks minus ChecksFailed
+	ChecksFailed   int      // failed checks; a check fails once it panics on any tool
+	FailedCheckIDs []string // IDs of the failed checks, sorted ascending
+}
+
+// Result is the output of a scan.
+type Result struct {
+	Findings []Finding // one per tool with at least one signal, in reg.Tools order
+	Coverage Coverage  // which checks failed during the scan
+}
+
+// Scan inspects every tool in the snapshot. The RegistryView is built once per
+// scan (via NewRegistryView) if the caller passed a view whose ToolsByName is
+// nil, then shared with every check. A check that panics is isolated; the scan
+// still returns its other findings, and the signals that check produced for
+// other tools are kept. Output (findings and ordering) is deterministic for
+// identical input.
+//
+// Failures are tracked per registered check, so Coverage counts stay exact
+// even when two checks report the same ID. A failed check's ID is resolved
+// under recover(), so a nil Check or a panicking ID method is reported as
+// "unknown" instead of aborting the scan.
+func (e *Engine) Scan(reg RegistryView) Result {
+	if reg.ToolsByName == nil {
+		reg = NewRegistryView(reg.Tools)
+	}
+
+	// panickedAt[i] is set once e.checks[i] panics on any tool.
+	panickedAt := make([]bool, len(e.checks))
+	findings := make([]Finding, 0, len(reg.Tools))
+
+	for i := range reg.Tools {
+		tool := reg.Tools[i]
+		var toolSignals []Signal
+		for ci, c := range e.checks {
+			sigs, panicked := safeInspect(c, tool, reg)
+			if panicked {
+				panickedAt[ci] = true
+				continue
+			}
+			toolSignals = append(toolSignals, sigs...)
+		}
+		if f, ok := aggregate(tool, toolSignals, e.scannerID); ok {
+			findings = append(findings, f)
+		}
+	}
+
+	// Collect the distinct IDs of the failed checks; sorting below makes the
+	// list independent of registration order.
+	failedCount := 0
+	seenIDs := make(map[string]struct{})
+	failedIDs := make([]string, 0)
+	for ci, failed := range panickedAt {
+		if !failed {
+			continue
+		}
+		failedCount++
+		id := recoverCheckID(e.checks[ci])
+		if _, dup := seenIDs[id]; !dup {
+			seenIDs[id] = struct{}{}
+			failedIDs = append(failedIDs, id)
+		}
+	}
+	sort.Strings(failedIDs)
+
+	return Result{
+		Findings: findings,
+		Coverage: Coverage{
+			ChecksRun:      len(e.checks) - failedCount,
+			ChecksFailed:   failedCount,
+			FailedCheckIDs: failedIDs,
+		},
+	}
+}
+
+// recoverCheckID returns c.ID(), or "unknown" when calling it panics (for
+// example because c is a nil Check).
+func recoverCheckID(c Check) (id string) {
+	defer func() {
+		if r := recover(); r != nil {
+			id = "unknown"
+		}
+	}()
+	return c.ID()
+}
+
+// safeInspect calls c.Inspect(tool, reg) with a deferred recover() guarding the
+// call. On a normal return it yields the check's signals and panicked=false;
+// if Inspect panics, the panic is swallowed and it yields nil signals with
+// panicked=true.
+func safeInspect(c Check, tool ToolView, reg RegistryView) (sigs []Signal, panicked bool) {
+	defer func() {
+		if recover() == nil {
+			return
+		}
+		// Overwrite the named results so the caller sees the failure.
+		sigs, panicked = nil, true
+	}()
+	sigs = c.Inspect(tool, reg)
+	return sigs, false
+}