docs(policy,spec): address PR #124 review — migration, tests, godoc

leo-aa88 · cursoragent · leo-aa88 · commit 185886bf7599 · 2026-06-01T04:37:54.000-03:00
- Fix NewEvaluator/Engine.Evaluator godoc for nil-policy safety enforcement
- Add CHANGELOG [Unreleased] with breaking-change migration guide
- Document tool-level trusted vs requiredFor; plan prefix vs runtime exact match
- Mark MCP SafetyFromMCPMeta/MergeToolSafety as not wired yet
- Validate non-empty spec.safety blocks; export spec.BoolPtr
- Add run_safety CLI fixture: exit 5 from safety only (no Policy)
- Revert testGraphWithTools to production fail-closed defaults
- Add prefix vs exact approval tests; CONTRIBUTING CHANGELOG note

Co-authored-by: Cursor <cursoragent@cursor.com>
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -0,0 +1,33 @@
+# Changelog
+
+All notable changes to this project are documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/).
+
+## [Unreleased]
+
+### Added
+
+- **`spec.safety` on Tool resources** (issue #103): optional `trusted`, `sideEffects`, and `requiresApproval` fields. [NormalizeProjectGraph] materializes fail-closed defaults on load.
+- **Policy safety fallback**: when no `approvals.requiredFor` entry matches the exact `uses` string, [policy.Derive] consults resolved safety metadata. Unattended mutating tools require `--approve` (exit code **5**, `approval_required`).
+- **Plan risk hints** for tools that will require approval at run, including decision source (`explicit_policy_rule`, `safety_metadata`, `fail_closed_default`).
+
+### Changed
+
+- **Breaking — tool calls without explicit policy are no longer unrestricted.** Previously, `CheckToolCall` with a nil [spec.PolicySpec] allowed all tools. Now fail-closed safety always applies from the project graph (even when the workflow omits `spec.policy` or the Policy resource is missing).
+- Tools with **no** `spec.safety` block behave as **untrusted with side effects** after normalization → they require `--approve` through the safety fallback whenever no explicit `approvals.requiredFor` rule matches (a matching rule requires approval on its own).
+
+### Migration
+
+1. For **read-only** native or mock tools (echo, fetch, identity), add:
+   ```yaml
+   spec:
+     safety:
+       sideEffects: false
+   ```
+2. For tools where you accept **tool-wide** unattended use but still gate specific operations, set `trusted: true` and list write operations under `Policy.spec.approvals.requiredFor` (exact `uses` strings).
+3. Do **not** set `trusted: true` unless you intend every operation on that tool to run without safety-derived approval; per-action gating remains `requiredFor` only (exact match at runtime).
+
+### Not yet wired
+
+- MCP discovery does **not** yet apply [spec.SafetyFromMCPMeta] / [spec.MergeToolSafety]; author-set `spec.safety` in YAML is the source of truth until MCP merge lands (tracked separately from #103).
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -22,6 +22,8 @@ Run **`make`** or **`make help`** for a full list of targets.
 
 ## Before you open a pull request
 
+User-visible behavior changes should include an entry under **[Unreleased]** in [`CHANGELOG.md`](CHANGELOG.md) (especially breaking changes and migrations).
+
 1. **Format** — `make fmt` or ensure `gofmt -l .` prints nothing (same check as CI).
 2. **Static analysis** — `make vet` (or `go vet ./...`).
 3. **Tests** — `make test` (`go test ./... -race`).
diff --git a/internal/cli/run_test.go b/internal/cli/run_test.go
@@ -29,6 +29,16 @@ func runPolicyRoot(t *testing.T) string {
 	return abs
 }
 
+func runSafetyRoot(t *testing.T) string {
+	t.Helper()
+	p := filepath.Join("testdata", "run_safety")
+	abs, err := filepath.Abs(p)
+	if err != nil {
+		t.Fatal(err)
+	}
+	return abs
+}
+
 func TestRun_demo_integration_succeeds(t *testing.T) {
 	db := filepath.Join(t.TempDir(), "run-cli.db")
 	root := runProjRoot(t)
@@ -54,6 +64,53 @@ func TestRun_demo_integration_succeeds(t *testing.T) {
 	}
 }
 
+func TestRun_safetyOnlyDenial_exit5(t *testing.T) {
+	db := filepath.Join(t.TempDir(), "run-safety.db")
+	root := runSafetyRoot(t)
+
+	ResetGlobalsForTest()
+	cmd := NewRootCmd()
+	cmd.SetOut(io.Discard)
+	cmd.SetErr(io.Discard)
+	cmd.SetArgs([]string{
+		"run", "workflow/echo",
+		"--project", root,
+		"--state", db,
+		"--input", "topic=x",
+	})
+	err := cmd.Execute()
+	if err == nil {
+		t.Fatal("expected safety-derived policy denial")
+	}
+	if ExitCodeOf(err) != ExitPolicyDenied {
+		t.Fatalf("exit=%d want %d err=%v", ExitCodeOf(err), ExitPolicyDenied, err)
+	}
+}
+
+func TestRun_safetyOnly_withApprove_succeeds(t *testing.T) {
+	db := filepath.Join(t.TempDir(), "run-safety-ok.db")
+	root := runSafetyRoot(t)
+
+	ResetGlobalsForTest()
+	var out bytes.Buffer
+	cmd := NewRootCmd()
+	cmd.SetOut(&out)
+	cmd.SetErr(&out)
+	cmd.SetArgs([]string{
+		"run", "workflow/echo",
+		"--project", root,
+		"--state", db,
+		"--input", "topic=x",
+		"--approve", "tool.helper.echo",
+	})
+	if err := cmd.Execute(); err != nil {
+		t.Fatal(err)
+	}
+	if !strings.Contains(out.String(), "Status: succeeded") {
+		t.Fatalf("output:\n%s", out.String())
+	}
+}
+
 func TestRun_policyDenial_exit5(t *testing.T) {
 	db := filepath.Join(t.TempDir(), "run-pol.db")
 	root := runPolicyRoot(t)
diff --git a/internal/cli/testdata/run_safety/project.yaml b/internal/cli/testdata/run_safety/project.yaml
@@ -0,0 +1,12 @@
+apiVersion: agentic.dev/v0
+kind: Project
+metadata:
+  name: safetyproj
+spec:
+  imports:
+    - ./tools.yaml
+    - ./workflow.yaml
+  providers:
+    models:
+      mock:
+        type: mock
diff --git a/internal/cli/testdata/run_safety/schemas/in.json b/internal/cli/testdata/run_safety/schemas/in.json
@@ -0,0 +1,9 @@
+{
+  "$schema": "https://json-schema.org/draft/2020-12/schema",
+  "type": "object",
+  "required": ["topic"],
+  "properties": {
+    "topic": { "type": "string" }
+  },
+  "additionalProperties": true
+}
diff --git a/internal/cli/testdata/run_safety/tools.yaml b/internal/cli/testdata/run_safety/tools.yaml
@@ -0,0 +1,6 @@
+apiVersion: agentic.dev/v0
+kind: Tool
+metadata:
+  name: helper
+spec:
+  type: native
diff --git a/internal/cli/testdata/run_safety/workflow.yaml b/internal/cli/testdata/run_safety/workflow.yaml
@@ -0,0 +1,15 @@
+apiVersion: agentic.dev/v0
+kind: Workflow
+metadata:
+  name: echo
+spec:
+  input:
+    schema: ./schemas/in.json
+  steps:
+    - id: s1
+      uses: tool.helper.echo
+      with:
+        topic: "${input.topic}"
+  output:
+    value:
+      out: "${steps.s1.output.echo}"
diff --git a/internal/engine/execution_test.go b/internal/engine/execution_test.go
@@ -16,8 +16,6 @@ import (
 	"github.com/LAA-Software-Engineering/agentic-control-plane/internal/trace"
 )
 
-func boolPtr(b bool) *bool { v := b; return &v }
-
 func testProjectRoot(t *testing.T) string {
 	t.Helper()
 	_, file, _, ok := runtime.Caller(0)
@@ -51,7 +49,7 @@ func TestRun_sequentialToolAndAgent_mockModel(t *testing.T) {
 				Metadata:   spec.Metadata{Name: "helper"},
 				Spec: spec.ToolSpec{
 					Type:   "native",
-					Safety: &spec.ToolSafety{SideEffects: boolPtr(false)},
+					Safety: &spec.ToolSafety{SideEffects: spec.BoolPtr(false)},
 				},
 			},
 		},
diff --git a/internal/plan/risk.go b/internal/plan/risk.go
@@ -260,6 +260,7 @@ func addToolSafetyRisk(add func(string), toolName string, cur policy.ToolDecisio
 	if prev != nil && prev.Decision == policy.DecisionRequireApproval {
 		return
 	}
+	// Plan uses prefix match on tool.<name>. for explicit requiredFor (conservative); runtime matches exact uses.
 	add(fmt.Sprintf(
 		"Tool/%s will require approval at run (decision=%s, source=%s).",
 		toolName, cur.Decision, cur.Source,
diff --git a/internal/policy/derive.go b/internal/policy/derive.go
@@ -50,7 +50,7 @@ func EffectiveToolDecision(graph *spec.ProjectGraph, pol *spec.PolicySpec, toolN
 	toolName = strings.TrimSpace(toolName)
 	safety := resolvedSafetyForTool(graph, toolName)
 	if pol != nil && pol.Approvals != nil {
-		prefix := "tool." + toolName + "."
+		prefix := toolUsesPrefix(toolName)
 		for _, r := range pol.Approvals.RequiredFor {
 			r = strings.TrimSpace(r)
 			if r == prefix || strings.HasPrefix(r, prefix) {
@@ -109,15 +109,21 @@ func checkSafetyDerived(graph *spec.ProjectGraph, call ToolCallContext) error {
 			},
 		)
 	default:
+		// Derive never returns DecisionDeny; reserved for future explicit denylists.
 		return denied(
 			ReasonDenied,
-			"policy: tool denied by safety metadata",
+			fmt.Sprintf("policy: unexpected tool decision %q", td.Decision),
 			call.Uses,
 			map[string]any{"tool": toolName},
 		)
 	}
 }
 
+// toolUsesPrefix returns "tool.<name>." (name trimmed), the prefix used for plan risk (conservative); runtime matches the exact uses string.
+func toolUsesPrefix(toolName string) string {
+	return "tool." + strings.TrimSpace(toolName) + "."
+}
+
 func actionApproved(uses string, approved []string) bool {
 	u := strings.TrimSpace(uses)
 	for _, a := range approved {
diff --git a/internal/policy/derive_test.go b/internal/policy/derive_test.go
@@ -7,8 +7,6 @@ import (
 	"github.com/LAA-Software-Engineering/agentic-control-plane/internal/spec"
 )
 
-func boolPtr(b bool) *bool { v := b; return &v }
-
 func TestDerive_truthTable(t *testing.T) {
 	tests := []struct {
 		name string
@@ -31,7 +29,7 @@ func TestDerive_truthTable(t *testing.T) {
 
 func TestCheckToolCall_safetyFallback_requiresApprovalWithoutApprove(t *testing.T) {
 	g := testGraphWithTools("slack")
-	g.Tools["slack"].Spec.Safety = &spec.ToolSafety{Trusted: boolPtr(false), SideEffects: boolPtr(true)}
+	g.Tools["slack"].Spec.Safety = &spec.ToolSafety{Trusted: spec.BoolPtr(false), SideEffects: spec.BoolPtr(true)}
 	ev := NewEvaluator(g, nil)
 	err := ev.CheckToolCall(context.Background(), ToolCallContext{
 		Run:  RunContext{},
@@ -48,7 +46,7 @@ func TestCheckToolCall_safetyFallback_requiresApprovalWithoutApprove(t *testing.
 
 func TestCheckToolCall_safetyFallback_trustedAllows(t *testing.T) {
 	g := testGraphWithTools("slack")
-	g.Tools["slack"].Spec.Safety = &spec.ToolSafety{Trusted: boolPtr(true)}
+	g.Tools["slack"].Spec.Safety = &spec.ToolSafety{Trusted: spec.BoolPtr(true)}
 	ev := NewEvaluator(g, nil)
 	err := ev.CheckToolCall(context.Background(), ToolCallContext{
 		Run:  RunContext{},
@@ -73,7 +71,7 @@ func TestCheckToolCall_safetyFallback_approveGrants(t *testing.T) {
 
 func TestCheckToolCall_explicitPolicyRuleBeforeSafety(t *testing.T) {
 	g := testGraphWithTools("github")
-	g.Tools["github"].Spec.Safety = &spec.ToolSafety{Trusted: boolPtr(true)}
+	g.Tools["github"].Spec.Safety = &spec.ToolSafety{Trusted: spec.BoolPtr(true)}
 	pol := &spec.PolicySpec{
 		Approvals: &spec.PolicyApprovals{
 			RequiredFor: []string{"tool.github.pull_request.merge"},
@@ -101,9 +99,36 @@ func TestCheckToolCall_explicitPolicyRuleBeforeSafety(t *testing.T) {
 	}
 }
 
+func TestApprovalRequired_exactUsesNotPrefix(t *testing.T) {
+	g := testGraphWithTools("github")
+	g.Tools["github"].Spec.Safety = &spec.ToolSafety{Trusted: spec.BoolPtr(true)}
+	pol := &spec.PolicySpec{
+		Approvals: &spec.PolicyApprovals{
+			RequiredFor: []string{"tool.github.pull_request.merge"},
+		},
+	}
+	ev := NewEvaluator(g, pol)
+	if approvalRequired("tool.github.pull_request.get", pol.Approvals) {
+		t.Fatal("approvalRequired must not match by prefix")
+	}
+	if !approvalRequired("tool.github.pull_request.merge", pol.Approvals) {
+		t.Fatal("exact uses should require approval")
+	}
+	td := EffectiveToolDecision(g, pol, "github")
+	if td.Source != SourceExplicitPolicyRule {
+		t.Fatalf("plan uses prefix conservatively: %+v", td)
+	}
+	err := ev.CheckToolCall(context.Background(), ToolCallContext{
+		Run: RunContext{}, Uses: "tool.github.pull_request.get",
+	})
+	if err != nil {
+		t.Fatalf("trusted + no exact requiredFor: %v", err)
+	}
+}
+
 func TestEffectiveToolDecision_explicitVsDerived(t *testing.T) {
 	g := testGraphWithTools("github")
-	g.Tools["github"].Spec.Safety = &spec.ToolSafety{Trusted: boolPtr(true)}
+	g.Tools["github"].Spec.Safety = &spec.ToolSafety{Trusted: spec.BoolPtr(true)}
 	pol := &spec.PolicySpec{
 		Approvals: &spec.PolicyApprovals{
 			RequiredFor: []string{"tool.github.pull_request.merge"},
diff --git a/internal/policy/doc.go b/internal/policy/doc.go
@@ -7,4 +7,17 @@
 //
 // When no explicit approvals.requiredFor rule matches a tool call, [Derive] consults
 // [spec.ResolveToolSafety] metadata (fail-closed defaults; issue #103).
+//
+// # Tool-level safety vs per-action policy
+//
+// [spec.ToolSafety] applies to the whole Tool resource, not individual operations. Setting
+// trusted: true allows unattended calls for every tool.<name>.<operation> unless an exact
+// approvals.requiredFor entry blocks that full uses string. Gate writes with requiredFor, not
+// by assuming trusted means "read-only only."
+//
+// # Plan vs runtime
+//
+// [EffectiveToolDecision] uses prefix matching on tool.<name>. for plan risk (conservative:
+// any listed action under the tool flags the whole Tool). Runtime [approvalRequired] matches
+// the full uses string exactly.
 package policy
diff --git a/internal/policy/engine.go b/internal/policy/engine.go
@@ -17,7 +17,8 @@ func NewEngine(g *spec.ProjectGraph) *Engine {
 }
 
 // Evaluator returns a [PolicyEvaluator] for the named Policy resource in the graph.
-// If the policy is missing, returns a no-op evaluator (nil spec).
+// If the policy name is missing or unknown, the evaluator is built with a nil policy spec: run/step budget
+// checks are skipped, but tool calls still use the safety-metadata fallback from the graph.
 func (e *Engine) Evaluator(policyName string) PolicyEvaluator {
 	if e == nil {
 		return NewEvaluator(nil, nil)
diff --git a/internal/policy/evaluator.go b/internal/policy/evaluator.go
@@ -19,7 +19,9 @@ type evaluator struct {
 }
 
 // NewEvaluator returns a [PolicyEvaluator] for the given merged policy spec and project graph.
-// A nil policy spec applies no limits (all checks no-op).
+//
+// When pol is nil, [PolicyEvaluator.CheckRun] and [PolicyEvaluator.CheckStep] are no-ops, but
+// [PolicyEvaluator.CheckToolCall] still enforces fail-closed [spec.ToolSafety] from graph (issue #103).
 func NewEvaluator(graph *spec.ProjectGraph, pol *spec.PolicySpec) PolicyEvaluator {
 	return &evaluator{graph: graph, policy: pol}
 }
diff --git a/internal/policy/evaluator_test.go b/internal/policy/evaluator_test.go
@@ -14,16 +14,12 @@ import (
 
 func testGraphWithTools(names ...string) *spec.ProjectGraph {
 	tools := make(map[string]*spec.ToolResource)
-	trusted := true
 	for _, n := range names {
 		tools[n] = &spec.ToolResource{
 			APIVersion: spec.APIVersionV0,
 			Kind:       spec.KindTool,
 			Metadata:   spec.Metadata{Name: n},
-			Spec: spec.ToolSpec{
-				Type:   "mock",
-				Safety: &spec.ToolSafety{Trusted: &trusted},
-			},
+			Spec:       spec.ToolSpec{Type: "mock"},
 		}
 	}
 	return &spec.ProjectGraph{Tools: tools}
@@ -54,6 +50,7 @@ func TestCheckToolCall_forbidUnknownTools_unknownToolDenied(t *testing.T) {
 
 func TestCheckToolCall_forbidUnknownTools_knownToolOK(t *testing.T) {
 	g := testGraphWithTools("slack")
+	g.Tools["slack"].Spec.Safety = &spec.ToolSafety{SideEffects: spec.BoolPtr(false)}
 	pol := &spec.PolicySpec{
 		Tools: &spec.PolicyTools{ForbidUnknownTools: true},
 	}
diff --git a/internal/spec/safety.go b/internal/spec/safety.go
diff --git a/internal/spec/validator.go b/internal/spec/validator.go
diff --git a/internal/spec/validator_test.go b/internal/spec/validator_test.go

Original file line number	Diff line number	Diff line change
`@@ -260,6 +260,7 @@ func addToolSafetyRisk(add func(string), toolName string, cur policy.ToolDecisio`
`260`	`260`	`if prev != nil && prev.Decision == policy.DecisionRequireApproval {`
`261`	`261`	`return`
`262`	`262`	`}`
	`263`	`+ // Plan uses prefix match on tool.<name>. for explicit requiredFor (conservative); runtime matches exact uses.`
`263`	`264`	`add(fmt.Sprintf(`
`264`	`265`	`"Tool/%s will require approval at run (decision=%s, source=%s).",`
`265`	`266`	`toolName, cur.Decision, cur.Source,`