diff --git a/README.md b/README.md index 9928e6d..33f29d3 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ The full product vision, YAML spec v0, and architecture are documented in [**`do - **`agentctl validate`** — load project, apply **project defaults** (`spec.defaults`), then **environment overlays** (`-e` / `--env`, `Environment` resources §7.6), then validate graph, schemas, and references - **`agentctl plan`** — diff desired graph vs SQLite **deployment** state; risk hints; JSON/YAML output includes a **`deploymentBaseline`** digest for the store snapshot - **`agentctl apply`** — persist plan (TTY confirm or `--auto-approve` / `AGENTCTL_AUTO_APPROVE`); **optimistic concurrency** — if the deployment store changed after the plan snapshot (e.g. another process applied the same `--state` file while this run waited at the prompt), apply fails with **exit code 3**; re-run **plan** then **apply** -- **`agentctl run`** — execute a workflow locally; JSON Schema for inputs where configured; policy gates +- **`agentctl run`** — execute a workflow locally; JSON Schema for inputs where configured; policy gates pause for **human-in-the-loop (HITL)** approval when a tool call requires it - **`agentctl logs`** — read **trace events** from SQLite (`--run`, `--workflow`, or recent runs) - **Tools** — **`native`**, **`http`**, **`mock`**, and **`mcp`** — MCP supports **stdio** (subprocess) or **streamable HTTP** (`spec.mcp.transport: http`, `url`, optional `headers` with `env:` tokens) - **Project defaults** — besides **`model`** and **`policy`**, optional **`runtime`** flows to **`spec.runtime`** on agents/workflows when omitted (MVP: **`local`** or unset; see spec validation) @@ -142,6 +142,8 @@ Notes: - **`init`** creates `my-agent-system/` with `apiVersion: agentic.dev/v0` resources and a **`hello`** workflow (native `echo` tool only — **no network**). - **`apply`** in non-interactive environments needs **`--auto-approve`** or **`AGENTCTL_AUTO_APPROVE=1`**. +- **`run`** HITL: gated tool calls exit with **`Status: interrupted`** (exit **0**). Resume with **`--resume --decision approve|reject|edit|switch`** (use **`--decision-edit-json`** / **`--decision-switch-target`** when needed), or skip prompts with **`--auto-approve`** / **`AGENTCTL_AUTO_APPROVE=1`**. Pre-approve a specific call with repeated **`--approve `**. Set **`AGENTCTL_HITL_ACTOR`** to attribute decisions in trace logs. +- **`Policy.spec.hitl.interruptOn`** keys are **Tool metadata.name** values; they configure review options (edit rules, switch targets) for calls already gated by **`approvals.requiredFor`** or safety metadata — they do not gate tools on their own. - **`run`** stores traces in the **same** SQLite file used for plan/apply (default **`.agentic/state.db`** under `--project`). - If **`spec.traces.retentionDays`** is a positive integer, runs older than that many **UTC calendar days** (by `runs.started_at`) are deleted lazily on **`run`** and **`logs`** (child trace rows cascade). Unset or non-positive means no pruning. - Use **`logs --run `** after a run if you want a single run’s trace (IDs are printed by **`run`**). diff --git a/internal/cli/hitl.go b/internal/cli/hitl.go new file mode 100644 index 0000000..c00cdfd --- /dev/null +++ b/internal/cli/hitl.go @@ -0,0 +1,192 @@ +package cli + +import ( + "bufio" + "context" + "encoding/json" + "fmt" + "io" + "os" + "strings" + + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/engine" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/policy" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/runtime" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/spec" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/state" + "github.com/mattn/go-isatty" +) + +// EnvHitlActor overrides the actor recorded on approval trace events. +const EnvHitlActor = "AGENTCTL_HITL_ACTOR" + +// maxDecisionEditJSONBytes caps --decision-edit-json size (well below checkpoint limits). +const maxDecisionEditJSONBytes = 1 << 20 + +func hitlActorFromEnv() string { + if v := strings.TrimSpace(os.Getenv(EnvHitlActor)); v != "" { + return v + } + if u := strings.TrimSpace(os.Getenv("USER")); u != "" { + return u + } + return policy.DefaultHitlActor +} + +func applyHitlRunOptions(opts *runtime.WorkflowRunOptions, resuming bool, autoApprove bool, decision string, editJSON string, switchTarget string) error { + opts.AutoApprove = autoApprove || envAutoApproveEnabled() + opts.HitlActor = hitlActorFromEnv() + decision = strings.TrimSpace(decision) + editJSON = strings.TrimSpace(editJSON) + switchTarget = strings.TrimSpace(switchTarget) + + if decision == "" { + if editJSON != "" || switchTarget != "" { + return fmt.Errorf("run: --decision-edit-json and --decision-switch-target require --decision") + } + return nil + } + if !resuming { + return fmt.Errorf("run: --decision requires --resume ") + } + kind, err := spec.ParseHitlDecisionKind(decision) + if err != nil { + return err + } + hd := &runtime.HitlDecisionOptions{Kind: kind} + switch kind { + case spec.HitlDecisionEdit: + if editJSON == "" { + return fmt.Errorf("run: --decision edit requires --decision-edit-json") + } + if len(editJSON) > maxDecisionEditJSONBytes { + return fmt.Errorf("run: --decision-edit-json exceeds %d bytes", maxDecisionEditJSONBytes) + } + var m map[string]any + if err := json.Unmarshal([]byte(editJSON), &m); err != nil { + return fmt.Errorf("run: --decision-edit-json: %w", err) + } + if m == nil { + return fmt.Errorf("run: --decision-edit-json must be a JSON object") + } + hd.EditedWith = m + case spec.HitlDecisionSwitch: + hd.SwitchTarget = switchTarget + if hd.SwitchTarget == "" { + return fmt.Errorf("run: --decision switch requires --decision-switch-target") + } + } + opts.HitlDecision = hd + return nil +} + +func maybePromptHitlDecision(in io.Reader, out io.Writer, gate policy.HitlGate) (*policy.HitlDecisionInput, error) { + if !isatty.IsTerminal(os.Stdin.Fd()) { + return nil, nil + } + actor := hitlActorFromEnv() + display := policy.RedactHitlArgs(gate.With, gate.Review.RedactKeys) + fmt.Fprintf(out, "\n%s\n", gate.Review.Description) + fmt.Fprintf(out, "Tool: %s\nArguments: %v\n", gate.Uses, display) + fmt.Fprintf(out, "Allowed decisions: %v\n", gate.Review.AllowedDecisions) + if len(gate.Review.SwitchTargets) > 0 { + fmt.Fprintf(out, "Switch targets: %v\n", gate.Review.SwitchTargets) + } + for { + fmt.Fprintf(out, "Decision [approve/reject/edit/switch]: ") + line, err := readLine(in) + if err != nil { + return nil, err + } + kind, err := spec.ParseHitlDecisionKind(line) + if err != nil { + fmt.Fprintf(out, "Unknown decision %q\n", line) + continue + } + if !policy.IsDecisionAllowed(kind, gate.Review.AllowedDecisions) { + fmt.Fprintf(out, "Decision %q is not allowed for this call\n", kind) + continue + } + dec := &policy.HitlDecisionInput{Kind: kind, Actor: actor} + switch kind { + case spec.HitlDecisionEdit: + fmt.Fprintf(out, "Edited args JSON: ") + editLine, err := readLine(in) + if err != nil { + return nil, err + } + if len(editLine) > maxDecisionEditJSONBytes { + fmt.Fprintf(out, "Edited args exceed %d bytes\n", maxDecisionEditJSONBytes) + continue + } + var m map[string]any + if err := json.Unmarshal([]byte(editLine), &m); err != nil { + fmt.Fprintf(out, "Invalid JSON: %v\n", err) + continue + } + if err := policy.ValidateHitlEdit(gate.With, m, gate.Review); err != nil { + fmt.Fprintf(out, "%v\n", err) + continue + } + dec.EditedWith = m + case spec.HitlDecisionSwitch: + fmt.Fprintf(out, "Switch target operation: ") + target, err := readLine(in) + if err != nil { + return nil, err + } + dec.SwitchTarget = target + } + return dec, nil + } +} + +func readLine(r io.Reader) (string, error) { + sc := bufio.NewScanner(r) + if !sc.Scan() { + if err := sc.Err(); err != nil { + return "", err + } + return "", fmt.Errorf("run: unexpected EOF reading hitl decision") + } + return strings.TrimSpace(sc.Text()), nil +} + +func hitlGateFromCheckpoint(contextJSON string) (*policy.HitlGate, error) { + var payload struct { + PendingHitl *engine.PendingHitlState `json:"pendingHitl,omitempty"` + } + if err := json.Unmarshal([]byte(contextJSON), &payload); err != nil { + return nil, fmt.Errorf("unmarshal checkpoint: %w", err) + } + if payload.PendingHitl == nil { + return nil, nil + } + p := payload.PendingHitl + return &policy.HitlGate{ + Uses: p.Uses, + With: p.With, + Review: p.Review, + }, nil +} + +// loadPendingHitlGate reads the latest checkpoint for a run awaiting HITL input. +func loadPendingHitlGate(ctx context.Context, st state.RuntimeStore, runID string) (*policy.HitlGate, error) { + cp, err := st.GetLatestCheckpoint(ctx, runID) + if err != nil { + return nil, err + } + return hitlGateFromCheckpoint(cp.ContextJSON) +} + +// requirePendingHitlGate returns the pending gate or an error when interrupted without one. +func requirePendingHitlGate(ctx context.Context, st state.RuntimeStore, runID string) (*policy.HitlGate, error) { + gate, err := loadPendingHitlGate(ctx, st, runID) + if err != nil { + return nil, err + } + if gate == nil { + return nil, fmt.Errorf("run: run %q is interrupted but checkpoint has no pending approval gate", runID) + } + return gate, nil +} diff --git a/internal/cli/run.go b/internal/cli/run.go index 31c5535..c6ecf9c 100644 --- a/internal/cli/run.go +++ b/internal/cli/run.go @@ -14,8 +14,10 @@ import ( "github.com/LAA-Software-Engineering/agentic-control-plane/internal/render" "github.com/LAA-Software-Engineering/agentic-control-plane/internal/runtime" "github.com/LAA-Software-Engineering/agentic-control-plane/internal/runtime/local" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/spec" "github.com/LAA-Software-Engineering/agentic-control-plane/internal/state" "github.com/LAA-Software-Engineering/agentic-control-plane/internal/state/sqlite" + "github.com/mattn/go-isatty" "github.com/spf13/cobra" ) @@ -23,6 +25,10 @@ func newRunCmd() *cobra.Command { var inputFile string var inputPairs []string var approves []string + var autoApprove bool + var decision string + var decisionEditJSON string + var decisionSwitchTarget string var resumeRunID string cmd := &cobra.Command{ @@ -37,6 +43,8 @@ Workflow input is built from optional --input-file (JSON object) plus repeated - --approve using the full uses string (e.g. tool.helper.echo). Resume an interrupted or incomplete run with --resume (no workflow argument). +When a run pauses for human approval, resume with --decision and related flags, or use +--auto-approve / AGENTCTL_AUTO_APPROVE=1 for non-interactive approval. Examples: agentctl run workflow/demo --input topic=hello @@ -71,12 +79,16 @@ Exit codes (section 11.2): return NewExitError(ExitValidationError, err) } } - return runRun(cmd, wfName, resumeRunID, inputFile, inputPairs, approves) + return runRun(cmd, wfName, resumeRunID, inputFile, inputPairs, approves, autoApprove, decision, decisionEditJSON, decisionSwitchTarget) }, } cmd.Flags().StringVar(&inputFile, "input-file", "", "path to JSON file with workflow input object") cmd.Flags().StringArrayVar(&inputPairs, "input", nil, "workflow input as key=value (repeatable; values are strings)") cmd.Flags().StringArrayVar(&approves, "approve", nil, "approve a policy-gated tool uses string (repeatable)") + cmd.Flags().BoolVar(&autoApprove, "auto-approve", false, "auto-approve human-in-the-loop gates (or set AGENTCTL_AUTO_APPROVE=1)") + cmd.Flags().StringVar(&decision, "decision", "", "HITL decision when resuming: approve, reject, edit, or switch") + cmd.Flags().StringVar(&decisionEditJSON, "decision-edit-json", "", "JSON object of edited tool args when --decision edit") + cmd.Flags().StringVar(&decisionSwitchTarget, "decision-switch-target", "", "target operation when --decision switch") cmd.Flags().StringVar(&resumeRunID, "resume", "", "resume an interrupted or incomplete run by id") return cmd } @@ -165,7 +177,7 @@ func classifyRunError(err error) int { } } -func runRun(cmd *cobra.Command, wfName, resumeRunID, inputFile string, inputPairs, approves []string) error { +func runRun(cmd *cobra.Command, wfName, resumeRunID, inputFile string, inputPairs, approves []string, autoApprove bool, decision, decisionEditJSON, decisionSwitchTarget string) error { ctx := context.Background() g := Globals() @@ -203,33 +215,71 @@ func runRun(cmd *cobra.Command, wfName, resumeRunID, inputFile string, inputPair defer func() { _ = st.Close() }() rt := local.NewRuntime(root, st) - opts := runtime.WorkflowRunOptions{ - EnvironmentName: strings.TrimSpace(g.Env), - Env: env, - InputJSON: inputJSON, - ApprovedActions: approves, - Resume: resumeID != "", - RunID: resumeID, - } - if !opts.Resume { - opts.WorkflowName = wfName - } - runID, runErr := rt.ExecuteWorkflow(ctx, opts) - outWfName := wfName - if opts.Resume && runID != "" { - if r, gerr := st.GetRun(ctx, runID); gerr == nil && r != nil { - outWfName = r.WorkflowName + for { + opts := runtime.WorkflowRunOptions{ + EnvironmentName: strings.TrimSpace(g.Env), + Env: env, + InputJSON: inputJSON, + ApprovedActions: approves, + Resume: resumeID != "", + RunID: resumeID, } - } + if err := applyHitlRunOptions(&opts, resumeID != "", autoApprove, decision, decisionEditJSON, decisionSwitchTarget); err != nil { + return NewExitError(ExitValidationError, err) + } + if !opts.Resume { + opts.WorkflowName = wfName + } + runID, runErr := rt.ExecuteWorkflow(ctx, opts) - if werr := writeRunOutput(cmd, ctx, st, env, dsn, outWfName, runID, runErr, g); werr != nil { - return werr - } - if runErr != nil { - return NewExitError(classifyRunError(runErr), fmt.Errorf("run: %w", runErr)) + outWfName := wfName + if opts.Resume && runID != "" { + if r, gerr := st.GetRun(ctx, runID); gerr == nil && r != nil { + outWfName = r.WorkflowName + } + } + + if runErr == nil && runID != "" { + if r, gerr := st.GetRun(ctx, runID); gerr == nil && r != nil && r.Status == state.RunStatusInterrupted { + if opts.AutoApprove || strings.TrimSpace(decision) != "" { + if _, gerr := requirePendingHitlGate(ctx, st, runID); gerr != nil { + return gerr + } + resumeID = runID + continue + } + gate, gerr := requirePendingHitlGate(ctx, st, runID) + if gerr != nil { + return gerr + } + if isatty.IsTerminal(os.Stdin.Fd()) { + dec, perr := maybePromptHitlDecision(cmd.InOrStdin(), cmd.OutOrStdout(), *gate) + if perr != nil { + return perr + } + if dec != nil { + resumeID = runID + decision = string(dec.Kind) + if dec.Kind == spec.HitlDecisionEdit { + b, _ := json.Marshal(dec.EditedWith) + decisionEditJSON = string(b) + } + decisionSwitchTarget = dec.SwitchTarget + continue + } + } + } + } + + if werr := writeRunOutput(cmd, ctx, st, env, dsn, outWfName, runID, runErr, g); werr != nil { + return werr + } + if runErr != nil { + return NewExitError(classifyRunError(runErr), fmt.Errorf("run: %w", runErr)) + } + return nil } - return nil } func writeRunOutput(cmd *cobra.Command, ctx context.Context, st *sqlite.Store, env, dsn, wfName, runID string, runErr error, g *Global) error { @@ -286,6 +336,9 @@ func writeRunOutput(cmd *cobra.Command, ctx context.Context, st *sqlite.Store, e fmt.Fprintf(&b, "\nRun ID: %s\n", runID) if got != nil { fmt.Fprintf(&b, "Status: %s\n", got.Status) + if got.Status == state.RunStatusInterrupted { + fmt.Fprintf(&b, "Resume with: agentctl run --resume %s --decision approve|reject|edit|switch ...\n", runID) + } if got.ErrorText != "" { fmt.Fprintf(&b, "Error: %s\n", got.ErrorText) } diff --git a/internal/cli/run_test.go b/internal/cli/run_test.go index 5553ac2..eb3bcf6 100644 --- a/internal/cli/run_test.go +++ b/internal/cli/run_test.go @@ -79,26 +79,26 @@ func TestRun_demo_integration_succeeds(t *testing.T) { } } -func TestRun_safetyOnlyDenial_exit5(t *testing.T) { +func TestRun_safetyOnly_interruptsAwaitingHitl(t *testing.T) { db := filepath.Join(t.TempDir(), "run-safety.db") root := runSafetyRoot(t) ResetGlobalsForTest() + var out bytes.Buffer cmd := NewRootCmd() - cmd.SetOut(io.Discard) - cmd.SetErr(io.Discard) + cmd.SetOut(&out) + cmd.SetErr(&out) cmd.SetArgs([]string{ "run", "workflow/echo", "--project", root, "--state", db, "--input", "topic=x", }) - err := cmd.Execute() - if err == nil { - t.Fatal("expected safety-derived policy denial") + if err := cmd.Execute(); err != nil { + t.Fatalf("run: %v", err) } - if ExitCodeOf(err) != ExitPolicyDenied { - t.Fatalf("exit=%d want %d err=%v", ExitCodeOf(err), ExitPolicyDenied, err) + if !strings.Contains(out.String(), "Status: interrupted") { + t.Fatalf("expected interrupted:\n%s", out.String()) } } @@ -126,10 +126,58 @@ func TestRun_safetyOnly_withApprove_succeeds(t *testing.T) { } } -func TestRun_policyDenial_exit5(t *testing.T) { +func TestRun_policyGated_interruptThenResumeApprove(t *testing.T) { db := filepath.Join(t.TempDir(), "run-pol.db") root := runPolicyRoot(t) + ResetGlobalsForTest() + var out bytes.Buffer + cmd := NewRootCmd() + cmd.SetOut(&out) + cmd.SetErr(&out) + cmd.SetArgs([]string{ + "run", "workflow/gated", + "--project", root, + "--state", db, + "--input", "topic=x", + }) + if err := cmd.Execute(); err != nil { + t.Fatalf("first run: %v\n%s", err, out.String()) + } + if !strings.Contains(out.String(), "Status: interrupted") { + t.Fatalf("expected interrupted:\n%s", out.String()) + } + runID := "" + for _, line := range strings.Split(out.String(), "\n") { + if strings.HasPrefix(strings.TrimSpace(line), "Run ID:") { + runID = strings.TrimSpace(strings.TrimPrefix(strings.TrimSpace(line), "Run ID:")) + } + } + if runID == "" { + t.Fatal("missing run id") + } + + out.Reset() + cmd2 := NewRootCmd() + cmd2.SetOut(&out) + cmd2.SetErr(&out) + cmd2.SetArgs([]string{ + "run", "--resume", runID, + "--project", root, + "--state", db, + "--decision", "approve", + }) + if err := cmd2.Execute(); err != nil { + t.Fatalf("resume: %v\n%s", err, out.String()) + } + if !strings.Contains(out.String(), "Status: succeeded") { + t.Fatalf("expected succeeded:\n%s", out.String()) + } +} + +func TestRun_decisionWithoutResume_exit2(t *testing.T) { + db := filepath.Join(t.TempDir(), "run-decision.db") + root := runPolicyRoot(t) ResetGlobalsForTest() cmd := NewRootCmd() cmd.SetOut(io.Discard) @@ -139,14 +187,91 @@ func TestRun_policyDenial_exit5(t *testing.T) { "--project", root, "--state", db, "--input", "topic=x", + "--decision", "approve", + }) + err := cmd.Execute() + if err == nil { + t.Fatal("expected validation error") + } + if ExitCodeOf(err) != ExitValidationError { + t.Fatalf("exit=%d want %d err=%v", ExitCodeOf(err), ExitValidationError, err) + } +} + +func TestRun_hitlRejectViaResume(t *testing.T) { + db := filepath.Join(t.TempDir(), "run-reject.db") + root := runPolicyRoot(t) + runID := runPolicyInterrupted(t, root, db) + + ResetGlobalsForTest() + var out bytes.Buffer + cmd := NewRootCmd() + cmd.SetOut(&out) + cmd.SetErr(&out) + cmd.SetArgs([]string{ + "run", "--resume", runID, + "--project", root, + "--state", db, + "--decision", "reject", }) err := cmd.Execute() if err == nil { - t.Fatal("expected policy denial") + t.Fatal("expected rejection error") + } + if ExitCodeOf(err) != ExitExecutionError { + t.Fatalf("exit=%d want %d err=%v", ExitCodeOf(err), ExitExecutionError, err) + } +} + +func TestRun_hitlEditViaResume(t *testing.T) { + db := filepath.Join(t.TempDir(), "run-edit.db") + root := runPolicyRoot(t) + runID := runPolicyInterrupted(t, root, db) + + ResetGlobalsForTest() + var out bytes.Buffer + cmd := NewRootCmd() + cmd.SetOut(&out) + cmd.SetErr(&out) + cmd.SetArgs([]string{ + "run", "--resume", runID, + "--project", root, + "--state", db, + "--decision", "edit", + "--decision-edit-json", `{"topic":"edited"}`, + }) + if err := cmd.Execute(); err != nil { + t.Fatalf("resume edit: %v\n%s", err, out.String()) + } + if !strings.Contains(out.String(), "Status: succeeded") { + t.Fatalf("expected succeeded:\n%s", out.String()) + } +} + +func runPolicyInterrupted(t *testing.T, root, db string) string { + t.Helper() + ResetGlobalsForTest() + var out bytes.Buffer + cmd := NewRootCmd() + cmd.SetOut(&out) + cmd.SetErr(&out) + cmd.SetArgs([]string{ + "run", "workflow/gated", + "--project", root, + "--state", db, + "--input", "topic=x", + }) + if err := cmd.Execute(); err != nil { + t.Fatalf("interrupt run: %v\n%s", err, out.String()) } - if ExitCodeOf(err) != ExitPolicyDenied { - t.Fatalf("exit=%d want %d err=%v", ExitCodeOf(err), ExitPolicyDenied, err) + for _, line := range strings.Split(out.String(), "\n") { + line = strings.TrimSpace(line) + if strings.HasPrefix(line, "Run ID:") { + return strings.TrimSpace(strings.TrimPrefix(line, "Run ID:")) + } } + t.Fatal("missing run id") + return "" } func TestRun_withApprove_succeeds(t *testing.T) { diff --git a/internal/cli/testdata/run_policy/policy.yaml b/internal/cli/testdata/run_policy/policy.yaml index 5e6428e..1836598 100644 --- a/internal/cli/testdata/run_policy/policy.yaml +++ b/internal/cli/testdata/run_policy/policy.yaml @@ -6,3 +6,10 @@ spec: approvals: requiredFor: - "tool.helper.echo" + hitl: + descriptionPrefix: "Echo tool requires operator approval" + interruptOn: + helper: + allowedDecisions: [approve, reject, edit] + allowedEditArgs: [topic] + description: "Review echo call (${uses})" diff --git a/internal/engine/checkpoint.go b/internal/engine/checkpoint.go index 537c839..fadbf32 100644 --- a/internal/engine/checkpoint.go +++ b/internal/engine/checkpoint.go @@ -31,6 +31,7 @@ type checkpointPayload struct { Input map[string]any `json:"input"` Steps map[string]StepResult `json:"steps"` TotalCostUSD float64 `json:"totalCostUsd"` + PendingHitl *PendingHitlState `json:"pendingHitl,omitempty"` } func marshalCheckpointPayload(ictx Context, totalCost float64) (string, error) { @@ -39,6 +40,7 @@ func marshalCheckpointPayload(ictx Context, totalCost float64) (string, error) { Input: ictx.Input, Steps: ictx.Steps, TotalCostUSD: totalCost, + PendingHitl: ictx.PendingHitl, } if payload.Input == nil { payload.Input = map[string]any{} @@ -82,7 +84,7 @@ func unmarshalCheckpointPayload(contextJSON string, wf *spec.WorkflowResource, c if payload.TotalCostUSD < 0 { return Context{}, 0, fmt.Errorf("engine: negative totalCostUsd in checkpoint") } - return Context{Input: payload.Input, Steps: payload.Steps}, payload.TotalCostUSD, nil + return Context{Input: payload.Input, Steps: payload.Steps, PendingHitl: payload.PendingHitl}, payload.TotalCostUSD, nil } func validateCheckpointSteps(steps map[string]StepResult, wf *spec.WorkflowResource, completedStepIndex int) error { @@ -137,7 +139,11 @@ func (e *Executor) loadResumeState(ctx context.Context, in RunInput) (Context, f if err != nil { return Context{}, 0, 0, err } - return ictx, totalCost, cp.StepIndex + 1, nil + startIdx := cp.StepIndex + 1 + if ictx.PendingHitl != nil { + startIdx = cp.StepIndex + } + return ictx, totalCost, startIdx, nil } func (e *Executor) interruptRun(ctx context.Context, in RunInput, stepIndex int, stepID string, ictx Context, totalCost float64) error { diff --git a/internal/engine/execution.go b/internal/engine/execution.go index 8165851..1c94f16 100644 --- a/internal/engine/execution.go +++ b/internal/engine/execution.go @@ -38,6 +38,8 @@ type RunInput struct { ApprovedActions []string // Resume loads the latest checkpoint and continues from the next step (issue #105). Resume bool + // Hitl carries operator decisions for approval gates (issue #106). + Hitl HitlRunOptions // InterruptAfterStepIndex, when non-nil, checkpoints and returns [ErrInterrupted] after // completing the step at this index. Used to simulate approval gates until HITL lands. InterruptAfterStepIndex *int @@ -146,9 +148,40 @@ func (e *Executor) Run(ctx context.Context, in RunInput) error { var out map[string]any var stepCost float64 if uses != "" { - var meta tools.ToolCallMeta - out, meta, err = e.runToolStep(ctx, wfPol, in.RunID, step, with, pctx) - stepCost = meta.CostUSD + toolUses := uses + toolWith := with + pending := ictx.PendingHitl + if pending == nil { + interrupted, ierr := e.maybeInterruptForHitl(ctx, in, i, step, with, wfPol, pctx, ictx, totalCost) + if interrupted { + return ierr + } + if in.Hitl.AutoApprove { + gate, gerr := policy.BuildHitlGate(e.Graph, policySpecFromEvaluator(wfPol), policy.ToolCallContext{ + Run: pctx, StepID: step.ID, Uses: uses, With: with, + }) + if gerr != nil { + err = gerr + } else if gate != nil { + e.recordAutoApproveHitl(ctx, in.RunID, step, i, *gate, in.Hitl.Actor) + pctx.ApprovedActions = append(append([]string(nil), pctx.ApprovedActions...), uses) + } + } + } else { + var rerr error + toolUses, toolWith, rerr = e.resolvePendingHitl(ctx, in, step, wfPol, pctx, pending) + if rerr != nil { + err = rerr + } else { + ictx.PendingHitl = nil + pctx.ApprovedActions = append(append([]string(nil), pctx.ApprovedActions...), toolUses) + } + } + if err == nil { + var meta tools.ToolCallMeta + out, meta, err = e.runToolStep(ctx, wfPol, in.RunID, step, with, pctx, toolUses, toolWith) + stepCost = meta.CostUSD + } } else { ar, ok := e.Graph.Agents[agentName] if !ok || ar == nil { diff --git a/internal/engine/hitl.go b/internal/engine/hitl.go new file mode 100644 index 0000000..b8ddf2a --- /dev/null +++ b/internal/engine/hitl.go @@ -0,0 +1,188 @@ +package engine + +import ( + "context" + "fmt" + "strings" + + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/policy" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/spec" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/state" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/trace" +) + +const traceInterruptReasonHITL = "hitl" + +// PendingHitlState is persisted in checkpoint context while awaiting operator input. +type PendingHitlState struct { + StepID string `json:"stepId"` + Uses string `json:"uses"` + With map[string]any `json:"with"` + Review policy.ResolvedHitlReview `json:"review"` +} + +// HitlRunOptions configures human-in-the-loop resolution for a run (issue #106). +type HitlRunOptions struct { + AutoApprove bool + Actor string + Decision *policy.HitlDecisionInput +} + +func (e *Executor) maybeInterruptForHitl( + ctx context.Context, + in RunInput, + stepIndex int, + step spec.WorkflowStep, + with map[string]any, + pol policy.PolicyEvaluator, + pctx policy.RunContext, + ictx Context, + totalCost float64, +) (bool, error) { + if in.Hitl.AutoApprove { + return false, nil + } + polSpec := policySpecFromEvaluator(pol) + gate, err := policy.BuildHitlGate(e.Graph, polSpec, policy.ToolCallContext{ + Run: pctx, StepID: step.ID, Uses: strings.TrimSpace(step.Uses), With: with, + }) + if err != nil { + return false, err + } + if gate == nil { + return false, nil + } + if in.Hitl.Decision != nil && in.Resume { + return false, nil + } + ictx.PendingHitl = &PendingHitlState{ + StepID: step.ID, + Uses: gate.Uses, + With: gate.With, + Review: gate.Review, + } + if err := e.saveCheckpoint(ctx, in.RunID, stepIndex, step.ID, ictx, totalCost, state.CheckpointStatusInterrupted); err != nil { + return false, fmt.Errorf("engine: save hitl checkpoint: %w", err) + } + if err := e.Store.UpdateRunStatus(ctx, in.RunID, state.RunStatusInterrupted); err != nil { + return false, fmt.Errorf("engine: mark run interrupted: %w", err) + } + if e.Trace != nil { + redacted := policy.RedactHitlArgs(gate.With, gate.Review.RedactKeys) + _, _ = e.Trace.Append(ctx, in.RunID, step.ID, trace.EventApprovalRequested, map[string]any{ + "uses": gate.Uses, + "with": redacted, + "description": gate.Review.Description, + "allowedDecisions": gate.Review.AllowedDecisions, + "allowedSwitchTo": gate.Review.SwitchTargets, + "stepIndex": stepIndex, + }) + _, _ = e.Trace.Append(ctx, in.RunID, step.ID, trace.EventRunInterrupted, map[string]any{ + "stepIndex": stepIndex, "stepId": step.ID, "reason": traceInterruptReasonHITL, + }) + } + return true, ErrInterrupted +} + +func (e *Executor) resolvePendingHitl( + ctx context.Context, + in RunInput, + step spec.WorkflowStep, + pol policy.PolicyEvaluator, + pctx policy.RunContext, + pending *PendingHitlState, +) (uses string, with map[string]any, err error) { + if pending == nil { + return strings.TrimSpace(step.Uses), nil, nil + } + actor := strings.TrimSpace(in.Hitl.Actor) + if actor == "" { + actor = policy.DefaultHitlActor + } + var decision policy.HitlDecisionInput + switch { + case in.Hitl.AutoApprove: + decision = policy.HitlDecisionInput{Kind: spec.HitlDecisionApprove, Actor: actor} + case in.Hitl.Decision != nil: + decision = *in.Hitl.Decision + if strings.TrimSpace(decision.Actor) == "" { + decision.Actor = actor + } + default: + return "", nil, fmt.Errorf("engine: run %q awaiting hitl decision; resume with --decision or --auto-approve", in.RunID) + } + gate := policy.HitlGate{Uses: pending.Uses, With: pending.With, Review: pending.Review} + uses, with, err = policy.ApplyHitlDecision(gate, decision) + if err != nil { + if decision.Kind == spec.HitlDecisionReject { + if e.Trace != nil { + _, _ = e.Trace.Append(ctx, in.RunID, step.ID, trace.EventApprovalResolved, map[string]any{ + "decision": spec.HitlDecisionReject, + "actor": decision.Actor, + "uses": pending.Uses, + }) + } + return "", nil, &policy.HitlRejectedError{Actor: decision.Actor, Uses: pending.Uses} + } + return "", nil, err + } + traceData := map[string]any{ + "decision": decision.Kind, + "actor": decision.Actor, + "uses": pending.Uses, + "resolvedUses": uses, + } + if decision.Kind == spec.HitlDecisionEdit { + traceData["argsDiff"] = policy.HitlArgsDiff(pending.With, with) + } + if decision.Kind == spec.HitlDecisionSwitch { + traceData["switchTarget"] = decision.SwitchTarget + } + if e.Trace != nil { + _, _ = e.Trace.Append(ctx, in.RunID, step.ID, trace.EventApprovalResolved, traceData) + } + pctx2 := pctx + pctx2.ApprovedActions = append(append([]string(nil), pctx.ApprovedActions...), uses) + if err := pol.CheckToolCall(ctx, policy.ToolCallContext{ + Run: pctx2, StepID: step.ID, Uses: uses, With: with, + }); err != nil { + return "", nil, err + } + return uses, with, nil +} + +func (e *Executor) recordAutoApproveHitl(ctx context.Context, runID string, step spec.WorkflowStep, stepIndex int, gate policy.HitlGate, actor string) { + if e.Trace == nil { + return + } + if strings.TrimSpace(actor) == "" { + actor = policy.DefaultHitlActor + } + redacted := policy.RedactHitlArgs(gate.With, gate.Review.RedactKeys) + _, _ = e.Trace.Append(ctx, runID, step.ID, trace.EventApprovalRequested, map[string]any{ + "uses": gate.Uses, + "with": redacted, + "description": gate.Review.Description, + "allowedDecisions": gate.Review.AllowedDecisions, + "allowedSwitchTo": gate.Review.SwitchTargets, + "stepIndex": stepIndex, + "auto": true, + }) + _, _ = e.Trace.Append(ctx, runID, step.ID, trace.EventApprovalResolved, map[string]any{ + "decision": spec.HitlDecisionApprove, + "actor": actor, + "uses": gate.Uses, + "resolvedUses": gate.Uses, + "auto": true, + }) +} + +func policySpecFromEvaluator(pol policy.PolicyEvaluator) *spec.PolicySpec { + type specCarrier interface { + PolicySpec() *spec.PolicySpec + } + if c, ok := pol.(specCarrier); ok { + return c.PolicySpec() + } + return nil +} diff --git a/internal/engine/hitl_test.go b/internal/engine/hitl_test.go new file mode 100644 index 0000000..6f206e8 --- /dev/null +++ b/internal/engine/hitl_test.go @@ -0,0 +1,316 @@ +package engine + +import ( + "context" + "encoding/json" + "errors" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/policy" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/spec" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/state" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/state/sqlite" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/tools" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/trace" +) + +func hitlTestGraph() *spec.ProjectGraph { + return &spec.ProjectGraph{ + Tools: map[string]*spec.ToolResource{ + "helper": { + APIVersion: spec.APIVersionV0, + Kind: spec.KindTool, + Metadata: spec.Metadata{Name: "helper"}, + Spec: spec.ToolSpec{ + Type: "native", + Safety: &spec.ToolSafety{SideEffects: spec.BoolPtr(true)}, + }, + }, + }, + Policies: map[string]*spec.PolicyResource{ + "gate": { + Spec: spec.PolicySpec{ + Approvals: &spec.PolicyApprovals{ + RequiredFor: []string{"tool.helper.echo"}, + }, + Hitl: &spec.HitlPolicy{ + InterruptOn: map[string]spec.HitlInterruptValue{ + "helper": { + Enabled: true, + Config: &spec.HitlInterruptConfig{ + AllowedDecisions: []spec.HitlDecisionKind{ + spec.HitlDecisionApprove, + spec.HitlDecisionReject, + spec.HitlDecisionEdit, + spec.HitlDecisionSwitch, + }, + AllowedEditArgs: []string{"topic"}, + SwitchMap: map[string][]string{ + "echo": {"identity"}, + }, + }, + }, + }, + }, + }, + }, + }, + Workflows: map[string]*spec.WorkflowResource{ + "hitl": { + APIVersion: spec.APIVersionV0, + Kind: spec.KindWorkflow, + Metadata: spec.Metadata{Name: "hitl"}, + Spec: spec.WorkflowSpec{ + Policy: "gate", + Steps: []spec.WorkflowStep{{ + ID: "fetch", Uses: "tool.helper.echo", + With: map[string]any{"topic": "hello"}, + }}, + Output: &spec.WorkflowOutput{ + Value: map[string]any{"echo": "${steps.fetch.output.echo}"}, + }, + }, + }, + }, + } +} + +func setupHitlExecutor(t *testing.T) (*Executor, *sqlite.Store, string, time.Time) { + t.Helper() + ctx := context.Background() + st, err := sqlite.Open(ctx, filepath.Join(t.TempDir(), "hitl.db")) + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { _ = st.Close() }) + graph := hitlTestGraph() + runID := "run-hitl" + started := time.Date(2026, 6, 2, 12, 0, 0, 0, time.UTC) + if err := st.StartRun(ctx, state.Run{ + RunID: runID, WorkflowName: "hitl", Env: "local", Status: state.RunStatusRunning, + StartedAt: started, InputJSON: `{}`, WorkflowSpecHash: "test-hash", + }); err != nil { + t.Fatal(err) + } + ex := &Executor{ + Graph: graph, Tools: tools.NewRegistry(graph), Store: st, Trace: trace.NewRecorder(st), + } + return ex, st, runID, started +} + +func TestHitl_interruptThenApprove(t *testing.T) { + ex, _, runID, started := setupHitlExecutor(t) + ctx := context.Background() + + err := ex.Run(ctx, RunInput{ + RunID: runID, WorkflowName: "hitl", Env: "local", StartedAt: started, Input: map[string]any{}, + }) + if !errors.Is(err, ErrInterrupted) { + t.Fatalf("first run: %v", err) + } + run, _ := ex.Store.GetRun(ctx, runID) + if run.Status != state.RunStatusInterrupted { + t.Fatalf("status = %q", run.Status) + } + + err = ex.Run(ctx, RunInput{ + RunID: runID, WorkflowName: "hitl", Env: "local", StartedAt: started, Input: map[string]any{}, + Resume: true, + Hitl: HitlRunOptions{ + Actor: "alice", + Decision: &policy.HitlDecisionInput{ + Kind: spec.HitlDecisionApprove, Actor: "alice", + }, + }, + }) + if err != nil { + t.Fatal(err) + } + run, _ = ex.Store.GetRun(ctx, runID) + if run.Status != state.RunStatusSucceeded { + t.Fatalf("status = %q err=%q", run.Status, run.ErrorText) + } + assertTraceContains(t, ex.Store, runID, trace.EventApprovalRequested, trace.EventApprovalResolved) +} + +func TestHitl_autoApprove(t *testing.T) { + ex, _, runID, started := setupHitlExecutor(t) + ctx := context.Background() + err := ex.Run(ctx, RunInput{ + RunID: runID, WorkflowName: "hitl", Env: "local", StartedAt: started, Input: map[string]any{}, + Hitl: HitlRunOptions{AutoApprove: true, Actor: "ci"}, + }) + if err != nil { + t.Fatal(err) + } + run, _ := ex.Store.GetRun(ctx, runID) + if run.Status != state.RunStatusSucceeded { + t.Fatalf("status = %q", run.Status) + } +} + +func TestHitl_autoApprove_emitsApprovalTrace(t *testing.T) { + ex, _, runID, started := setupHitlExecutor(t) + ctx := context.Background() + if err := ex.Run(ctx, RunInput{ + RunID: runID, WorkflowName: "hitl", Env: "local", StartedAt: started, Input: map[string]any{}, + Hitl: HitlRunOptions{AutoApprove: true, Actor: "ci-bot"}, + }); err != nil { + t.Fatal(err) + } + events, err := trace.NewReader(ex.Store).ListByRunID(ctx, runID) + if err != nil { + t.Fatal(err) + } + var requested, resolved bool + for _, ev := range events { + if ev.Type == trace.EventApprovalRequested { + requested = true + } + if ev.Type == trace.EventApprovalResolved { + resolved = true + } + } + if !requested || !resolved { + t.Fatalf("expected approval trace events, requested=%v resolved=%v", requested, resolved) + } +} + +func TestHitl_rejectFailsStep(t *testing.T) { + ex, _, runID, started := setupHitlExecutor(t) + ctx := context.Background() + _ = ex.Run(ctx, RunInput{ + RunID: runID, WorkflowName: "hitl", Env: "local", StartedAt: started, Input: map[string]any{}, + }) + err := ex.Run(ctx, RunInput{ + RunID: runID, WorkflowName: "hitl", Env: "local", StartedAt: started, Input: map[string]any{}, + Resume: true, + Hitl: HitlRunOptions{ + Decision: &policy.HitlDecisionInput{Kind: spec.HitlDecisionReject, Actor: "bob"}, + }, + }) + if err == nil { + t.Fatal("expected rejection error") + } + if _, ok := policy.AsHitlRejected(err); !ok { + t.Fatalf("expected HitlRejected, got %v", err) + } +} + +func TestHitl_editArgs(t *testing.T) { + ex, _, runID, started := setupHitlExecutor(t) + ctx := context.Background() + _ = ex.Run(ctx, RunInput{ + RunID: runID, WorkflowName: "hitl", Env: "local", StartedAt: started, Input: map[string]any{}, + }) + err := ex.Run(ctx, RunInput{ + RunID: runID, WorkflowName: "hitl", Env: "local", StartedAt: started, Input: map[string]any{}, + Resume: true, + Hitl: HitlRunOptions{ + Decision: &policy.HitlDecisionInput{ + Kind: spec.HitlDecisionEdit, Actor: "alice", + EditedWith: map[string]any{"topic": "edited"}, + }, + }, + }) + if err != nil { + t.Fatal(err) + } + run, _ := ex.Store.GetRun(ctx, runID) + if run.Status != state.RunStatusSucceeded { + t.Fatalf("status = %q", run.Status) + } + var out map[string]any + if err := json.Unmarshal([]byte(run.OutputJSON), &out); err != nil { + t.Fatal(err) + } + if !strings.Contains(run.OutputJSON, "edited") { + t.Fatalf("output = %s", run.OutputJSON) + } +} + +func TestHitl_switchOperation(t *testing.T) { + graph := hitlTestGraph() + graph.Workflows["hitl"].Spec.Output = &spec.WorkflowOutput{ + Value: map[string]any{"value": "${steps.fetch.output.value}"}, + } + ctx := context.Background() + st, err := sqlite.Open(ctx, filepath.Join(t.TempDir(), "hitl-switch.db")) + if err != nil { + t.Fatal(err) + } + t.Cleanup(func() { _ = st.Close() }) + runID := "run-hitl-switch" + started := time.Date(2026, 6, 2, 12, 0, 0, 0, time.UTC) + if err := st.StartRun(ctx, state.Run{ + RunID: runID, WorkflowName: "hitl", Env: "local", Status: state.RunStatusRunning, + StartedAt: started, InputJSON: `{}`, WorkflowSpecHash: "test-hash", + }); err != nil { + t.Fatal(err) + } + ex := &Executor{ + Graph: graph, Tools: tools.NewRegistry(graph), Store: st, Trace: trace.NewRecorder(st), + } + _ = ex.Run(ctx, RunInput{ + RunID: runID, WorkflowName: "hitl", Env: "local", StartedAt: started, Input: map[string]any{}, + }) + err = ex.Run(ctx, RunInput{ + RunID: runID, WorkflowName: "hitl", Env: "local", StartedAt: started, Input: map[string]any{}, + Resume: true, + Hitl: HitlRunOptions{ + Decision: &policy.HitlDecisionInput{ + Kind: spec.HitlDecisionSwitch, Actor: "alice", SwitchTarget: "identity", + }, + }, + }) + if err != nil { + t.Fatal(err) + } + run, _ := ex.Store.GetRun(ctx, runID) + if run.Status != state.RunStatusSucceeded { + t.Fatalf("status = %q err=%q", run.Status, run.ErrorText) + } +} + +func TestHitl_editDeniedArgRejected(t *testing.T) { + ex, _, runID, started := setupHitlExecutor(t) + ctx := context.Background() + _ = ex.Run(ctx, RunInput{ + RunID: runID, WorkflowName: "hitl", Env: "local", StartedAt: started, Input: map[string]any{}, + }) + err := ex.Run(ctx, RunInput{ + RunID: runID, WorkflowName: "hitl", Env: "local", StartedAt: started, Input: map[string]any{}, + Resume: true, + Hitl: HitlRunOptions{ + Decision: &policy.HitlDecisionInput{ + Kind: spec.HitlDecisionEdit, + EditedWith: map[string]any{ + "topic": "ok", "secret": "nope", + }, + }, + }, + }) + if err == nil { + t.Fatal("expected edit validation error") + } +} + +func assertTraceContains(t *testing.T, store state.RuntimeStore, runID string, types ...string) { + t.Helper() + events, err := trace.NewReader(store).ListByRunID(context.Background(), runID) + if err != nil { + t.Fatal(err) + } + found := map[string]bool{} + for _, ev := range events { + found[ev.Type] = true + } + for _, typ := range types { + if !found[typ] { + t.Fatalf("missing trace event %q in %d events", typ, len(events)) + } + } +} diff --git a/internal/engine/interpolation.go b/internal/engine/interpolation.go index c6af68c..1ebf605 100644 --- a/internal/engine/interpolation.go +++ b/internal/engine/interpolation.go @@ -17,8 +17,9 @@ type StepResult struct { // Context holds values for ${input.*} and ${steps.*} interpolation (§13.1). type Context struct { - Input map[string]any - Steps map[string]StepResult + Input map[string]any + Steps map[string]StepResult + PendingHitl *PendingHitlState `json:"pendingHitl,omitempty"` } var tokenRE = regexp.MustCompile(`\$\{([^}]*)\}`) diff --git a/internal/engine/steps.go b/internal/engine/steps.go index f781cdd..5ae2e2a 100644 --- a/internal/engine/steps.go +++ b/internal/engine/steps.go @@ -44,9 +44,16 @@ func parseAgentJSONObject(content string) (map[string]any, error) { return m, nil } -func (e *Executor) runToolStep(ctx context.Context, pol policy.PolicyEvaluator, runID string, step spec.WorkflowStep, with map[string]any, pctx policy.RunContext) (map[string]any, tools.ToolCallMeta, error) { - uses := strings.TrimSpace(step.Uses) - if err := pol.CheckToolCall(ctx, policy.ToolCallContext{Run: pctx, StepID: step.ID, Uses: uses, With: with}); err != nil { +func (e *Executor) runToolStep(ctx context.Context, pol policy.PolicyEvaluator, runID string, step spec.WorkflowStep, with map[string]any, pctx policy.RunContext, usesOverride string, withOverride map[string]any) (map[string]any, tools.ToolCallMeta, error) { + uses := strings.TrimSpace(usesOverride) + if uses == "" { + uses = strings.TrimSpace(step.Uses) + } + withArgs := with + if withOverride != nil { + withArgs = withOverride + } + if err := pol.CheckToolCall(ctx, policy.ToolCallContext{Run: pctx, StepID: step.ID, Uses: uses, With: withArgs}); err != nil { if e.Trace != nil { if d, ok := policy.AsDenied(err); ok { _, _ = e.Trace.Append(ctx, runID, step.ID, trace.EventPolicyDenied, d.TraceData()) @@ -60,7 +67,7 @@ func (e *Executor) runToolStep(ctx context.Context, pol policy.PolicyEvaluator, if e.Tools == nil { return nil, tools.ToolCallMeta{}, fmt.Errorf("engine: nil tool executor") } - resp, err := e.Tools.Call(ctx, tools.ToolCallRequest{Uses: uses, With: with}) + resp, err := e.Tools.Call(ctx, tools.ToolCallRequest{Uses: uses, With: withArgs}) if err != nil { return nil, tools.ToolCallMeta{}, err } diff --git a/internal/policy/doc.go b/internal/policy/doc.go index 17ca584..3c302ff 100644 --- a/internal/policy/doc.go +++ b/internal/policy/doc.go @@ -27,4 +27,10 @@ // [EffectiveToolDecision] uses prefix matching on tool.. for plan risk (conservative: // any listed action under the tool flags the whole Tool). Runtime [approvalRequired] matches // the full uses string exactly. +// +// # Human-in-the-loop (issue #106) +// +// When a tool call would require approval, [BuildHitlGate] and [ResolveHitlReview] supply review +// configuration from Policy.spec.hitl. Operators resolve gates with approve, reject, edit, or switch; +// [ApplyHitlDecision] and [ValidateHitlEdit] enforce per-call edit and switch rules. package policy diff --git a/internal/policy/evaluator.go b/internal/policy/evaluator.go index 2d69b3a..6cfa89e 100644 --- a/internal/policy/evaluator.go +++ b/internal/policy/evaluator.go @@ -33,6 +33,11 @@ func (e *evaluator) spec() *spec.PolicySpec { return e.policy } +// PolicySpec exposes the merged policy spec for HITL review resolution. +func (e *evaluator) PolicySpec() *spec.PolicySpec { + return e.spec() +} + func (e *evaluator) CheckRun(ctx context.Context, run RunContext) error { _ = ctx p := e.spec() diff --git a/internal/policy/hitl.go b/internal/policy/hitl.go new file mode 100644 index 0000000..f4666e0 --- /dev/null +++ b/internal/policy/hitl.go @@ -0,0 +1,189 @@ +package policy + +import ( + "fmt" + "strings" + + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/spec" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/tools" +) + +// ResolvedHitlReview is the merged review configuration for one gated tool call. +type ResolvedHitlReview struct { + Description string + AllowedDecisions []spec.HitlDecisionKind + AllowedEditArgs []string + DeniedEditArgs []string + AllowedEditPaths []string + DeniedEditPaths []string + SwitchTargets []string + RedactKeys []string +} + +// HitlGate describes a tool call that requires human approval before execution. +type HitlGate struct { + Uses string + With map[string]any + Review ResolvedHitlReview +} + +// ResolveHitlReview merges policy-level and per-tool HITL config for a tool call. +func ResolveHitlReview(graph *spec.ProjectGraph, pol *spec.PolicySpec, uses string) (ResolvedHitlReview, error) { + toolName, operation, err := tools.ParseUses(uses) + if err != nil { + return ResolvedHitlReview{}, err + } + var cfg *spec.HitlInterruptConfig + if pol != nil && pol.Hitl != nil { + if iv, ok := pol.Hitl.InterruptOn[toolName]; ok && iv.Enabled { + cfg = iv.Config + } + } + prefix := spec.DefaultHitlDescriptionPrefix + var redact []string + var switchMap map[string][]string + if pol != nil && pol.Hitl != nil { + if p := strings.TrimSpace(pol.Hitl.DescriptionPrefix); p != "" { + prefix = p + } + redact = append(redact, pol.Hitl.RedactKeys...) + switchMap = pol.Hitl.ToolSwitchMap + } + desc := prefix + if cfg != nil && strings.TrimSpace(cfg.Description) != "" { + desc = renderHitlDescription(cfg.Description, uses, toolName, operation) + } else { + desc = fmt.Sprintf("%s: %s", prefix, uses) + } + review := ResolvedHitlReview{ + Description: desc, + AllowedDecisions: defaultHitlDecisions(cfg, switchMap, operation), + RedactKeys: uniqueStrings(append(redact, hitlConfigRedact(cfg)...)), + } + if cfg != nil { + if len(cfg.AllowedDecisions) > 0 { + review.AllowedDecisions = append([]spec.HitlDecisionKind(nil), cfg.AllowedDecisions...) + } + review.AllowedEditArgs = append([]string(nil), cfg.AllowedEditArgs...) + review.DeniedEditArgs = append([]string(nil), cfg.DeniedEditArgs...) + review.AllowedEditPaths = append([]string(nil), cfg.AllowedEditPaths...) + review.DeniedEditPaths = append([]string(nil), cfg.DeniedEditPaths...) + review.RedactKeys = uniqueStrings(append(review.RedactKeys, cfg.RedactKeys...)) + } + review.SwitchTargets = resolveSwitchTargets(cfg, switchMap, operation) + if cfg != nil && len(cfg.AllowedEditTools) > 0 { + review.SwitchTargets = uniqueStrings(append(review.SwitchTargets, cfg.AllowedEditTools...)) + } + return review, nil +} + +func hitlConfigRedact(cfg *spec.HitlInterruptConfig) []string { + if cfg == nil { + return nil + } + return cfg.RedactKeys +} + +func defaultHitlDecisions(cfg *spec.HitlInterruptConfig, policySwitch map[string][]string, operation string) []spec.HitlDecisionKind { + if cfg != nil && len(cfg.AllowedDecisions) > 0 { + return append([]spec.HitlDecisionKind(nil), cfg.AllowedDecisions...) + } + out := []spec.HitlDecisionKind{spec.HitlDecisionApprove, spec.HitlDecisionReject} + if cfg != nil && (len(cfg.AllowedEditArgs) > 0 || len(cfg.AllowedEditPaths) > 0 || len(cfg.DeniedEditArgs) > 0 || len(cfg.DeniedEditPaths) > 0) { + out = append(out, spec.HitlDecisionEdit) + } + if hasSwitchTargets(cfg, policySwitch, operation) { + out = append(out, spec.HitlDecisionSwitch) + } + return out +} + +func hasSwitchTargets(cfg *spec.HitlInterruptConfig, policySwitch map[string][]string, operation string) bool { + if cfg != nil { + if len(cfg.AllowedEditTools) > 0 { + return true + } + if t, ok := cfg.SwitchMap[operation]; ok && len(t) > 0 { + return true + } + } + if t, ok := policySwitch[operation]; ok && len(t) > 0 { + return true + } + return false +} + +func resolveSwitchTargets(cfg *spec.HitlInterruptConfig, policySwitch map[string][]string, operation string) []string { + var out []string + if cfg != nil { + out = append(out, cfg.AllowedEditTools...) + if t, ok := cfg.SwitchMap[operation]; ok { + out = append(out, t...) + } + } + if t, ok := policySwitch[operation]; ok { + out = append(out, t...) + } + return uniqueStrings(out) +} + +func renderHitlDescription(tmpl, uses, toolName, operation string) string { + r := strings.NewReplacer( + "${tool}", toolName, + "${uses}", uses, + "${operation}", operation, + ) + return r.Replace(tmpl) +} + +func uniqueStrings(in []string) []string { + seen := make(map[string]struct{}, len(in)) + var out []string + for _, s := range in { + s = strings.TrimSpace(s) + if s == "" { + continue + } + if _, ok := seen[s]; ok { + continue + } + seen[s] = struct{}{} + out = append(out, s) + } + return out +} + +// ToolCallNeedsHitl reports whether call requires human approval and is not pre-approved. +func ToolCallNeedsHitl(graph *spec.ProjectGraph, pol *spec.PolicySpec, call ToolCallContext) (bool, error) { + if actionApproved(call.Uses, call.Run.ApprovedActions) { + return false, nil + } + ev := NewEvaluator(graph, pol) + if err := ev.CheckToolCall(nil, call); err == nil { + return false, nil + } else if d, ok := AsDenied(err); ok && d.Reason == ReasonApprovalRequired { + return true, nil + } else { + return false, err + } +} + +// BuildHitlGate constructs a [HitlGate] when the call is approval-gated and not pre-approved. +func BuildHitlGate(graph *spec.ProjectGraph, pol *spec.PolicySpec, call ToolCallContext) (*HitlGate, error) { + need, err := ToolCallNeedsHitl(graph, pol, call) + if err != nil { + return nil, err + } + if !need { + return nil, nil + } + review, err := ResolveHitlReview(graph, pol, call.Uses) + if err != nil { + return nil, err + } + with := call.With + if with == nil { + with = map[string]any{} + } + return &HitlGate{Uses: call.Uses, With: with, Review: review}, nil +} diff --git a/internal/policy/hitl_edit.go b/internal/policy/hitl_edit.go new file mode 100644 index 0000000..8b10382 --- /dev/null +++ b/internal/policy/hitl_edit.go @@ -0,0 +1,307 @@ +package policy + +import ( + "encoding/json" + "errors" + "fmt" + "reflect" + "strings" + + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/spec" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/tools" +) + +// DefaultHitlActor is recorded on approval trace events when no actor is supplied. +const DefaultHitlActor = "operator" + +// RedactedSecretPlaceholder masks sensitive values in HITL approval prompts. +const RedactedSecretPlaceholder = "••••••" + +// HitlDecisionInput is an operator resolution submitted at resume or from an interactive prompt. +type HitlDecisionInput struct { + Kind spec.HitlDecisionKind + Actor string + EditedWith map[string]any + SwitchTarget string +} + +// ApplyHitlDecision resolves the effective uses/with for a gated call after operator input. +func ApplyHitlDecision(gate HitlGate, in HitlDecisionInput) (uses string, with map[string]any, err error) { + if !IsDecisionAllowed(in.Kind, gate.Review.AllowedDecisions) { + return "", nil, fmt.Errorf("policy: decision %q is not allowed (allowed: %v)", in.Kind, gate.Review.AllowedDecisions) + } + switch in.Kind { + case spec.HitlDecisionApprove: + return gate.Uses, shallowCopyMap(gate.With), nil + case spec.HitlDecisionReject: + return "", nil, ErrHitlRejected + case spec.HitlDecisionEdit: + if in.EditedWith == nil { + return "", nil, fmt.Errorf("policy: edit decision requires edited arguments") + } + if err := ValidateHitlEdit(gate.With, in.EditedWith, gate.Review); err != nil { + return "", nil, err + } + return gate.Uses, shallowCopyMap(in.EditedWith), nil + case spec.HitlDecisionSwitch: + target := strings.TrimSpace(in.SwitchTarget) + if target == "" { + return "", nil, fmt.Errorf("policy: switch decision requires a target operation") + } + if !switchTargetAllowed(target, gate.Review.SwitchTargets) { + return "", nil, fmt.Errorf("policy: switch target %q is not allowed (allowed: %v)", target, gate.Review.SwitchTargets) + } + newUses, err := switchUses(gate.Uses, target) + if err != nil { + return "", nil, err + } + return newUses, shallowCopyMap(gate.With), nil + default: + return "", nil, fmt.Errorf("policy: unknown hitl decision %q", in.Kind) + } +} + +// ErrHitlRejected indicates the operator rejected a gated tool call. +var ErrHitlRejected = fmt.Errorf("policy: hitl decision rejected") + +// HitlRejectedError wraps rejection with actor attribution for trace events. +type HitlRejectedError struct { + Actor string + Uses string +} + +func (e *HitlRejectedError) Error() string { + if e == nil { + return ErrHitlRejected.Error() + } + return fmt.Sprintf("policy: operator %q rejected tool call %q", e.Actor, e.Uses) +} + +func (e *HitlRejectedError) Unwrap() error { return ErrHitlRejected } + +// AsHitlRejected unwraps a [HitlRejectedError]. +func AsHitlRejected(err error) (*HitlRejectedError, bool) { + var r *HitlRejectedError + if errors.As(err, &r) { + return r, true + } + return nil, false +} + +// IsDecisionAllowed reports whether kind is permitted for a gated call. +func IsDecisionAllowed(kind spec.HitlDecisionKind, allowed []spec.HitlDecisionKind) bool { + for _, a := range allowed { + if a == kind { + return true + } + } + return false +} + +func switchTargetAllowed(target string, allowed []string) bool { + target = strings.TrimSpace(target) + for _, a := range allowed { + if strings.TrimSpace(a) == target { + return true + } + } + return false +} + +func switchUses(sourceUses, targetOperation string) (string, error) { + toolName, _, err := tools.ParseUses(sourceUses) + if err != nil { + return "", err + } + targetOperation = strings.TrimSpace(targetOperation) + if targetOperation == "" { + return "", fmt.Errorf("policy: empty switch target operation") + } + return fmt.Sprintf("tool.%s.%s", toolName, targetOperation), nil +} + +// ValidateHitlEdit ensures edited args respect allow/deny path rules (deny wins). +func ValidateHitlEdit(original, edited map[string]any, review ResolvedHitlReview) error { + if original == nil { + original = map[string]any{} + } + if edited == nil { + return fmt.Errorf("policy: edited args must be a non-nil object") + } + origFlat := flattenArgs(original) + editFlat := flattenArgs(edited) + for path, origVal := range origFlat { + newVal, ok := editFlat[path] + if !ok { + if pathAllowed(path, review.AllowedEditPaths, review.AllowedEditArgs) { + continue + } + return fmt.Errorf("policy: missing required path %q in edited args", path) + } + if pathDenied(path, review.DeniedEditPaths, review.DeniedEditArgs) && !valuesEqual(origVal, newVal) { + return fmt.Errorf("policy: cannot edit denied path %q", path) + } + if !pathAllowed(path, review.AllowedEditPaths, review.AllowedEditArgs) && !valuesEqual(origVal, newVal) { + return fmt.Errorf("policy: cannot change path %q without an allow rule", path) + } + } + for path := range editFlat { + if _, ok := origFlat[path]; ok { + continue + } + if pathDenied(path, review.DeniedEditPaths, review.DeniedEditArgs) { + return fmt.Errorf("policy: cannot edit denied path %q", path) + } + if !pathAllowed(path, review.AllowedEditPaths, review.AllowedEditArgs) { + return fmt.Errorf("policy: cannot add path %q without an allow rule", path) + } + } + return nil +} + +// HitlArgsDiff returns changed paths between original and edited args for audit traces. +func HitlArgsDiff(original, edited map[string]any) map[string]any { + origFlat := flattenArgs(original) + editFlat := flattenArgs(edited) + diff := map[string]any{} + for path, newVal := range editFlat { + oldVal, ok := origFlat[path] + if !ok { + diff[path] = map[string]any{"from": nil, "to": newVal} + continue + } + if !valuesEqual(oldVal, newVal) { + diff[path] = map[string]any{"from": oldVal, "to": newVal} + } + } + for path, oldVal := range origFlat { + if _, ok := editFlat[path]; !ok { + diff[path] = map[string]any{"from": oldVal, "to": nil} + } + } + return diff +} + +func pathAllowed(path string, allowedPaths, allowedArgs []string) bool { + if len(allowedPaths) == 0 && len(allowedArgs) == 0 { + return false + } + for _, a := range allowedArgs { + a = strings.TrimSpace(a) + if a == "*" || a == path || topLevelKey(path) == a { + return true + } + } + for _, p := range allowedPaths { + p = strings.TrimSpace(p) + if p == "*" || p == path || strings.HasPrefix(path, p+".") { + return true + } + } + return false +} + +func pathDenied(path string, deniedPaths, deniedArgs []string) bool { + for _, d := range deniedArgs { + d = strings.TrimSpace(d) + if d == "*" || d == path || topLevelKey(path) == d { + return true + } + } + for _, d := range deniedPaths { + d = strings.TrimSpace(d) + if d == "*" || d == path || strings.HasPrefix(path, d+".") { + return true + } + } + return false +} + +func topLevelKey(path string) string { + if i := strings.IndexByte(path, '.'); i >= 0 { + return path[:i] + } + return path +} + +func flattenArgs(m map[string]any) map[string]any { + out := map[string]any{} + flattenInto("", m, out) + return out +} + +func flattenInto(prefix string, v any, out map[string]any) { + switch val := v.(type) { + case map[string]any: + for k, child := range val { + key := k + if prefix != "" { + key = prefix + "." + k + } + if childMap, ok := child.(map[string]any); ok { + flattenInto(key, childMap, out) + } else { + out[key] = child + } + } + default: + if prefix != "" { + out[prefix] = v + } + } +} + +func valuesEqual(a, b any) bool { + if reflect.DeepEqual(a, b) { + return true + } + ab, errA := json.Marshal(a) + bb, errB := json.Marshal(b) + if errA != nil || errB != nil { + return false + } + return string(ab) == string(bb) +} + +func shallowCopyMap(m map[string]any) map[string]any { + if m == nil { + return map[string]any{} + } + out := make(map[string]any, len(m)) + for k, v := range m { + out[k] = v + } + return out +} + +// RedactHitlArgs returns a copy of args with sensitive keys masked for prompts. +func RedactHitlArgs(args map[string]any, redactKeys []string) map[string]any { + if len(redactKeys) == 0 { + return shallowCopyMap(args) + } + flat := flattenArgs(args) + redacted := shallowCopyMap(args) + for path := range flat { + if pathDenied(path, nil, redactKeys) { + setNestedValue(redacted, path, RedactedSecretPlaceholder) + } + } + return redacted +} + +func setNestedValue(root map[string]any, path string, value any) { + parts := strings.Split(path, ".") + cur := root + for i, p := range parts { + if i == len(parts)-1 { + cur[p] = value + return + } + next, ok := cur[p].(map[string]any) + if !ok { + next = map[string]any{} + cur[p] = next + } + cur = next + } +} diff --git a/internal/policy/hitl_test.go b/internal/policy/hitl_test.go new file mode 100644 index 0000000..125a566 --- /dev/null +++ b/internal/policy/hitl_test.go @@ -0,0 +1,220 @@ +package policy + +import ( + "errors" + "strings" + "testing" + + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/spec" +) + +func TestResolveHitlReview_defaults(t *testing.T) { + t.Helper() + pol := &spec.PolicySpec{ + Hitl: &spec.HitlPolicy{ + InterruptOn: map[string]spec.HitlInterruptValue{ + "helper": {Enabled: true}, + }, + }, + } + review, err := ResolveHitlReview(nil, pol, "tool.helper.echo") + if err != nil { + t.Fatal(err) + } + if len(review.AllowedDecisions) != 2 { + t.Fatalf("decisions = %v", review.AllowedDecisions) + } + if !strings.Contains(review.Description, "tool.helper.echo") { + t.Fatalf("description = %q", review.Description) + } +} + +func TestResolveHitlReview_templateAndSwitch(t *testing.T) { + t.Helper() + pol := &spec.PolicySpec{ + Hitl: &spec.HitlPolicy{ + ToolSwitchMap: map[string][]string{ + "deploy_to_production": {"deploy_to_staging"}, + }, + InterruptOn: map[string]spec.HitlInterruptValue{ + "deploy": { + Enabled: true, + Config: &spec.HitlInterruptConfig{ + Description: "Deploy ${operation} via ${tool}", + AllowedDecisions: []spec.HitlDecisionKind{spec.HitlDecisionApprove, spec.HitlDecisionSwitch}, + SwitchMap: map[string][]string{ + "deploy_to_production": {"canary_deploy"}, + }, + }, + }, + }, + }, + } + review, err := ResolveHitlReview(nil, pol, "tool.deploy.deploy_to_production") + if err != nil { + t.Fatal(err) + } + if review.Description != "Deploy deploy_to_production via deploy" { + t.Fatalf("description = %q", review.Description) + } + targets := map[string]struct{}{} + for _, s := range review.SwitchTargets { + targets[s] = struct{}{} + } + for _, want := range []string{"deploy_to_staging", "canary_deploy"} { + if _, ok := targets[want]; !ok { + t.Fatalf("missing switch target %q in %v", want, review.SwitchTargets) + } + } +} + +func TestValidateHitlEdit_denyWins(t *testing.T) { + t.Helper() + review := ResolvedHitlReview{ + AllowedEditArgs: []string{"*"}, + DeniedEditArgs: []string{"secret"}, + } + orig := map[string]any{"topic": "a", "secret": "x"} + edited := map[string]any{"topic": "b", "secret": "x"} + if err := ValidateHitlEdit(orig, edited, review); err != nil { + t.Fatalf("unchanged secret should pass: %v", err) + } + edited["secret"] = "y" + if err := ValidateHitlEdit(orig, edited, review); err == nil || !strings.Contains(err.Error(), "denied") { + t.Fatalf("expected denied path error, got %v", err) + } +} + +func TestValidateHitlEdit_allowedPathOnly(t *testing.T) { + t.Helper() + review := ResolvedHitlReview{AllowedEditArgs: []string{"topic"}} + orig := map[string]any{"topic": "a", "mode": "prod"} + edited := map[string]any{"topic": "b", "mode": "prod"} + if err := ValidateHitlEdit(orig, edited, review); err != nil { + t.Fatal(err) + } + edited["mode"] = "staging" + if err := ValidateHitlEdit(orig, edited, review); err == nil { + t.Fatal("expected mode change rejection") + } +} + +func TestApplyHitlDecision_branches(t *testing.T) { + t.Helper() + gate := HitlGate{ + Uses: "tool.helper.echo", + With: map[string]any{"topic": "hi"}, + Review: ResolvedHitlReview{ + AllowedDecisions: []spec.HitlDecisionKind{ + spec.HitlDecisionApprove, + spec.HitlDecisionReject, + spec.HitlDecisionEdit, + spec.HitlDecisionSwitch, + }, + AllowedEditArgs: []string{"topic"}, + SwitchTargets: []string{"identity"}, + }, + } + + uses, with, err := ApplyHitlDecision(gate, HitlDecisionInput{Kind: spec.HitlDecisionApprove, Actor: "alice"}) + if err != nil || uses != gate.Uses || with["topic"] != "hi" { + t.Fatalf("approve: uses=%q with=%v err=%v", uses, with, err) + } + + _, _, err = ApplyHitlDecision(gate, HitlDecisionInput{Kind: spec.HitlDecisionReject, Actor: "alice"}) + if !errors.Is(err, ErrHitlRejected) { + t.Fatalf("reject: %v", err) + } + + uses, with, err = ApplyHitlDecision(gate, HitlDecisionInput{ + Kind: spec.HitlDecisionEdit, Actor: "alice", EditedWith: map[string]any{"topic": "bye"}, + }) + if err != nil || with["topic"] != "bye" { + t.Fatalf("edit: %v uses=%q with=%v", err, uses, with) + } + + uses, _, err = ApplyHitlDecision(gate, HitlDecisionInput{ + Kind: spec.HitlDecisionSwitch, Actor: "alice", SwitchTarget: "identity", + }) + if err != nil || uses != "tool.helper.identity" { + t.Fatalf("switch: uses=%q err=%v", uses, err) + } +} + +func TestApplyHitlDecision_disallowedDecision(t *testing.T) { + t.Helper() + gate := HitlGate{ + Uses: "tool.helper.echo", + Review: ResolvedHitlReview{ + AllowedDecisions: []spec.HitlDecisionKind{spec.HitlDecisionApprove}, + }, + } + _, _, err := ApplyHitlDecision(gate, HitlDecisionInput{Kind: spec.HitlDecisionEdit}) + if err == nil || !strings.Contains(err.Error(), "not allowed") { + t.Fatalf("got %v", err) + } +} + +func TestToolCallNeedsHitl_preApprovedSkips(t *testing.T) { + t.Helper() + graph := testGraphForHitl(t) + pol := graph.Policies["gate"].Spec + call := ToolCallContext{ + Run: RunContext{ApprovedActions: []string{"tool.helper.echo"}}, + Uses: "tool.helper.echo", + With: map[string]any{}, + StepID: "s1", + } + need, err := ToolCallNeedsHitl(graph, &pol, call) + if err != nil || need { + t.Fatalf("need=%v err=%v", need, err) + } +} + +func TestToolCallNeedsHitl_requiredWithoutApprove(t *testing.T) { + t.Helper() + graph := testGraphForHitl(t) + pol := graph.Policies["gate"].Spec + call := ToolCallContext{ + Run: RunContext{}, + Uses: "tool.helper.echo", + With: map[string]any{}, + StepID: "s1", + } + need, err := ToolCallNeedsHitl(graph, &pol, call) + if err != nil || !need { + t.Fatalf("need=%v err=%v", need, err) + } +} + +func TestRedactHitlArgs_masksKeys(t *testing.T) { + t.Helper() + args := map[string]any{"topic": "x", "token": "secret"} + out := RedactHitlArgs(args, []string{"token"}) + if out["topic"] != "x" { + t.Fatalf("topic leaked change: %v", out) + } + if out["token"] != RedactedSecretPlaceholder { + t.Fatalf("token not redacted: %v", out["token"]) + } +} + +func testGraphForHitl(t *testing.T) *spec.ProjectGraph { + t.Helper() + return &spec.ProjectGraph{ + Tools: map[string]*spec.ToolResource{ + "helper": { + Spec: spec.ToolSpec{Type: "native", Safety: &spec.ToolSafety{SideEffects: spec.BoolPtr(true)}}, + }, + }, + Policies: map[string]*spec.PolicyResource{ + "gate": { + Spec: spec.PolicySpec{ + Approvals: &spec.PolicyApprovals{ + RequiredFor: []string{"tool.helper.echo"}, + }, + }, + }, + }, + } +} diff --git a/internal/runtime/local/hitl.go b/internal/runtime/local/hitl.go new file mode 100644 index 0000000..fe09e2c --- /dev/null +++ b/internal/runtime/local/hitl.go @@ -0,0 +1,30 @@ +package local + +import ( + "fmt" + + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/engine" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/policy" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/runtime" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/spec" +) + +func buildEngineHitlOptions(opts runtime.WorkflowRunOptions) (engine.HitlRunOptions, error) { + out := engine.HitlRunOptions{ + AutoApprove: opts.AutoApprove, + Actor: opts.HitlActor, + } + if opts.HitlDecision == nil { + return out, nil + } + if !spec.IsValidHitlDecisionKind(opts.HitlDecision.Kind) { + return out, fmt.Errorf("local: invalid hitl decision kind %q", opts.HitlDecision.Kind) + } + out.Decision = &policy.HitlDecisionInput{ + Kind: opts.HitlDecision.Kind, + Actor: opts.HitlActor, + EditedWith: opts.HitlDecision.EditedWith, + SwitchTarget: opts.HitlDecision.SwitchTarget, + } + return out, nil +} diff --git a/internal/runtime/local/hitl_test.go b/internal/runtime/local/hitl_test.go new file mode 100644 index 0000000..9bb5e12 --- /dev/null +++ b/internal/runtime/local/hitl_test.go @@ -0,0 +1,31 @@ +package local + +import ( + "testing" + + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/runtime" + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/spec" +) + +func TestBuildEngineHitlOptions_invalidKind(t *testing.T) { + t.Helper() + _, err := buildEngineHitlOptions(runtime.WorkflowRunOptions{ + HitlDecision: &runtime.HitlDecisionOptions{Kind: spec.HitlDecisionKind("maybe")}, + }) + if err == nil { + t.Fatal("expected error for invalid decision kind") + } +} + +func TestBuildEngineHitlOptions_validDecision(t *testing.T) { + t.Helper() + out, err := buildEngineHitlOptions(runtime.WorkflowRunOptions{ + HitlDecision: &runtime.HitlDecisionOptions{Kind: spec.HitlDecisionApprove}, + }) + if err != nil { + t.Fatal(err) + } + if out.Decision == nil || out.Decision.Kind != spec.HitlDecisionApprove { + t.Fatalf("decision = %+v", out.Decision) + } +} diff --git a/internal/runtime/local/runner.go b/internal/runtime/local/runner.go index 2dfe381..ceb0aaa 100644 --- a/internal/runtime/local/runner.go +++ b/internal/runtime/local/runner.go @@ -104,7 +104,9 @@ func (r *Runtime) startWorkflow(ctx context.Context, opts runtime.WorkflowRunOpt return runID, fmt.Errorf("local: trace run.started: %w", err) } - return r.executeEngine(ctx, prep, runID, wfName, envLabel, started, input, opts.ApprovedActions, false, rec) + opts.RunID = runID + opts.Resume = false + return r.executeEngine(ctx, prep, runID, wfName, envLabel, started, input, opts, rec) } func (r *Runtime) resumeWorkflow(ctx context.Context, opts runtime.WorkflowRunOptions) (string, error) { @@ -179,7 +181,8 @@ func (r *Runtime) resumeWorkflow(ctx context.Context, opts runtime.WorkflowRunOp envLabel = "local" } - return r.executeEngine(ctx, prep, runID, wfName, envLabel, run.StartedAt, input, opts.ApprovedActions, true, rec) + opts.Resume = true + return r.executeEngine(ctx, prep, runID, wfName, envLabel, run.StartedAt, input, opts, rec) } func (r *Runtime) executeEngine( @@ -188,8 +191,7 @@ func (r *Runtime) executeEngine( runID, wfName, envLabel string, started time.Time, input map[string]any, - approved []string, - resume bool, + opts runtime.WorkflowRunOptions, rec *trace.Recorder, ) (string, error) { ex := &engine.Executor{ @@ -201,14 +203,19 @@ func (r *Runtime) executeEngine( Trace: rec, Now: r.Now, } + hitl, err := buildEngineHitlOptions(opts) + if err != nil { + return runID, err + } runErr := ex.Run(ctx, engine.RunInput{ RunID: runID, WorkflowName: wfName, Env: envLabel, StartedAt: started, Input: input, - ApprovedActions: approved, - Resume: resume, + ApprovedActions: opts.ApprovedActions, + Resume: opts.Resume, + Hitl: hitl, }) finData := map[string]any{} diff --git a/internal/runtime/runtime.go b/internal/runtime/runtime.go index 7b45858..c87588b 100644 --- a/internal/runtime/runtime.go +++ b/internal/runtime/runtime.go @@ -1,6 +1,10 @@ package runtime -import "context" +import ( + "context" + + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/spec" +) // WorkflowRunOptions configures a single workflow execution for [WorkflowRunner]. type WorkflowRunOptions struct { @@ -19,6 +23,19 @@ type WorkflowRunOptions struct { // Resume continues an existing run from its latest checkpoint (issue #105). // RunID must be set; InputJSON and WorkflowName are loaded from the persisted run. Resume bool + // AutoApprove skips interactive HITL prompts and approves gated tool calls (issue #106). + AutoApprove bool + // HitlActor attributes approval decisions in trace events (default: operator or $USER). + HitlActor string + // HitlDecision supplies an explicit decision when resuming an interrupted run (issue #106). + HitlDecision *HitlDecisionOptions +} + +// HitlDecisionOptions configures a non-interactive HITL resolution on resume. +type HitlDecisionOptions struct { + Kind spec.HitlDecisionKind + EditedWith map[string]any + SwitchTarget string } // WorkflowRunner loads declarative state and executes a workflow locally (design doc section 16 MVP). diff --git a/internal/spec/hitl.go b/internal/spec/hitl.go new file mode 100644 index 0000000..ff75cb0 --- /dev/null +++ b/internal/spec/hitl.go @@ -0,0 +1,126 @@ +package spec + +import ( + "fmt" + "strings" + + "gopkg.in/yaml.v3" +) + +// HitlDecisionKind names an operator resolution at an approval gate (issue #106). +type HitlDecisionKind string + +const ( + HitlDecisionApprove HitlDecisionKind = "approve" + HitlDecisionReject HitlDecisionKind = "reject" + HitlDecisionEdit HitlDecisionKind = "edit" + HitlDecisionSwitch HitlDecisionKind = "switch" +) + +// AllHitlDecisionKinds lists every supported decision in stable order. +var AllHitlDecisionKinds = []HitlDecisionKind{ + HitlDecisionApprove, + HitlDecisionReject, + HitlDecisionEdit, + HitlDecisionSwitch, +} + +// DefaultHitlDescriptionPrefix is shown before per-call review text when policy omits descriptionPrefix. +const DefaultHitlDescriptionPrefix = "Tool execution requires approval" + +// HitlPolicy configures human-in-the-loop approval gates (issue #106). +// +// interruptOn keys are Tool metadata.name values. They do not independently gate tools; +// they supply per-tool review configuration (allowed decisions, edit rules, switchMap) +// when a call already requires approval via approvals.requiredFor or safety metadata. +type HitlPolicy struct { + // InterruptOn maps Tool metadata.name to true (defaults) or per-tool review config. + // Does not gate tools by itself; see [HitlPolicy] package comment. + InterruptOn map[string]HitlInterruptValue `yaml:"interruptOn,omitempty" json:"interruptOn,omitempty"` + // DescriptionPrefix prefixes every approval prompt (default [DefaultHitlDescriptionPrefix]). + DescriptionPrefix string `yaml:"descriptionPrefix,omitempty" json:"descriptionPrefix,omitempty"` + // ToolSwitchMap maps source operation to allowed target operations for switch decisions. + ToolSwitchMap map[string][]string `yaml:"toolSwitchMap,omitempty" json:"toolSwitchMap,omitempty"` + // RedactKeys masks top-level arg keys in approval prompts (merged with per-call redactKeys). + RedactKeys []string `yaml:"redactKeys,omitempty" json:"redactKeys,omitempty"` +} + +// HitlInterruptConfig is per-tool review configuration at an approval gate. +type HitlInterruptConfig struct { + AllowedDecisions []HitlDecisionKind `yaml:"allowedDecisions,omitempty" json:"allowedDecisions,omitempty"` + Description string `yaml:"description,omitempty" json:"description,omitempty"` + AllowedEditArgs []string `yaml:"allowedEditArgs,omitempty" json:"allowedEditArgs,omitempty"` + DeniedEditArgs []string `yaml:"deniedEditArgs,omitempty" json:"deniedEditArgs,omitempty"` + AllowedEditPaths []string `yaml:"allowedEditPaths,omitempty" json:"allowedEditPaths,omitempty"` + DeniedEditPaths []string `yaml:"deniedEditPaths,omitempty" json:"deniedEditPaths,omitempty"` + AllowedEditTools []string `yaml:"allowedEditTools,omitempty" json:"allowedEditTools,omitempty"` + SwitchMap map[string][]string `yaml:"switchMap,omitempty" json:"switchMap,omitempty"` + RedactKeys []string `yaml:"redactKeys,omitempty" json:"redactKeys,omitempty"` +} + +// HitlInterruptValue is either enabled-with-defaults (true) or an explicit [HitlInterruptConfig]. +type HitlInterruptValue struct { + Enabled bool + Config *HitlInterruptConfig +} + +// UnmarshalYAML accepts `true` or a mapping for interruptOn entries. +func (v *HitlInterruptValue) UnmarshalYAML(value *yaml.Node) error { + if value == nil { + return nil + } + switch value.Kind { + case yaml.ScalarNode: + var b bool + if err := value.Decode(&b); err != nil { + return fmt.Errorf("spec: interruptOn entry must be true or a config object: %w", err) + } + if !b { + return fmt.Errorf("spec: interruptOn entry must be true or a config object, not false") + } + v.Enabled = true + return nil + case yaml.MappingNode: + var cfg HitlInterruptConfig + if err := value.Decode(&cfg); err != nil { + return fmt.Errorf("spec: interruptOn config: %w", err) + } + v.Enabled = true + v.Config = &cfg + return nil + default: + return fmt.Errorf("spec: interruptOn entry must be true or a config object") + } +} + +// MarshalYAML encodes as true or the config object. +func (v HitlInterruptValue) MarshalYAML() (any, error) { + if v.Config != nil { + return v.Config, nil + } + if v.Enabled { + return true, nil + } + return nil, nil +} + +// ParseHitlDecisionKind normalizes a CLI decision string. +func ParseHitlDecisionKind(s string) (HitlDecisionKind, error) { + k := HitlDecisionKind(strings.ToLower(strings.TrimSpace(s))) + switch k { + case HitlDecisionApprove, HitlDecisionReject, HitlDecisionEdit, HitlDecisionSwitch: + return k, nil + default: + return "", fmt.Errorf("spec: unknown hitl decision %q (want approve, reject, edit, or switch)", s) + } +} + +// IsValidHitlDecisionKind reports whether k is a known decision kind. +func IsValidHitlDecisionKind(k HitlDecisionKind) bool { + for _, known := range AllHitlDecisionKinds { + if k == known { + return true + } + } + return false +} diff --git a/internal/spec/hitl_test.go b/internal/spec/hitl_test.go new file mode 100644 index 0000000..0b6c51e --- /dev/null +++ b/internal/spec/hitl_test.go @@ -0,0 +1,151 @@ +package spec_test + +import ( + "strings" + "testing" + + "github.com/LAA-Software-Engineering/agentic-control-plane/internal/spec" + "gopkg.in/yaml.v3" +) + +func TestHitlInterruptValue_unmarshalTrue(t *testing.T) { + t.Helper() + var m map[string]spec.HitlInterruptValue + if err := yaml.Unmarshal([]byte("helper: true"), &m); err != nil { + t.Fatal(err) + } + v, ok := m["helper"] + if !ok || !v.Enabled || v.Config != nil { + t.Fatalf("got %+v", v) + } +} + +func TestHitlInterruptValue_unmarshalConfig(t *testing.T) { + t.Helper() + raw := ` +helper: + allowedDecisions: [approve, reject, edit] + deniedEditArgs: [secret] +` + var m map[string]spec.HitlInterruptValue + if err := yaml.Unmarshal([]byte(raw), &m); err != nil { + t.Fatal(err) + } + v := m["helper"] + if v.Config == nil { + t.Fatal("expected config") + } + if len(v.Config.AllowedDecisions) != 3 { + t.Fatalf("decisions: %v", v.Config.AllowedDecisions) + } +} + +func TestHitlInterruptValue_unmarshalFalseRejected(t *testing.T) { + t.Helper() + var m map[string]spec.HitlInterruptValue + err := yaml.Unmarshal([]byte("helper: false"), &m) + if err == nil || !strings.Contains(err.Error(), "false") { + t.Fatalf("expected false rejection, got %v", err) + } +} + +func TestParseHitlDecisionKind(t *testing.T) { + t.Helper() + for _, tc := range []struct { + in string + want spec.HitlDecisionKind + ok bool + }{ + {"approve", spec.HitlDecisionApprove, true}, + {"REJECT", spec.HitlDecisionReject, true}, + {"edit", spec.HitlDecisionEdit, true}, + {"switch", spec.HitlDecisionSwitch, true}, + {"nope", "", false}, + } { + got, err := spec.ParseHitlDecisionKind(tc.in) + if tc.ok && err != nil { + t.Fatalf("%q: %v", tc.in, err) + } + if !tc.ok && err == nil { + t.Fatalf("%q: expected error", tc.in) + } + if tc.ok && got != tc.want { + t.Fatalf("%q: got %q want %q", tc.in, got, tc.want) + } + } +} + +func TestValidatePolicySpecs_hitlUnknownTool(t *testing.T) { + t.Helper() + g := &spec.ProjectGraph{ + Tools: map[string]*spec.ToolResource{}, + Policies: map[string]*spec.PolicyResource{ + "bad": { + Spec: spec.PolicySpec{ + Hitl: &spec.HitlPolicy{ + InterruptOn: map[string]spec.HitlInterruptValue{ + "missing": {Enabled: true}, + }, + }, + }, + }, + }, + } + err := spec.ValidateProjectGraph(g, t.TempDir()) + if err == nil || !strings.Contains(err.Error(), "no Tool/missing") { + t.Fatalf("expected unknown tool error, got %v", err) + } +} + +func TestValidatePolicySpecs_hitlOverlap(t *testing.T) { + t.Helper() + g := &spec.ProjectGraph{ + Policies: map[string]*spec.PolicyResource{ + "bad": { + Spec: spec.PolicySpec{ + Hitl: &spec.HitlPolicy{ + InterruptOn: map[string]spec.HitlInterruptValue{ + "helper": { + Enabled: true, + Config: &spec.HitlInterruptConfig{ + AllowedEditArgs: []string{"x", "y"}, + DeniedEditArgs: []string{"y"}, + }, + }, + }, + }, + }, + }, + }, + } + err := spec.ValidateProjectGraph(g, t.TempDir()) + if err == nil || !strings.Contains(err.Error(), "overlap") { + t.Fatalf("expected overlap error, got %v", err) + } +} + +func TestValidatePolicySpecs_hitlInvalidDecision(t *testing.T) { + t.Helper() + g := &spec.ProjectGraph{ + Policies: map[string]*spec.PolicyResource{ + "bad": { + Spec: spec.PolicySpec{ + Hitl: &spec.HitlPolicy{ + InterruptOn: map[string]spec.HitlInterruptValue{ + "helper": { + Enabled: true, + Config: &spec.HitlInterruptConfig{ + AllowedDecisions: []spec.HitlDecisionKind{"maybe"}, + }, + }, + }, + }, + }, + }, + }, + } + err := spec.ValidateProjectGraph(g, t.TempDir()) + if err == nil || !strings.Contains(err.Error(), "unknown decision") { + t.Fatalf("expected unknown decision error, got %v", err) + } +} diff --git a/internal/spec/kinds.go b/internal/spec/kinds.go index 9c72461..526188f 100644 --- a/internal/spec/kinds.go +++ b/internal/spec/kinds.go @@ -167,7 +167,9 @@ type PolicySpec struct { Execution *PolicyExecution `yaml:"execution,omitempty" json:"execution,omitempty"` Tools *PolicyTools `yaml:"tools,omitempty" json:"tools,omitempty"` Approvals *PolicyApprovals `yaml:"approvals,omitempty" json:"approvals,omitempty"` - Security *PolicySecurity `yaml:"security,omitempty" json:"security,omitempty"` + // Hitl configures human-in-the-loop approval gates for gated tool calls (issue #106). + Hitl *HitlPolicy `yaml:"hitl,omitempty" json:"hitl,omitempty"` + Security *PolicySecurity `yaml:"security,omitempty" json:"security,omitempty"` } type PolicyExecution struct { diff --git a/internal/spec/validator.go b/internal/spec/validator.go index c794140..acc272d 100644 --- a/internal/spec/validator.go +++ b/internal/spec/validator.go @@ -253,10 +253,118 @@ func validatePolicySpecs(g *ProjectGraph) []error { seen[a] = struct{}{} } } + errs = append(errs, validateHitlPolicy(name, pr.Spec.Hitl, g)...) } return errs } +func validateHitlPolicy(policyName string, hitl *HitlPolicy, g *ProjectGraph) []error { + if hitl == nil { + return nil + } + var errs []error + prefix := fmt.Sprintf("Policy/%s: hitl", policyName) + for toolName, iv := range hitl.InterruptOn { + tn := strings.TrimSpace(toolName) + if tn == "" { + errs = append(errs, fmt.Errorf("%s.interruptOn contains empty tool name", prefix)) + continue + } + if g != nil && g.Tools != nil { + if _, ok := g.Tools[tn]; !ok { + errs = append(errs, fmt.Errorf("%s.interruptOn[%q]: no Tool/%s in project (interruptOn keys must match Tool metadata.name)", prefix, toolName, tn)) + } + } + if !iv.Enabled { + errs = append(errs, fmt.Errorf("%s.interruptOn[%q] must be true or a config object", prefix, toolName)) + continue + } + if iv.Config != nil { + errs = append(errs, validateHitlInterruptConfig(prefix+".interruptOn["+toolName+"]", iv.Config, g)...) + } + } + if hitl.ToolSwitchMap != nil { + for from, targets := range hitl.ToolSwitchMap { + if strings.TrimSpace(from) == "" { + errs = append(errs, fmt.Errorf("%s.toolSwitchMap contains empty source key", prefix)) + } + for i, tgt := range targets { + if strings.TrimSpace(tgt) == "" { + errs = append(errs, fmt.Errorf("%s.toolSwitchMap[%q][%d] must be non-empty", prefix, from, i)) + } + } + } + } + return errs +} + +func validateHitlInterruptConfig(prefix string, cfg *HitlInterruptConfig, g *ProjectGraph) []error { + if cfg == nil { + return nil + } + var errs []error + seenDecisions := make(map[HitlDecisionKind]struct{}) + for i, d := range cfg.AllowedDecisions { + if !IsValidHitlDecisionKind(d) { + errs = append(errs, fmt.Errorf("%s.allowedDecisions[%d]: unknown decision %q", prefix, i, d)) + continue + } + if _, dup := seenDecisions[d]; dup { + errs = append(errs, fmt.Errorf("%s.allowedDecisions: duplicate %q", prefix, d)) + } + seenDecisions[d] = struct{}{} + } + for i, tn := range cfg.AllowedEditTools { + tn = strings.TrimSpace(tn) + if tn == "" { + errs = append(errs, fmt.Errorf("%s.allowedEditTools[%d] must be non-empty", prefix, i)) + continue + } + if g != nil && g.Tools != nil { + if _, ok := g.Tools[tn]; !ok { + errs = append(errs, fmt.Errorf("%s.allowedEditTools[%q]: no Tool/%s in project", prefix, tn, tn)) + } + } + } + if overlap := intersectStringSets(cfg.AllowedEditArgs, cfg.DeniedEditArgs); len(overlap) > 0 { + errs = append(errs, fmt.Errorf("%s: allowedEditArgs and deniedEditArgs overlap: %v", prefix, overlap)) + } + if overlap := intersectStringSets(cfg.AllowedEditPaths, cfg.DeniedEditPaths); len(overlap) > 0 { + errs = append(errs, fmt.Errorf("%s: allowedEditPaths and deniedEditPaths overlap: %v", prefix, overlap)) + } + if cfg.SwitchMap != nil { + for from, targets := range cfg.SwitchMap { + if strings.TrimSpace(from) == "" { + errs = append(errs, fmt.Errorf("%s.switchMap contains empty source key", prefix)) + } + for i, tgt := range targets { + if strings.TrimSpace(tgt) == "" { + errs = append(errs, fmt.Errorf("%s.switchMap[%q][%d] must be non-empty", prefix, from, i)) + } + } + } + } + return errs +} + +func intersectStringSets(a, b []string) []string { + if len(a) == 0 || len(b) == 0 { + return nil + } + setB := make(map[string]struct{}, len(b)) + for _, s := range b { + setB[strings.TrimSpace(s)] = struct{}{} + } + var out []string + for _, s := range a { + s = strings.TrimSpace(s) + if _, ok := setB[s]; ok { + out = append(out, s) + } + } + return out +} + func validatePolicyPresets(g *ProjectGraph) []error { var errs []error if g.Spec.Defaults != nil { diff --git a/internal/trace/events.go b/internal/trace/events.go index 4bc31b8..44bdb60 100644 --- a/internal/trace/events.go +++ b/internal/trace/events.go @@ -7,16 +7,18 @@ type Event = state.TraceEvent // Event type names from design doc §12.2 I (Trace recorder). const ( - EventRunStarted = "run.started" - EventRunFinished = "run.finished" - EventRunInterrupted = "run.interrupted" - EventRunResumed = "run.resumed" - EventStepStarted = "step.started" - EventStepFinished = "step.finished" - EventStepFailed = "step.failed" - EventToolCalled = "tool.called" - EventToolCompleted = "tool.completed" - EventModelCalled = "model.called" - EventModelCompleted = "model.completed" - EventPolicyDenied = "policy.denied" + EventRunStarted = "run.started" + EventRunFinished = "run.finished" + EventRunInterrupted = "run.interrupted" + EventRunResumed = "run.resumed" + EventStepStarted = "step.started" + EventStepFinished = "step.finished" + EventStepFailed = "step.failed" + EventToolCalled = "tool.called" + EventToolCompleted = "tool.completed" + EventModelCalled = "model.called" + EventModelCompleted = "model.completed" + EventPolicyDenied = "policy.denied" + EventApprovalRequested = "approval.requested" + EventApprovalResolved = "approval.resolved" ) diff --git a/test/integration/cli_flow_test.go b/test/integration/cli_flow_test.go index c6f7cda..dd8f672 100644 --- a/test/integration/cli_flow_test.go +++ b/test/integration/cli_flow_test.go @@ -138,24 +138,35 @@ func TestCLI_ExampleMVPFlow(t *testing.T) { } }) - t.Run("policy_denial_exit5", func(t *testing.T) { + t.Run("hitl_interrupt_awaiting_decision", func(t *testing.T) { fixture := filepath.Join(repoRoot(t), "internal", "cli", "testdata", "run_policy") if _, err := os.Stat(filepath.Join(fixture, "project.yaml")); err != nil { t.Fatalf("fixture: %v", err) } db := filepath.Join(t.TempDir(), "policy-denial.db") - _, err := runCLI(t, + out, err := runCLI(t, "run", "workflow/gated", "--project", fixture, "--state", db, "--input", "topic=x", ) - if err == nil { - t.Fatal("expected policy denial error") + if err != nil { + t.Fatalf("run: %v\n%s", err, out) + } + if !strings.Contains(out, "Status: interrupted") { + t.Fatalf("expected interrupted status:\n%s", out) } - if cli.ExitCodeOf(err) != cli.ExitPolicyDenied { - t.Fatalf("exit=%d want %d err=%v", cli.ExitCodeOf(err), cli.ExitPolicyDenied, err) + runID := extractRunID(out) + if runID == "" { + t.Fatal("missing run id") + } + out, err = runCLI(t, "logs", "--project", fixture, "--state", db, "--run", runID) + if err != nil { + t.Fatal(err) + } + if !strings.Contains(out, trace.EventApprovalRequested) { + t.Fatalf("logs missing approval request:\n%s", out) } }) @@ -192,14 +203,14 @@ func TestCLI_ExampleMVPFlow(t *testing.T) { "--state", db, "--input-file", input, ) - if err == nil { - t.Fatalf("expected policy denial, output:\n%s", out) + if err != nil { + t.Fatalf("run: %v\n%s", err, out) } - if cli.ExitCodeOf(err) != cli.ExitPolicyDenied { - t.Fatalf("exit=%d want %d err=%v\n%s", cli.ExitCodeOf(err), cli.ExitPolicyDenied, err, out) + if !strings.Contains(out, "Status: interrupted") { + t.Fatalf("expected interrupted run, output:\n%s", out) } - if !strings.Contains(out, "Policy blocked this run") || !strings.Contains(out, "tool.github.pull_request.post_comment") { - t.Fatalf("expected policy UX in run output:\n%s", out) + if !strings.Contains(out, "Run ID:") { + t.Fatalf("expected run id in output:\n%s", out) } runID := extractRunID(out) if runID == "" { @@ -210,11 +221,11 @@ func TestCLI_ExampleMVPFlow(t *testing.T) { if err != nil { t.Fatalf("logs: %v\n%s", err, out) } - if !strings.Contains(out, trace.EventPolicyDenied) { - t.Fatalf("logs missing %q:\n%s", trace.EventPolicyDenied, out) + if !strings.Contains(out, trace.EventApprovalRequested) { + t.Fatalf("logs missing %q:\n%s", trace.EventApprovalRequested, out) } if !strings.Contains(out, "post_comment") { - t.Fatalf("logs should mention blocked step post_comment:\n%s", out) + t.Fatalf("logs should mention gated step post_comment:\n%s", out) } }) @@ -321,14 +332,11 @@ func TestCLI_PrReviewGithubExample(t *testing.T) { "--state", db, "--input-file", input, ) - if err == nil { - t.Fatalf("expected policy denial, output:\n%s", out) - } - if cli.ExitCodeOf(err) != cli.ExitPolicyDenied { - t.Fatalf("exit=%d want %d err=%v\n%s", cli.ExitCodeOf(err), cli.ExitPolicyDenied, err, out) + if err != nil { + t.Fatalf("run: %v\n%s", err, out) } - if !strings.Contains(out, "tool.github.pull_request.post_comment") { - t.Fatalf("expected gated uses in output:\n%s", out) + if !strings.Contains(out, "Status: interrupted") { + t.Fatalf("expected interrupted run, output:\n%s", out) } runID := extractRunID(out) if runID == "" { @@ -339,8 +347,8 @@ func TestCLI_PrReviewGithubExample(t *testing.T) { if err != nil { t.Fatalf("logs: %v\n%s", err, out) } - if !strings.Contains(out, trace.EventPolicyDenied) { - t.Fatalf("logs missing %q:\n%s", trace.EventPolicyDenied, out) + if !strings.Contains(out, trace.EventApprovalRequested) { + t.Fatalf("logs missing %q:\n%s", trace.EventApprovalRequested, out) } }