Skip to content

Commit ebbe84b

Browse files
leo-aa88cursoragent
andcommitted
fix(hitl): address PR #128 review blockers
Propagate invalid decision kinds from buildEngineHitlOptions, reject --decision without --resume, and error when interrupted checkpoints lack pendingHitl. Auto-approve now emits approval trace events; document interruptOn semantics; validate interruptOn tool refs at load time. Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 8a15888 commit ebbe84b

19 files changed

Lines changed: 359 additions & 107 deletions

File tree

README.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ The full product vision, YAML spec v0, and architecture are documented in [**`do
4949
- **`agentctl validate`** — load project, apply **project defaults** (`spec.defaults`), then **environment overlays** (`-e` / `--env`, `Environment` resources §7.6), then validate graph, schemas, and references
5050
- **`agentctl plan`** — diff desired graph vs SQLite **deployment** state; risk hints; JSON/YAML output includes a **`deploymentBaseline`** digest for the store snapshot
5151
- **`agentctl apply`** — persist plan (TTY confirm or `--auto-approve` / `AGENTCTL_AUTO_APPROVE`); **optimistic concurrency** — if the deployment store changed after the plan snapshot (e.g. another process applied the same `--state` file while this run waited at the prompt), apply fails with **exit code 3**; re-run **plan** then **apply**
52-
- **`agentctl run`** — execute a workflow locally; JSON Schema for inputs where configured; policy gates
52+
- **`agentctl run`** — execute a workflow locally; JSON Schema for inputs where configured; policy gates pause for **human-in-the-loop (HITL)** approval when a tool call requires it
5353
- **`agentctl logs`** — read **trace events** from SQLite (`--run`, `--workflow`, or recent runs)
5454
- **Tools****`native`**, **`http`**, **`mock`**, and **`mcp`** — MCP supports **stdio** (subprocess) or **streamable HTTP** (`spec.mcp.transport: http`, `url`, optional `headers` with `env:` tokens)
5555
- **Project defaults** — besides **`model`** and **`policy`**, optional **`runtime`** flows to **`spec.runtime`** on agents/workflows when omitted (MVP: **`local`** or unset; see spec validation)
@@ -142,6 +142,8 @@ Notes:
142142

143143
- **`init`** creates `my-agent-system/` with `apiVersion: agentic.dev/v0` resources and a **`hello`** workflow (native `echo` tool only — **no network**).
144144
- **`apply`** in non-interactive environments needs **`--auto-approve`** or **`AGENTCTL_AUTO_APPROVE=1`**.
145+
- **`run`** HITL: gated tool calls exit with **`Status: interrupted`** (exit **0**). Resume with **`--resume <run-id> --decision approve|reject|edit|switch`** (use **`--decision-edit-json`** / **`--decision-switch-target`** when needed), or skip prompts with **`--auto-approve`** / **`AGENTCTL_AUTO_APPROVE=1`**. Pre-approve a specific call with repeated **`--approve <uses>`**. Set **`AGENTCTL_HITL_ACTOR`** to attribute decisions in trace logs.
146+
- **`Policy.spec.hitl.interruptOn`** keys are **Tool metadata.name** values; they configure review options (edit rules, switch targets) for calls already gated by **`approvals.requiredFor`** or safety metadata — they do not gate tools on their own.
145147
- **`run`** stores traces in the **same** SQLite file used for plan/apply (default **`.agentic/state.db`** under `--project`).
146148
- If **`spec.traces.retentionDays`** is a positive integer, runs older than that many **UTC calendar days** (by `runs.started_at`) are deleted lazily on **`run`** and **`logs`** (child trace rows cascade). Unset or non-positive means no pruning.
147149
- Use **`logs --run <id>`** after a run if you want a single run’s trace (IDs are printed by **`run`**).

internal/cli/hitl.go

Lines changed: 70 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -2,55 +2,76 @@ package cli
22

33
import (
44
"bufio"
5+
"context"
56
"encoding/json"
67
"fmt"
78
"io"
89
"os"
910
"strings"
1011

12+
"github.com/LAA-Software-Engineering/agentic-control-plane/internal/engine"
1113
"github.com/LAA-Software-Engineering/agentic-control-plane/internal/policy"
1214
"github.com/LAA-Software-Engineering/agentic-control-plane/internal/runtime"
1315
"github.com/LAA-Software-Engineering/agentic-control-plane/internal/spec"
16+
"github.com/LAA-Software-Engineering/agentic-control-plane/internal/state"
1417
"github.com/mattn/go-isatty"
1518
)
1619

1720
// EnvHitlActor overrides the actor recorded on approval trace events.
1821
const EnvHitlActor = "AGENTCTL_HITL_ACTOR"
1922

23+
// maxDecisionEditJSONBytes caps --decision-edit-json size (well below checkpoint limits).
24+
const maxDecisionEditJSONBytes = 1 << 20
25+
2026
func hitlActorFromEnv() string {
2127
if v := strings.TrimSpace(os.Getenv(EnvHitlActor)); v != "" {
2228
return v
2329
}
2430
if u := strings.TrimSpace(os.Getenv("USER")); u != "" {
2531
return u
2632
}
27-
return "operator"
33+
return policy.DefaultHitlActor
2834
}
2935

30-
func applyHitlRunOptions(opts *runtime.WorkflowRunOptions, autoApprove bool, decision string, editJSON string, switchTarget string) error {
36+
func applyHitlRunOptions(opts *runtime.WorkflowRunOptions, resuming bool, autoApprove bool, decision string, editJSON string, switchTarget string) error {
3137
opts.AutoApprove = autoApprove || envAutoApproveEnabled()
3238
opts.HitlActor = hitlActorFromEnv()
3339
decision = strings.TrimSpace(decision)
40+
editJSON = strings.TrimSpace(editJSON)
41+
switchTarget = strings.TrimSpace(switchTarget)
42+
3443
if decision == "" {
44+
if editJSON != "" || switchTarget != "" {
45+
return fmt.Errorf("run: --decision-edit-json and --decision-switch-target require --decision")
46+
}
3547
return nil
3648
}
49+
if !resuming {
50+
return fmt.Errorf("run: --decision requires --resume <run-id>")
51+
}
3752
kind, err := spec.ParseHitlDecisionKind(decision)
3853
if err != nil {
3954
return err
4055
}
41-
hd := &runtime.HitlDecisionOptions{Kind: string(kind)}
56+
hd := &runtime.HitlDecisionOptions{Kind: kind}
4257
switch kind {
4358
case spec.HitlDecisionEdit:
44-
if strings.TrimSpace(editJSON) == "" {
59+
if editJSON == "" {
4560
return fmt.Errorf("run: --decision edit requires --decision-edit-json")
4661
}
62+
if len(editJSON) > maxDecisionEditJSONBytes {
63+
return fmt.Errorf("run: --decision-edit-json exceeds %d bytes", maxDecisionEditJSONBytes)
64+
}
4765
var m map[string]any
4866
if err := json.Unmarshal([]byte(editJSON), &m); err != nil {
4967
return fmt.Errorf("run: --decision-edit-json: %w", err)
5068
}
69+
if m == nil {
70+
return fmt.Errorf("run: --decision-edit-json must be a JSON object")
71+
}
5172
hd.EditedWith = m
5273
case spec.HitlDecisionSwitch:
53-
hd.SwitchTarget = strings.TrimSpace(switchTarget)
74+
hd.SwitchTarget = switchTarget
5475
if hd.SwitchTarget == "" {
5576
return fmt.Errorf("run: --decision switch requires --decision-switch-target")
5677
}
@@ -82,7 +103,7 @@ func maybePromptHitlDecision(in io.Reader, out io.Writer, gate policy.HitlGate)
82103
fmt.Fprintf(out, "Unknown decision %q\n", line)
83104
continue
84105
}
85-
if !decisionAllowed(kind, gate.Review.AllowedDecisions) {
106+
if !policy.IsDecisionAllowed(kind, gate.Review.AllowedDecisions) {
86107
fmt.Fprintf(out, "Decision %q is not allowed for this call\n", kind)
87108
continue
88109
}
@@ -94,6 +115,10 @@ func maybePromptHitlDecision(in io.Reader, out io.Writer, gate policy.HitlGate)
94115
if err != nil {
95116
return nil, err
96117
}
118+
if len(editLine) > maxDecisionEditJSONBytes {
119+
fmt.Fprintf(out, "Edited args exceed %d bytes\n", maxDecisionEditJSONBytes)
120+
continue
121+
}
97122
var m map[string]any
98123
if err := json.Unmarshal([]byte(editLine), &m); err != nil {
99124
fmt.Fprintf(out, "Invalid JSON: %v\n", err)
@@ -116,15 +141,6 @@ func maybePromptHitlDecision(in io.Reader, out io.Writer, gate policy.HitlGate)
116141
}
117142
}
118143

119-
func decisionAllowed(kind spec.HitlDecisionKind, allowed []spec.HitlDecisionKind) bool {
120-
for _, a := range allowed {
121-
if a == kind {
122-
return true
123-
}
124-
}
125-
return false
126-
}
127-
128144
func readLine(r io.Reader) (string, error) {
129145
sc := bufio.NewScanner(r)
130146
if !sc.Scan() {
@@ -135,3 +151,42 @@ func readLine(r io.Reader) (string, error) {
135151
}
136152
return strings.TrimSpace(sc.Text()), nil
137153
}
154+
155+
func hitlGateFromCheckpoint(contextJSON string) (*policy.HitlGate, error) {
156+
var payload struct {
157+
PendingHitl *engine.PendingHitlState `json:"pendingHitl,omitempty"`
158+
}
159+
if err := json.Unmarshal([]byte(contextJSON), &payload); err != nil {
160+
return nil, fmt.Errorf("unmarshal checkpoint: %w", err)
161+
}
162+
if payload.PendingHitl == nil {
163+
return nil, nil
164+
}
165+
p := payload.PendingHitl
166+
return &policy.HitlGate{
167+
Uses: p.Uses,
168+
With: p.With,
169+
Review: p.Review,
170+
}, nil
171+
}
172+
173+
// loadPendingHitlGate reads the latest checkpoint for a run awaiting HITL input.
174+
func loadPendingHitlGate(ctx context.Context, st state.RuntimeStore, runID string) (*policy.HitlGate, error) {
175+
cp, err := st.GetLatestCheckpoint(ctx, runID)
176+
if err != nil {
177+
return nil, err
178+
}
179+
return hitlGateFromCheckpoint(cp.ContextJSON)
180+
}
181+
182+
// requirePendingHitlGate returns the pending gate or an error when interrupted without one.
183+
func requirePendingHitlGate(ctx context.Context, st state.RuntimeStore, runID string) (*policy.HitlGate, error) {
184+
gate, err := loadPendingHitlGate(ctx, st, runID)
185+
if err != nil {
186+
return nil, err
187+
}
188+
if gate == nil {
189+
return nil, fmt.Errorf("run: run %q is interrupted but checkpoint has no pending approval gate", runID)
190+
}
191+
return gate, nil
192+
}

internal/cli/hitl_load.go

Lines changed: 0 additions & 42 deletions
This file was deleted.

internal/cli/run.go

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ func runRun(cmd *cobra.Command, wfName, resumeRunID, inputFile string, inputPair
225225
Resume: resumeID != "",
226226
RunID: resumeID,
227227
}
228-
if err := applyHitlRunOptions(&opts, autoApprove, decision, decisionEditJSON, decisionSwitchTarget); err != nil {
228+
if err := applyHitlRunOptions(&opts, resumeID != "", autoApprove, decision, decisionEditJSON, decisionSwitchTarget); err != nil {
229229
return NewExitError(ExitValidationError, err)
230230
}
231231
if !opts.Resume {
@@ -243,14 +243,17 @@ func runRun(cmd *cobra.Command, wfName, resumeRunID, inputFile string, inputPair
243243
if runErr == nil && runID != "" {
244244
if r, gerr := st.GetRun(ctx, runID); gerr == nil && r != nil && r.Status == state.RunStatusInterrupted {
245245
if opts.AutoApprove || strings.TrimSpace(decision) != "" {
246+
if _, gerr := requirePendingHitlGate(ctx, st, runID); gerr != nil {
247+
return gerr
248+
}
246249
resumeID = runID
247250
continue
248251
}
249-
gate, gerr := loadPendingHitlGate(ctx, st, runID)
252+
gate, gerr := requirePendingHitlGate(ctx, st, runID)
250253
if gerr != nil {
251-
return fmt.Errorf("run: load hitl gate: %w", gerr)
254+
return gerr
252255
}
253-
if gate != nil && isatty.IsTerminal(os.Stdin.Fd()) {
256+
if isatty.IsTerminal(os.Stdin.Fd()) {
254257
dec, perr := maybePromptHitlDecision(cmd.InOrStdin(), cmd.OutOrStdout(), *gate)
255258
if perr != nil {
256259
return perr
@@ -333,6 +336,9 @@ func writeRunOutput(cmd *cobra.Command, ctx context.Context, st *sqlite.Store, e
333336
fmt.Fprintf(&b, "\nRun ID: %s\n", runID)
334337
if got != nil {
335338
fmt.Fprintf(&b, "Status: %s\n", got.Status)
339+
if got.Status == state.RunStatusInterrupted {
340+
fmt.Fprintf(&b, "Resume with: agentctl run --resume %s --decision approve|reject|edit|switch ...\n", runID)
341+
}
336342
if got.ErrorText != "" {
337343
fmt.Fprintf(&b, "Error: %s\n", got.ErrorText)
338344
}

internal/cli/run_test.go

Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,105 @@ func TestRun_policyGated_interruptThenResumeApprove(t *testing.T) {
175175
}
176176
}
177177

178+
func TestRun_decisionWithoutResume_exit2(t *testing.T) {
179+
db := filepath.Join(t.TempDir(), "run-decision.db")
180+
root := runPolicyRoot(t)
181+
ResetGlobalsForTest()
182+
cmd := NewRootCmd()
183+
cmd.SetOut(io.Discard)
184+
cmd.SetErr(io.Discard)
185+
cmd.SetArgs([]string{
186+
"run", "workflow/gated",
187+
"--project", root,
188+
"--state", db,
189+
"--input", "topic=x",
190+
"--decision", "approve",
191+
})
192+
err := cmd.Execute()
193+
if err == nil {
194+
t.Fatal("expected validation error")
195+
}
196+
if ExitCodeOf(err) != ExitValidationError {
197+
t.Fatalf("exit=%d want %d err=%v", ExitCodeOf(err), ExitValidationError, err)
198+
}
199+
}
200+
201+
func TestRun_hitlRejectViaResume(t *testing.T) {
202+
db := filepath.Join(t.TempDir(), "run-reject.db")
203+
root := runPolicyRoot(t)
204+
runID := runPolicyInterrupted(t, root, db)
205+
206+
ResetGlobalsForTest()
207+
var out bytes.Buffer
208+
cmd := NewRootCmd()
209+
cmd.SetOut(&out)
210+
cmd.SetErr(&out)
211+
cmd.SetArgs([]string{
212+
"run", "--resume", runID,
213+
"--project", root,
214+
"--state", db,
215+
"--decision", "reject",
216+
})
217+
err := cmd.Execute()
218+
if err == nil {
219+
t.Fatal("expected rejection error")
220+
}
221+
if ExitCodeOf(err) != ExitExecutionError {
222+
t.Fatalf("exit=%d want %d err=%v", ExitCodeOf(err), ExitExecutionError, err)
223+
}
224+
}
225+
226+
func TestRun_hitlEditViaResume(t *testing.T) {
227+
db := filepath.Join(t.TempDir(), "run-edit.db")
228+
root := runPolicyRoot(t)
229+
runID := runPolicyInterrupted(t, root, db)
230+
231+
ResetGlobalsForTest()
232+
var out bytes.Buffer
233+
cmd := NewRootCmd()
234+
cmd.SetOut(&out)
235+
cmd.SetErr(&out)
236+
cmd.SetArgs([]string{
237+
"run", "--resume", runID,
238+
"--project", root,
239+
"--state", db,
240+
"--decision", "edit",
241+
"--decision-edit-json", `{"topic":"edited"}`,
242+
})
243+
if err := cmd.Execute(); err != nil {
244+
t.Fatalf("resume edit: %v\n%s", err, out.String())
245+
}
246+
if !strings.Contains(out.String(), "Status: succeeded") {
247+
t.Fatalf("expected succeeded:\n%s", out.String())
248+
}
249+
}
250+
251+
func runPolicyInterrupted(t *testing.T, root, db string) string {
252+
t.Helper()
253+
ResetGlobalsForTest()
254+
var out bytes.Buffer
255+
cmd := NewRootCmd()
256+
cmd.SetOut(&out)
257+
cmd.SetErr(&out)
258+
cmd.SetArgs([]string{
259+
"run", "workflow/gated",
260+
"--project", root,
261+
"--state", db,
262+
"--input", "topic=x",
263+
})
264+
if err := cmd.Execute(); err != nil {
265+
t.Fatalf("interrupt run: %v\n%s", err, out.String())
266+
}
267+
for _, line := range strings.Split(out.String(), "\n") {
268+
line = strings.TrimSpace(line)
269+
if strings.HasPrefix(line, "Run ID:") {
270+
return strings.TrimSpace(strings.TrimPrefix(line, "Run ID:"))
271+
}
272+
}
273+
t.Fatal("missing run id")
274+
return ""
275+
}
276+
178277
func TestRun_withApprove_succeeds(t *testing.T) {
179278
db := filepath.Join(t.TempDir(), "run-ok.db")
180279
root := runPolicyRoot(t)

internal/cli/testdata/run_policy/policy.yaml

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,10 @@ spec:
66
approvals:
77
requiredFor:
88
- "tool.helper.echo"
9+
hitl:
10+
descriptionPrefix: "Echo tool requires operator approval"
11+
interruptOn:
12+
helper:
13+
allowedDecisions: [approve, reject, edit]
14+
allowedEditArgs: [topic]
15+
description: "Review echo call (${uses})"

internal/engine/execution.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -163,6 +163,7 @@ func (e *Executor) Run(ctx context.Context, in RunInput) error {
163163
if gerr != nil {
164164
err = gerr
165165
} else if gate != nil {
166+
e.recordAutoApproveHitl(ctx, in.RunID, step, i, *gate, in.Hitl.Actor)
166167
pctx.ApprovedActions = append(append([]string(nil), pctx.ApprovedActions...), uses)
167168
}
168169
}

0 commit comments

Comments
 (0)