3131)
3232from .planner import Planner
3333from .policy import PolicyEngine
34+ from .sandbox import (
35+ recycle_worker_state ,
36+ revalidate_step_capability ,
37+ sign_step ,
38+ verify_step_signature ,
39+ )
3440from .storage import StorageGateway
3541
3642log = logging .getLogger ("agent" )
@@ -176,7 +182,7 @@ def submit_task():
176182 task .budgets = budgets
177183
178184 # Verify token immediately (proves signing is consistent)
179- token_valid , token_reason = verify_token (cap )
185+ token_valid , _ = verify_token (cap , consume_nonce = False )
180186
181187 _audit_log ("task_submitted" , {
182188 "task_id" : task .task_id ,
@@ -224,6 +230,10 @@ def submit_task():
224230 step .status = StepStatus .DENIED
225231 step .error = reason
226232
233+ # Bind the evaluated step to the capability and policy state so any
234+ # mutation between planning, approval, and execution is detected.
235+ step .signature = sign_step (step , cap )
236+
227237 if needs_approval :
228238 task .status = TaskStatus .PENDING_APPROVAL
229239 else :
@@ -282,7 +292,10 @@ def approve_steps(task_id: str):
282292
283293 # Verify capability token is still valid before approving
284294 if task .capability :
285- token_valid , token_reason = verify_token (task .capability )
295+ token_valid , token_reason = verify_token (
296+ task .capability ,
297+ consume_nonce = False ,
298+ )
286299 if not token_valid :
287300 _audit_log ("approval_rejected" , {
288301 "task_id" : task_id ,
@@ -297,6 +310,25 @@ def approve_steps(task_id: str):
297310 if step .status != StepStatus .PENDING :
298311 continue
299312 if approve_all or step .step_id in step_ids :
313+ assert task .capability is not None
314+ sig_valid , sig_reason = verify_step_signature (
315+ step ,
316+ task .capability ,
317+ step .signature ,
318+ )
319+ if not sig_valid :
320+ step .status = StepStatus .FAILED
321+ step .error = sig_reason
322+ task .status = TaskStatus .FAILED
323+ _audit_log ("approval_rejected" , {
324+ "task_id" : task_id ,
325+ "step_id" : step .step_id ,
326+ "reason" : sig_reason ,
327+ })
328+ return jsonify ({
329+ "error" : f"step integrity check failed: { sig_reason } " ,
330+ "step_id" : step .step_id ,
331+ }), 409
300332 step .status = StepStatus .APPROVED
301333 approved_count += 1
302334
@@ -441,72 +473,117 @@ def _execute_task(task: Task):
441473 """Execute approved steps sequentially in a background thread."""
442474 log .info ("executing task %s (%d steps)" , task .task_id , len (task .steps ))
443475
444- for step in task .steps :
445- # Only execute approved steps
446- if step .status != StepStatus .APPROVED :
447- continue
448-
449- # Check if task was cancelled
450- if task .status == TaskStatus .CANCELLED :
451- step .status = StepStatus .SKIPPED
452- continue
453-
454- # Token expiry check before each step
455- if task .capability and task .capability .is_expired ():
456- step .status = StepStatus .FAILED
457- step .error = "capability token expired during execution"
458- task .status = TaskStatus .FAILED
459- _audit_log ("token_expired_during_execution" , {
476+ try :
477+ for step in task .steps :
478+ # Only execute approved steps
479+ if step .status != StepStatus .APPROVED :
480+ continue
481+
482+ # Check if task was cancelled
483+ if task .status == TaskStatus .CANCELLED :
484+ step .status = StepStatus .SKIPPED
485+ continue
486+
487+ # Token expiry/signature check before each step
488+ if task .capability and task .capability .is_expired ():
489+ step .status = StepStatus .FAILED
490+ step .error = "capability token expired during execution"
491+ task .status = TaskStatus .FAILED
492+ _audit_log ("token_expired_during_execution" , {
493+ "task_id" : task .task_id ,
494+ "step_id" : step .step_id ,
495+ })
496+ break
497+
498+ # Budget check
499+ budget_err = task .budgets .check ()
500+ if budget_err :
501+ step .status = StepStatus .FAILED
502+ step .error = budget_err
503+ task .status = TaskStatus .FAILED
504+ _audit_log ("budget_exceeded" , {
505+ "task_id" : task .task_id ,
506+ "error" : budget_err ,
507+ })
508+ break
509+
510+ assert task .capability is not None
511+ token_valid , token_reason = verify_token (
512+ task .capability ,
513+ consume_nonce = False ,
514+ )
515+ if not token_valid :
516+ step .status = StepStatus .FAILED
517+ step .error = token_reason
518+ task .status = TaskStatus .FAILED
519+ _audit_log ("token_integrity_violation" , {
520+ "task_id" : task .task_id ,
521+ "step_id" : step .step_id ,
522+ "reason" : token_reason ,
523+ })
524+ break
525+
526+ sig_valid , sig_reason = verify_step_signature (
527+ step ,
528+ task .capability ,
529+ step .signature ,
530+ )
531+ if not sig_valid :
532+ step .status = StepStatus .FAILED
533+ step .error = sig_reason
534+ task .status = TaskStatus .FAILED
535+ _audit_log ("step_integrity_violation" , {
536+ "task_id" : task .task_id ,
537+ "step_id" : step .step_id ,
538+ "reason" : sig_reason ,
539+ })
540+ break
541+
542+ cap_valid , cap_reason = revalidate_step_capability (step , task .capability )
543+ if not cap_valid :
544+ step .status = StepStatus .FAILED
545+ step .error = cap_reason
546+ task .status = TaskStatus .FAILED
547+ _audit_log ("step_capability_violation" , {
548+ "task_id" : task .task_id ,
549+ "step_id" : step .step_id ,
550+ "reason" : cap_reason ,
551+ })
552+ break
553+
554+ _executor .execute (step , task .capability , task .budgets )
555+
556+ _audit_log ("step_executed" , {
460557 "task_id" : task .task_id ,
461558 "step_id" : step .step_id ,
559+ "action" : step .action .value ,
560+ "status" : step .status .value ,
561+ "token_id" : task .capability .token_id if task .capability else "" ,
462562 })
463- break
464-
465- # Budget check
466- budget_err = task .budgets .check ()
467- if budget_err :
468- step .status = StepStatus .FAILED
469- step .error = budget_err
470- task .status = TaskStatus .FAILED
471- _audit_log ("budget_exceeded" , {
472- "task_id" : task .task_id ,
473- "error" : budget_err ,
474- })
475- break
476-
477- # Execute step (capability guaranteed non-None by expiry check above)
478- assert task .capability is not None
479- _executor .execute (step , task .capability , task .budgets )
480-
481- _audit_log ("step_executed" , {
482- "task_id" : task .task_id ,
483- "step_id" : step .step_id ,
484- "action" : step .action .value ,
485- "status" : step .status .value ,
486- "token_id" : task .capability .token_id if task .capability else "" ,
487- })
488563
489- # If step failed and it's critical, stop the task
490- if step .status == StepStatus .FAILED :
491- task .status = TaskStatus .FAILED
492- break
564+ # If step failed and it's critical, stop the task
565+ if step .status == StepStatus .FAILED :
566+ task .status = TaskStatus .FAILED
567+ break
493568
494- # Finalise task status
495- if task .status == TaskStatus .RUNNING :
496- failed = any (s .status == StepStatus .FAILED for s in task .steps )
497- task .status = TaskStatus .FAILED if failed else TaskStatus .COMPLETED
569+ # Finalise task status
570+ if task .status == TaskStatus .RUNNING :
571+ failed = any (s .status == StepStatus .FAILED for s in task .steps )
572+ task .status = TaskStatus .FAILED if failed else TaskStatus .COMPLETED
498573
499- task .completed_at = time .time ()
574+ task .completed_at = time .time ()
500575
501- _audit_log ("task_completed" , {
502- "task_id" : task .task_id ,
503- "status" : task .status .value ,
504- "steps_completed" : sum (1 for s in task .steps if s .status == StepStatus .COMPLETED ),
505- "steps_failed" : sum (1 for s in task .steps if s .status == StepStatus .FAILED ),
506- "steps_denied" : sum (1 for s in task .steps if s .status == StepStatus .DENIED ),
507- })
576+ _audit_log ("task_completed" , {
577+ "task_id" : task .task_id ,
578+ "status" : task .status .value ,
579+ "steps_completed" : sum (1 for s in task .steps if s .status == StepStatus .COMPLETED ),
580+ "steps_failed" : sum (1 for s in task .steps if s .status == StepStatus .FAILED ),
581+ "steps_denied" : sum (1 for s in task .steps if s .status == StepStatus .DENIED ),
582+ })
508583
509- log .info ("task %s finished: %s" , task .task_id , task .status .value )
584+ log .info ("task %s finished: %s" , task .task_id , task .status .value )
585+ finally :
586+ recycle_worker_state (task .task_id )
510587
511588
512589# --- Security headers ------------------------------------------------------
0 commit comments