|
8 | 8 | import subprocess |
9 | 9 | import sys |
10 | 10 | import time |
| 11 | +from typing import TYPE_CHECKING |
11 | 12 |
|
12 | 13 | from pydantic import ValidationError |
13 | 14 |
|
|
38 | 39 | upload_trace_to_s3, |
39 | 40 | ) |
40 | 41 |
|
| 42 | +if TYPE_CHECKING: |
| 43 | + from workflow import Workflow |
| 44 | + |
41 | 45 | _SDK_NO_RESULT_MESSAGE = ( |
42 | 46 | "Agent SDK stream ended without a ResultMessage (agent_status=unknown). " |
43 | 47 | "Treat as failure: possible SDK bug, network interruption, or protocol mismatch." |
@@ -366,6 +370,84 @@ def _run_repoless_task( |
366 | 370 | return result_dict |
367 | 371 |
|
368 | 372 |
|
def _apply_post_hook_gates(
    # Quoted on purpose: ``Workflow`` is imported only under TYPE_CHECKING, so
    # an unquoted annotation would raise NameError at definition time on
    # interpreters that evaluate annotations eagerly (i.e. whenever the module
    # does not use ``from __future__ import annotations``).
    workflow: "Workflow | None",
    *,
    read_only: bool,
    build_passed: bool,
    lint_passed: bool,
    build_before: bool,
    lint_before: bool,
) -> bool:
    """Resolve the coding lane's post-hook verify gates against the workflow (#301).

    Decision (issue #301 acceptance criteria): the inline post-hook path
    CONSULTS each declared ``verify_build`` / ``verify_lint`` step's ``gate``
    through the runner's ``gate_status`` — the single place gate semantics live —
    rather than routing the post-hooks through the runner's step handlers.
    Routing through the runner would also change failure-path side effects (a
    gating ``verify_build`` with ``on_failure: fail`` stops the runner *before*
    ``ensure_pr``, stranding committed work with no PR), which is the broader
    half-migrated-runner unification the issue defers. Here the inline ordering
    (verify → ensure_pr always runs) is preserved; only the task verdict honors
    the declared gate.

    Per-step semantics:

    - A declared step gates per its ``gate`` (``strict`` | ``regression_only`` |
      ``informational``; unset = ``regression_only``), but only when its
      ``on_failure`` is ``fail`` — ``continue``/``skip_remaining`` steps are
      advisory for the task verdict, matching the runner.
    - An undeclared ``verify_build`` keeps the legacy regression-only gating
      (identical to ``gate_status`` with ``gate=None``).
    - An undeclared ``verify_lint`` never gates (legacy: lint is not used for
      terminal status unless a workflow opts in by declaring the step).
    - ``workflow is None`` (post-hook reload failed) falls back to the legacy
      gating for both, so a corrupt file cannot strand the agent's work.

    Args:
        workflow: The reloaded workflow, or ``None`` when the reload failed.
            Only ``workflow.steps`` is read, and for each step only ``kind``,
            ``gate`` and ``on_failure``.
        read_only: Forwarded unchanged to ``gate_status``.
        build_passed: Whether the post-agent build check passed.
        lint_passed: Whether the post-agent lint check passed.
        build_before: Whether the build was passing before the agent's changes.
        lint_before: Whether lint was passing before the agent's changes.

    Returns:
        ``True`` when no failed check gates the task, ``False`` as soon as at
        least one failed check does. A check that passed never gates.
    """
    # Nothing failed, so nothing can gate: skip the workflow lookup entirely.
    if build_passed and lint_passed:
        return True

    # Function-scope import, as in the rest of this module's post-hook code.
    from workflow import gate_status

    steps = list(workflow.steps) if workflow is not None else []
    gates_ok = True
    for kind, passed, was_passing_before in (
        ("verify_build", build_passed, build_before),
        ("verify_lint", lint_passed, lint_before),
    ):
        # Only failures need a verdict; a passing check is never gating.
        if passed:
            continue

        # The first declared step of this kind decides the gate.
        step = next((s for s in steps if s.kind == kind), None)
        if step is None:
            if kind == "verify_lint":
                # Undeclared lint stays out of the terminal status (legacy).
                continue
            # Undeclared build: legacy regression-only gating that fails the task.
            gate, gating, on_failure = None, True, "fail"
        else:
            gate, on_failure = step.gate, step.on_failure
            # Only ``on_failure: fail`` steps can fail the task.
            gating = on_failure == "fail"

        status = gate_status(
            passed=passed,
            gate=gate,
            read_only=read_only,
            was_passing_before=was_passing_before,
        )
        label = gate or "regression_only"
        if status == "succeeded":
            # gate_status waved the failure through; log which rule applied.
            if read_only:
                log("INFO", f"read-only workflow: {kind} failed — informational only, not gating")
            elif gate == "informational":
                log("INFO", f"{kind} failed — gate=informational, not gating")
            else:
                log(
                    "WARN",
                    f"Post-agent {kind} failed, but it was already failing before "
                    "agent changes — not counting as regression",
                )
        elif gating:
            log("WARN", f"{kind} failed — gate={label} gates the task")
            gates_ok = False
        else:
            log("INFO", f"{kind} failed — gate={label} but on_failure={on_failure}, not gating")
    return gates_ok
| 449 | + |
| 450 | + |
369 | 451 | def _resolve_overall_task_status( |
370 | 452 | agent_result: AgentResult, |
371 | 453 | *, |
@@ -870,22 +952,25 @@ def _on_trace_truncated(max_bytes: int, first_dropped: int) -> None: |
870 | 952 | "turns_attempted": agent_result.num_turns or agent_result.turns, |
871 | 953 | } |
872 | 954 |
|
873 | | - # Resolve the post-hook gating inputs: read_only and the ensure_pr |
874 | | - # strategy (create / push_resolve / resolve) the workflow declares. |
| 955 | + # Resolve the post-hook gating inputs: read_only, plus the ensure_pr |
| 956 | + # strategy (create / push_resolve / resolve) and the verify-step |
| 957 | + # gates (#301) that the workflow declares. |
875 | 958 | # |
876 | 959 | # ``read_only`` comes from ``config`` — build_config already computed |
877 | 960 | # it (with its own fail-soft fallback) and it drove Cedar during the |
878 | 961 | # run, so reusing it keeps the post-hook on the SAME verdict rather |
879 | | - # than re-deriving a possibly-divergent one. The workflow file is only |
880 | | - # reloaded for the ensure_pr STRATEGY, and that reload is wrapped in |
881 | | - # the same WorkflowValidationError fallback build_config uses |
882 | | - # (config.py): this code path runs AFTER run_agent has already mutated |
883 | | - # / committed the tree, so a load failure here must NOT strand the work |
884 | | - # as FAILED with no PR — it falls back to the default "create" strategy |
885 | | - # and still opens the PR (PR review #296 finding #5). |
| 962 | + # than re-deriving a possibly-divergent one. The workflow file is |
| 963 | + # reloaded for the ensure_pr STRATEGY and the verify-step GATES, and |
| 964 | + # that reload is wrapped in the same WorkflowValidationError fallback |
| 965 | + # build_config uses (config.py): this code path runs AFTER run_agent |
| 966 | + # has already mutated / committed the tree, so a load failure here |
| 967 | + # must NOT strand the work as FAILED with no PR — it falls back to |
| 968 | + # the default "create" strategy + legacy regression-only gating and |
| 969 | + # still opens the PR (PR review #296 finding #5). |
886 | 970 | from workflow import WorkflowValidationError, load_workflow |
887 | 971 |
|
888 | 972 | workflow_read_only = config.read_only |
| 973 | + _workflow = None |
889 | 974 | try: |
890 | 975 | _workflow = load_workflow( |
891 | 976 | (config.resolved_workflow or {}).get("id", "coding/new-task-v1") |
@@ -944,23 +1029,19 @@ def _on_trace_truncated(max_bytes: int, first_dropped: int) -> None: |
944 | 1029 |
|
945 | 1030 | # Overall status: do not infer success from PR/build when the SDK never |
946 | 1031 | # emitted ResultMessage (agent_status=unknown) — that masks protocol gaps. |
947 | | - # NOTE: lint_passed is intentionally NOT used for terminal status. |
| 1032 | + # Gating honors each verify step's declared ``gate`` via the runner's |
| 1033 | + # gate_status (#301); an undeclared verify_lint never gates (legacy). |
948 | 1034 | agent_status = agent_result.status |
949 | | - # Default True = assume build was green before, so a post-agent |
950 | | - # failure IS counted as a regression (conservative). |
951 | | - build_before = setup.build_before |
952 | | - if workflow_read_only: |
953 | | - build_ok = True # Read-only review — build status is informational only |
954 | | - if not build_passed: |
955 | | - log("INFO", "read-only workflow: build failed — informational only, not gating") |
956 | | - else: |
957 | | - build_ok = build_passed or not build_before |
958 | | - if not build_passed and not build_before and not workflow_read_only: |
959 | | - log( |
960 | | - "WARN", |
961 | | - "Post-agent build failed, but build was already failing before " |
962 | | - "agent changes — not counting as regression", |
963 | | - ) |
| 1035 | + build_ok = _apply_post_hook_gates( |
| 1036 | + _workflow, |
| 1037 | + read_only=workflow_read_only, |
| 1038 | + build_passed=build_passed, |
| 1039 | + lint_passed=lint_passed, |
| 1040 | + # setup defaults assume green-before, so a post-agent failure IS |
| 1041 | + # counted as a regression (conservative). |
| 1042 | + build_before=setup.build_before, |
| 1043 | + lint_before=setup.lint_before, |
| 1044 | + ) |
964 | 1045 | overall_status, result_error = _resolve_overall_task_status( |
965 | 1046 | agent_result, |
966 | 1047 | build_ok=build_ok, |
|
0 commit comments