Skip to content

Commit 14820c8

Browse files
krokokobgagentisadeks
authored
feat(agent-runtime): honor per-step gate on the coding lane (#301) (#307)
The coding lane's inline post-hook gating now consults each declared verify_build / verify_lint step's `gate` through the runner's gate_status (made public from _gate_status) — one gate implementation for both lanes. A coding workflow declaring `gate: strict` now fails the task on a build failure; `informational` never gates. The three shipped coding workflows keep their exact pre-change effective gating, locked by a parity test matrix (inline == runner == legacy verdicts). The inline ordering is preserved: ensure_pr still runs after a gating verify failure so the agent's work surfaces as a reviewable PR. Refs #301 Co-authored-by: bgagent <bgagent@noreply.github.com> Co-authored-by: Sphia Sadek <isadeks@gmail.com>
1 parent f57b931 commit 14820c8

7 files changed

Lines changed: 512 additions & 51 deletions

File tree

agent/src/pipeline.py

Lines changed: 106 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import subprocess
99
import sys
1010
import time
11+
from typing import TYPE_CHECKING
1112

1213
from pydantic import ValidationError
1314

@@ -38,6 +39,9 @@
3839
upload_trace_to_s3,
3940
)
4041

42+
if TYPE_CHECKING:
43+
from workflow import Workflow
44+
4145
_SDK_NO_RESULT_MESSAGE = (
4246
"Agent SDK stream ended without a ResultMessage (agent_status=unknown). "
4347
"Treat as failure: possible SDK bug, network interruption, or protocol mismatch."
@@ -366,6 +370,84 @@ def _run_repoless_task(
366370
return result_dict
367371

368372

373+
def _apply_post_hook_gates(
374+
workflow: Workflow | None,
375+
*,
376+
read_only: bool,
377+
build_passed: bool,
378+
lint_passed: bool,
379+
build_before: bool,
380+
lint_before: bool,
381+
) -> bool:
382+
"""Resolve the coding lane's post-hook verify gates against the workflow (#301).
383+
384+
Decision (issue #301 acceptance criteria): the inline post-hook path
385+
CONSULTS each declared ``verify_build`` / ``verify_lint`` step's ``gate``
386+
through the runner's ``gate_status`` — the single place gate semantics live —
387+
rather than routing the post-hooks through the runner's step handlers.
388+
Routing through the runner would also change failure-path side effects (a
389+
gating ``verify_build`` with ``on_failure: fail`` stops the runner *before*
390+
``ensure_pr``, stranding committed work with no PR), which is the broader
391+
half-migrated-runner unification the issue defers. Here the inline ordering
392+
(verify → ensure_pr always runs) is preserved; only the task verdict honors
393+
the declared gate.
394+
395+
Per-step semantics:
396+
397+
- A declared step gates per its ``gate`` (``strict`` | ``regression_only`` |
398+
``informational``; unset = ``regression_only``), but only when its
399+
``on_failure`` is ``fail`` — ``continue``/``skip_remaining`` steps are
400+
advisory for the task verdict, matching the runner.
401+
- An undeclared ``verify_build`` keeps the legacy regression-only gating
402+
(identical to ``gate_status`` with ``gate=None``).
403+
- An undeclared ``verify_lint`` never gates (legacy: lint is not used for
404+
terminal status unless a workflow opts in by declaring the step).
405+
- ``workflow is None`` (post-hook reload failed) falls back to the legacy
406+
gating for both, so a corrupt file cannot strand the agent's work.
407+
"""
408+
from workflow import gate_status
409+
410+
steps = list(workflow.steps) if workflow is not None else []
411+
gates_ok = True
412+
for kind, passed, was_passing_before in (
413+
("verify_build", build_passed, build_before),
414+
("verify_lint", lint_passed, lint_before),
415+
):
416+
step = next((s for s in steps if s.kind == kind), None)
417+
if step is None:
418+
if kind == "verify_lint":
419+
continue
420+
gate, gating, on_failure = None, True, "fail"
421+
else:
422+
gate, gating, on_failure = step.gate, step.on_failure == "fail", step.on_failure
423+
status = gate_status(
424+
passed=passed,
425+
gate=gate,
426+
read_only=read_only,
427+
was_passing_before=was_passing_before,
428+
)
429+
if passed:
430+
continue
431+
label = gate or "regression_only"
432+
if status == "succeeded":
433+
if read_only:
434+
log("INFO", f"read-only workflow: {kind} failed — informational only, not gating")
435+
elif gate == "informational":
436+
log("INFO", f"{kind} failed — gate=informational, not gating")
437+
else:
438+
log(
439+
"WARN",
440+
f"Post-agent {kind} failed, but it was already failing before "
441+
"agent changes — not counting as regression",
442+
)
443+
elif gating:
444+
log("WARN", f"{kind} failed — gate={label} gates the task")
445+
gates_ok = False
446+
else:
447+
log("INFO", f"{kind} failed — gate={label} but on_failure={on_failure}, not gating")
448+
return gates_ok
449+
450+
369451
def _resolve_overall_task_status(
370452
agent_result: AgentResult,
371453
*,
@@ -870,22 +952,25 @@ def _on_trace_truncated(max_bytes: int, first_dropped: int) -> None:
870952
"turns_attempted": agent_result.num_turns or agent_result.turns,
871953
}
872954

873-
# Resolve the post-hook gating inputs: read_only and the ensure_pr
874-
# strategy (create / push_resolve / resolve) the workflow declares.
955+
# Resolve the post-hook gating inputs: read_only, the ensure_pr
956+
# strategy (create / push_resolve / resolve), and the verify steps'
957+
# declared gates (#301) the workflow declares.
875958
#
876959
# ``read_only`` comes from ``config`` — build_config already computed
877960
# it (with its own fail-soft fallback) and it drove Cedar during the
878961
# run, so reusing it keeps the post-hook on the SAME verdict rather
879-
# than re-deriving a possibly-divergent one. The workflow file is only
880-
# reloaded for the ensure_pr STRATEGY, and that reload is wrapped in
881-
# the same WorkflowValidationError fallback build_config uses
882-
# (config.py): this code path runs AFTER run_agent has already mutated
883-
# / committed the tree, so a load failure here must NOT strand the work
884-
# as FAILED with no PR — it falls back to the default "create" strategy
885-
# and still opens the PR (PR review #296 finding #5).
962+
# than re-deriving a possibly-divergent one. The workflow file is
963+
# reloaded for the ensure_pr STRATEGY and the verify-step GATES, and
964+
# that reload is wrapped in the same WorkflowValidationError fallback
965+
# build_config uses (config.py): this code path runs AFTER run_agent
966+
# has already mutated / committed the tree, so a load failure here
967+
# must NOT strand the work as FAILED with no PR — it falls back to
968+
# the default "create" strategy + legacy regression-only gating and
969+
# still opens the PR (PR review #296 finding #5).
886970
from workflow import WorkflowValidationError, load_workflow
887971

888972
workflow_read_only = config.read_only
973+
_workflow = None
889974
try:
890975
_workflow = load_workflow(
891976
(config.resolved_workflow or {}).get("id", "coding/new-task-v1")
@@ -944,23 +1029,19 @@ def _on_trace_truncated(max_bytes: int, first_dropped: int) -> None:
9441029

9451030
# Overall status: do not infer success from PR/build when the SDK never
9461031
# emitted ResultMessage (agent_status=unknown) — that masks protocol gaps.
947-
# NOTE: lint_passed is intentionally NOT used for terminal status.
1032+
# Gating honors each verify step's declared ``gate`` via the runner's
1033+
# gate_status (#301); an undeclared verify_lint never gates (legacy).
9481034
agent_status = agent_result.status
949-
# Default True = assume build was green before, so a post-agent
950-
# failure IS counted as a regression (conservative).
951-
build_before = setup.build_before
952-
if workflow_read_only:
953-
build_ok = True # Read-only review — build status is informational only
954-
if not build_passed:
955-
log("INFO", "read-only workflow: build failed — informational only, not gating")
956-
else:
957-
build_ok = build_passed or not build_before
958-
if not build_passed and not build_before and not workflow_read_only:
959-
log(
960-
"WARN",
961-
"Post-agent build failed, but build was already failing before "
962-
"agent changes — not counting as regression",
963-
)
1035+
build_ok = _apply_post_hook_gates(
1036+
_workflow,
1037+
read_only=workflow_read_only,
1038+
build_passed=build_passed,
1039+
lint_passed=lint_passed,
1040+
# setup defaults assume green-before, so a post-agent failure IS
1041+
# counted as a regression (conservative).
1042+
build_before=setup.build_before,
1043+
lint_before=setup.lint_before,
1044+
)
9641045
overall_status, result_error = _resolve_overall_task_status(
9651046
agent_result,
9661047
build_ok=build_ok,

agent/src/workflow/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939
StepOutcome,
4040
WorkflowCheckpoint,
4141
WorkflowResult,
42+
gate_status,
4243
run_workflow,
4344
)
4445
from .validator import assert_valid, validate_workflow
@@ -60,6 +61,7 @@
6061
"WorkflowResult",
6162
"WorkflowValidationError",
6263
"assert_valid",
64+
"gate_status",
6365
"load_workflow",
6466
"load_workflow_file",
6567
"policy_principal_for",

agent/src/workflow/runner.py

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -483,22 +483,24 @@ def _handle_run_agent(step: Step, ctx: StepContext) -> StepOutcome:
483483
)
484484

485485

486-
def _gate_status(
486+
def gate_status(
487487
*, passed: bool, gate: str | None, read_only: bool, was_passing_before: bool
488488
) -> StepStatus:
489489
"""Map a verify result + the step's ``gate`` to a step status.
490490
491491
Single place the verify-gate semantics live, shared by ``verify_build`` and
492492
``verify_lint`` (the two were near-identical twins that drifted on the
493-
``read_only`` rule — see the code-review finding). Mirrors ``pipeline.py``'s
494-
terminal-status logic so the workflow path and the legacy path agree:
493+
``read_only`` rule — see the code-review finding). Since #301 it is also the
494+
implementation behind the coding lane's inline post-hook gating
495+
(``pipeline._apply_post_hook_gates``), so both lanes honor a step's
496+
declared ``gate`` through this one function:
495497
496498
- ``informational`` (or a ``read_only`` workflow) — never gates.
497499
- ``strict`` — any failure gates.
498500
- ``regression_only`` **and the unset default** — fail only on a *regression*
499501
(was passing before, fails now); a check that was already red before the
500-
agent ran is not a regression and does not gate. This matches pipeline.py's
501-
unconditional ``build_ok = passed or not build_before`` — which is
502+
agent ran is not a regression and does not gate. This matches the legacy
503+
pipeline behavior of ``build_ok = passed or not build_before`` — which was
502504
regression-only for *every* task — so an unset gate agrees with the legacy
503505
path rather than defaulting to the stricter ``strict``.
504506
"""
@@ -521,7 +523,7 @@ def _handle_verify_build(step: Step, ctx: StepContext) -> StepOutcome:
521523
# was_passing_before defaults True (assume green-before, so a post-agent
522524
# failure IS a regression) — the same conservative default pipeline.py uses.
523525
was_passing_before = ctx.setup.build_before if ctx.setup else True
524-
status = _gate_status(
526+
status = gate_status(
525527
passed=passed,
526528
gate=step.gate,
527529
read_only=ctx.workflow.read_only,
@@ -543,7 +545,7 @@ def _handle_verify_lint(step: Step, ctx: StepContext) -> StepOutcome:
543545
repo_dir = ctx.setup.repo_dir if ctx.setup else ""
544546
passed = verify_lint(repo_dir)
545547
was_passing_before = ctx.setup.lint_before if ctx.setup else True
546-
status = _gate_status(
548+
status = gate_status(
547549
passed=passed,
548550
gate=step.gate,
549551
read_only=ctx.workflow.read_only,

0 commit comments

Comments
 (0)