From 7b2f03252e9b51c2c7b4124b17ec80e99c6e2d52 Mon Sep 17 00:00:00 2001
From: rioloc <criolo@redhat.com>
Date: Fri, 8 May 2026 16:40:19 +0200
Subject: [PATCH 1/6] feat: add SubprocessDriver for CRD-based agent execution
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Introduce SubprocessDriver — a new AgentDriver that manages OpenShift
Proposal CR lifecycle via oc/kubectl CLI. The driver builds and applies
Proposal CRs, polls status conditions until terminal state, handles
auto-approval via ProposalApproval resources, and cleans up on completion.

Key design decisions:
- Works directly with CRD conditions (the stable API contract) instead
  of replicating the operator's internal DerivePhase() logic
- TerminalOutcome enum for driver-level terminal states
- TurnData extended with proposal_spec, proposal_status, description,
  and expected_proposal_status fields (backward-compatible, all Optional)

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/lightspeed_evaluation/core/models/data.py |  15 +
 .../pipeline/evaluation/subprocess_driver.py  | 296 ++++++++
 .../evaluation/test_subprocess_driver.py      | 662 ++++++++++++++++++
 3 files changed, 973 insertions(+)
 create mode 100644 src/lightspeed_evaluation/pipeline/evaluation/subprocess_driver.py
 create mode 100644 tests/unit/pipeline/evaluation/test_subprocess_driver.py

diff --git a/src/lightspeed_evaluation/core/models/data.py b/src/lightspeed_evaluation/core/models/data.py
index aeabbdc0..1509f989 100644
--- a/src/lightspeed_evaluation/core/models/data.py
+++ b/src/lightspeed_evaluation/core/models/data.py
@@ -104,6 +104,21 @@ class TurnData(StreamingMetricsMixin):
         default=None, description="Path to verify script for script-based evaluation"
     )
 
+    # Subprocess driver fields
+    description: Optional[str] = Field(
+        default=None, description="Human-readable label for reports"
+    )
+    proposal_spec: Optional[dict[str, Any]] = Field(
+        default=None, description="Inline proposal spec for CRD-based agents"
+    )
+    expected_proposal_status: Optional[dict[str, Any]] = Field(
+        default=None,
+        description="Expected proposal status for assertion metrics",
+    )
+    proposal_status: Optional[dict[str, Any]] = Field(
+        default=None, description="Raw CRD status populated by SubprocessDriver"
+    )
+
     # Set of turn metrics that don't pass the validation to ignore them later
     _invalid_metrics: set[str] = set()
 
diff --git a/src/lightspeed_evaluation/pipeline/evaluation/subprocess_driver.py b/src/lightspeed_evaluation/pipeline/evaluation/subprocess_driver.py
new file mode 100644
index 00000000..72694eba
--- /dev/null
+++ b/src/lightspeed_evaluation/pipeline/evaluation/subprocess_driver.py
@@ -0,0 +1,296 @@
+"""Subprocess-based agent driver for CRD lifecycle management."""
+
+from __future__ import annotations
+
+import json
+import logging
+import os
+import shutil
+import subprocess
+import time
+import uuid
+from enum import StrEnum
+from typing import Any, Literal, Optional
+
+from pydantic import BaseModel, ConfigDict, Field
+
+from lightspeed_evaluation.core.models import TurnData
+from lightspeed_evaluation.core.system.exceptions import ConfigurationError
+from lightspeed_evaluation.pipeline.evaluation.driver import AgentDriver
+
+logger = logging.getLogger(__name__)
+
+
+class TerminalOutcome(StrEnum):
+    """Terminal outcomes for a Proposal CR lifecycle.
+
+    These are driver-level labels, not CRD API values. The CRD exposes
+    conditions with statuses that the driver interprets:
+
+    - Analyzed:  True = analysis succeeded, False = failed, Unknown = in progress
+    - Executed:  True = execution succeeded, False = failed, Unknown = in progress
+    - Verified:  True = verification passed, False = failed, Unknown = in progress
+    - Denied:    True = user denied a step (terminal)
+    - Escalated: True = escalation complete (terminal), False = failed, Unknown = in progress
+
+    Special reason: RetryingExecution (Verified=False triggers retry, not failure).
+    """
+
+    COMPLETED = "Completed"
+    FAILED = "Failed"
+    DENIED = "Denied"
+    ESCALATED = "Escalated"
+
+
+CRD_GROUP = "agentic.openshift.io"
+CRD_VERSION = "v1alpha1"
+CRD_KIND = "Proposal"
+CRD_PLURAL = "proposals"
+CRD_API_VERSION = f"{CRD_GROUP}/{CRD_VERSION}"
+
+CLI_COMMAND_TIMEOUT = 30
+
+
+class SubprocessAgentConfig(BaseModel):
+    """Configuration for a subprocess-based CRD agent."""
+
+    model_config = ConfigDict(extra="forbid")
+
+    type: Literal["subprocess"] = "subprocess"
+    namespace: str
+    auto_approve: bool = True
+    cleanup_proposals: bool = True
+    timeout: int = Field(default=900, gt=0)
+    poll_interval: int = Field(default=2, gt=0)
+
+
+class SubprocessDriver(AgentDriver):
+    """Driver that manages Proposal CR lifecycle via oc/kubectl CLI."""
+
+    def __init__(self, config: dict[str, Any]) -> None:
+        """Initialize the subprocess driver."""
+        super().__init__(config)
+        self._config = SubprocessAgentConfig.model_validate(config)
+        self._cli = self._resolve_cli()
+
+    def validate_config(self, config: dict[str, Any]) -> None:
+        """Validate subprocess driver configuration."""
+        SubprocessAgentConfig.model_validate(config)
+        if not shutil.which("oc") and not shutil.which("kubectl"):
+            raise ConfigurationError("Neither 'oc' nor 'kubectl' found on PATH")
+
+    def execute_turn(
+        self, turn_data: TurnData, conversation_id: Optional[str] = None
+    ) -> tuple[Optional[str], Optional[str]]:
+        """Execute a Proposal CR lifecycle for a single turn."""
+        # Proposal CR lifecycle:
+        # 1. Build Proposal CR from TurnData fields
+        # 2. Apply CR to cluster via oc/kubectl
+        # 3. Poll status — read .status.conditions each interval
+        # 4. Auto-approve — create ProposalApproval when Analyzed=True
+        # 5. Terminal detection — break on completed/failed/denied/escalated
+        # 6. Amend TurnData — set response and proposal_status in-place
+        # 7. Cleanup — delete Proposal CR if cleanup_proposals enabled
+        suffix = uuid.uuid4().hex[:8]
+        cr_name = (
+            f"eval-{conversation_id}-{suffix}" if conversation_id else f"eval-{suffix}"
+        )
+        proposal_spec = turn_data.proposal_spec or {}
+        manifest = self._build_proposal_cr(turn_data, cr_name)
+
+        result = self._apply(manifest)
+        if result.returncode != 0:
+            return (
+                f"Failed to apply Proposal CR: {result.stderr.strip()}",
+                None,
+            )
+
+        approved = False
+        outcome: Optional[TerminalOutcome] = None
+        status_dict: dict[str, Any] = {}
+        start = time.monotonic()
+
+        while time.monotonic() - start < self._config.timeout:
+            time.sleep(self._config.poll_interval)
+            status_dict, err = self._get_status(cr_name)
+            if err:
+                self._cleanup(cr_name)
+                return (err, None)
+            conditions = status_dict.get("conditions", [])
+
+            if (
+                self._config.auto_approve
+                and not approved
+                and self._should_approve(conditions)
+            ):
+                self._apply(self._build_approval_cr(cr_name, proposal_spec))
+                approved = True
+
+            outcome = self._is_terminal(conditions, proposal_spec)
+            if outcome is not None:
+                break
+        else:
+            self._cleanup(cr_name)
+            return (
+                f"Timeout after {self._config.timeout}s for '{cr_name}'",
+                None,
+            )
+
+        turn_data.response = self._extract_summary(status_dict)
+        turn_data.proposal_status = status_dict
+        self._cleanup(cr_name)
+
+        if outcome == TerminalOutcome.COMPLETED:
+            return (None, None)
+        return (
+            f"Proposal '{cr_name}' terminated with outcome: {outcome}",
+            None,
+        )
+
+    @staticmethod
+    def _resolve_cli() -> str:
+        """Resolve oc or kubectl binary path."""
+        return shutil.which("oc") or shutil.which("kubectl") or ""
+
+    def _run_cli(
+        self,
+        args: list[str],
+        stdin: Optional[str] = None,
+    ) -> subprocess.CompletedProcess[str]:
+        """Run a CLI command and return the result."""
+        return subprocess.run(
+            [self._cli, *args],
+            input=stdin,
+            text=True,
+            capture_output=True,
+            env=os.environ.copy(),
+            timeout=CLI_COMMAND_TIMEOUT,
+            check=False,
+        )
+
+    def _apply(self, manifest: dict[str, Any]) -> subprocess.CompletedProcess[str]:
+        """Apply a CR manifest via stdin."""
+        return self._run_cli(["apply", "-f", "-"], stdin=json.dumps(manifest))
+
+    def _get_status(self, cr_name: str) -> tuple[dict[str, Any], Optional[str]]:
+        """Get Proposal CR status."""
+        result = self._run_cli(
+            [
+                "get",
+                CRD_PLURAL,
+                cr_name,
+                "-n",
+                self._config.namespace,
+                "-o",
+                "json",
+            ]
+        )
+        if result.returncode != 0:
+            return {}, f"Failed to get status for '{cr_name}': {result.stderr.strip()}"
+        try:
+            cr = json.loads(result.stdout)
+        except json.JSONDecodeError as exc:
+            return {}, f"Failed to parse status JSON for '{cr_name}': {exc}"
+        return cr.get("status", {}), None
+
+    def _delete(self, cr_name: str) -> None:
+        """Delete a Proposal CR."""
+        self._run_cli(
+            [
+                "delete",
+                CRD_PLURAL,
+                cr_name,
+                "-n",
+                self._config.namespace,
+                "--ignore-not-found",
+            ]
+        )
+
+    def _cleanup(self, cr_name: str) -> None:
+        """Delete the Proposal CR if cleanup is enabled."""
+        if not self._config.cleanup_proposals:
+            return
+        try:
+            self._delete(cr_name)
+        except Exception:  # pylint: disable=broad-exception-caught
+            logger.warning("Failed to clean up Proposal CR '%s'", cr_name)
+
+    def _build_proposal_cr(self, turn_data: TurnData, cr_name: str) -> dict[str, Any]:
+        """Build Proposal CR manifest from TurnData."""
+        spec: dict[str, Any] = {"request": turn_data.query}
+        if turn_data.proposal_spec:
+            spec.update(turn_data.proposal_spec)
+        spec.setdefault("analysis", {})
+        return {
+            "apiVersion": CRD_API_VERSION,
+            "kind": CRD_KIND,
+            "metadata": {
+                "name": cr_name,
+                "namespace": self._config.namespace,
+            },
+            "spec": spec,
+        }
+
+    def _build_approval_cr(
+        self, cr_name: str, proposal_spec: dict[str, Any]
+    ) -> dict[str, Any]:
+        """Build ProposalApproval CR manifest."""
+        stages: list[dict[str, Any]] = [
+            {"type": "Analysis", "decision": "Approved"},
+        ]
+        if "execution" in proposal_spec:
+            stages.append(
+                {
+                    "type": "Execution",
+                    "decision": "Approved",
+                    "execution": {"option": 0},
+                }
+            )
+        if "verification" in proposal_spec:
+            stages.append({"type": "Verification", "decision": "Approved"})
+        return {
+            "apiVersion": CRD_API_VERSION,
+            "kind": "ProposalApproval",
+            "metadata": {
+                "name": cr_name,
+                "namespace": self._config.namespace,
+            },
+            "spec": {"stages": stages},
+        }
+
+    @staticmethod
+    def _should_approve(conditions: list[dict[str, Any]]) -> bool:
+        """Check if conditions indicate the proposal is ready for approval."""
+        by_type = {c["type"]: c for c in conditions}
+        analyzed = by_type.get("Analyzed")
+        return analyzed is not None and analyzed.get("status") == "True"
+
+    @staticmethod
+    def _is_terminal(
+        conditions: list[dict[str, Any]], proposal_spec: dict[str, Any]
+    ) -> Optional[TerminalOutcome]:
+        """Check if conditions indicate a terminal state."""
+        by_type = {c["type"]: c for c in conditions}
+        if by_type.get("Denied", {}).get("status") == "True":
+            return TerminalOutcome.DENIED
+        if by_type.get("Escalated", {}).get("status") == "True":
+            return TerminalOutcome.ESCALATED
+        for c in conditions:
+            if c.get("status") == "False" and c.get("reason") != "RetryingExecution":
+                return TerminalOutcome.FAILED
+        if "verification" in proposal_spec:
+            last = "Verified"
+        elif "execution" in proposal_spec:
+            last = "Executed"
+        else:
+            last = "Analyzed"
+        if by_type.get(last, {}).get("status") == "True":
+            return TerminalOutcome.COMPLETED
+        return None
+
+    @staticmethod
+    def _extract_summary(status_dict: dict[str, Any]) -> str:
+        """Extract a human-readable summary from analysis results."""
+        conditions = status_dict.get("conditions", [])
+        messages = [c["message"] for c in conditions if c.get("message")]
+        return "; ".join(messages) if messages else "No summary available"
diff --git a/tests/unit/pipeline/evaluation/test_subprocess_driver.py b/tests/unit/pipeline/evaluation/test_subprocess_driver.py
new file mode 100644
index 00000000..e2176c90
--- /dev/null
+++ b/tests/unit/pipeline/evaluation/test_subprocess_driver.py
@@ -0,0 +1,662 @@
+# pylint: disable=protected-access
+
+"""Unit tests for subprocess agent driver module."""
+
+from typing import Any
+
+import pytest
+from pydantic import ValidationError
+from pytest_mock import MockerFixture
+
+from lightspeed_evaluation.core.models import TurnData
+from lightspeed_evaluation.core.system.exceptions import ConfigurationError
+from lightspeed_evaluation.pipeline.evaluation.subprocess_driver import (
+    SubprocessAgentConfig,
+    SubprocessDriver,
+    TerminalOutcome,
+)
+
+MODULE = "lightspeed_evaluation.pipeline.evaluation.subprocess_driver"
+
+VALID_CONFIG: dict[str, Any] = {
+    "type": "subprocess",
+    "namespace": "openshift-lightspeed",
+}
+
+SPEC_ANALYSIS_ONLY: dict[str, Any] = {"analysis": {}}
+SPEC_WITH_EXEC: dict[str, Any] = {"analysis": {}, "execution": {}}
+SPEC_FULL: dict[str, Any] = {"analysis": {}, "execution": {}, "verification": {}}
+
+
+def _cond(
+    cond_type: str,
+    status: str,
+    reason: str = "",
+    message: str = "",
+) -> dict[str, Any]:
+    """Build a CRD condition dict for tests."""
+    c: dict[str, Any] = {"type": cond_type, "status": status}
+    if reason:
+        c["reason"] = reason
+    if message:
+        c["message"] = message
+    return c
+
+
+# ── Config validation ────────────────────────────────────────────────
+
+
+class TestSubprocessAgentConfig:
+    """Unit tests for SubprocessAgentConfig Pydantic model."""
+
+    def test_valid_config_all_fields(self) -> None:
+        """Test config with all fields explicit."""
+        config = SubprocessAgentConfig.model_validate(
+            {
+                "type": "subprocess",
+                "namespace": "ns",
+                "auto_approve": False,
+                "cleanup_proposals": False,
+                "timeout": 60,
+                "poll_interval": 5,
+            }
+        )
+        assert config.namespace == "ns"
+        assert config.auto_approve is False
+        assert config.cleanup_proposals is False
+        assert config.timeout == 60
+        assert config.poll_interval == 5
+
+    def test_valid_config_defaults(self) -> None:
+        """Test config with only required fields uses defaults."""
+        config = SubprocessAgentConfig.model_validate(VALID_CONFIG)
+        assert config.auto_approve is True
+        assert config.cleanup_proposals is True
+        assert config.timeout == 900
+        assert config.poll_interval == 2
+
+    def test_missing_namespace(self) -> None:
+        """Test missing required namespace raises ValidationError."""
+        with pytest.raises(ValidationError):
+            SubprocessAgentConfig.model_validate({"type": "subprocess"})
+
+    def test_extra_field_rejected(self) -> None:
+        """Test extra fields raise ValidationError."""
+        with pytest.raises(ValidationError):
+            SubprocessAgentConfig.model_validate({**VALID_CONFIG, "extra": "bad"})
+
+    def test_invalid_timeout_zero(self) -> None:
+        """Test timeout=0 raises ValidationError."""
+        with pytest.raises(ValidationError):
+            SubprocessAgentConfig.model_validate({**VALID_CONFIG, "timeout": 0})
+
+    def test_invalid_timeout_negative(self) -> None:
+        """Test negative timeout raises ValidationError."""
+        with pytest.raises(ValidationError):
+            SubprocessAgentConfig.model_validate({**VALID_CONFIG, "timeout": -1})
+
+    def test_invalid_poll_interval_zero(self) -> None:
+        """Test poll_interval=0 raises ValidationError."""
+        with pytest.raises(ValidationError):
+            SubprocessAgentConfig.model_validate({**VALID_CONFIG, "poll_interval": 0})
+
+
+# ── Condition helpers ────────────────────────────────────────────────
+
+
+class TestConditionHelpers:
+    """Unit tests for SubprocessDriver._should_approve and _is_terminal."""
+
+    @pytest.mark.parametrize(
+        "conditions, expected",
+        [
+            ([], False),
+            ([_cond("Analyzed", "Unknown")], False),
+            ([_cond("Analyzed", "True")], True),
+            ([_cond("Analyzed", "False")], False),
+            ([_cond("Executed", "True")], False),
+        ],
+        ids=[
+            "no-conditions",
+            "analyzing",
+            "analyzed",
+            "analysis-failed",
+            "wrong-condition-type",
+        ],
+    )
+    def test_should_approve(
+        self, conditions: list[dict[str, Any]], expected: bool
+    ) -> None:
+        """Test _should_approve returns correct result."""
+        assert SubprocessDriver._should_approve(conditions) == expected
+
+    @pytest.mark.parametrize(
+        "conditions, spec, expected",
+        [
+            # Not terminal — keep polling
+            ([], SPEC_FULL, None),
+            ([_cond("Analyzed", "Unknown")], SPEC_FULL, None),
+            ([_cond("Analyzed", "True")], SPEC_FULL, None),
+            (
+                [_cond("Analyzed", "True"), _cond("Executed", "Unknown")],
+                SPEC_FULL,
+                None,
+            ),
+            (
+                [_cond("Analyzed", "True"), _cond("Executed", "True")],
+                SPEC_FULL,
+                None,
+            ),
+            (
+                [
+                    _cond("Analyzed", "True"),
+                    _cond("Executed", "True"),
+                    _cond("Verified", "Unknown"),
+                ],
+                SPEC_FULL,
+                None,
+            ),
+            # RetryingExecution — not a failure
+            (
+                [
+                    _cond("Analyzed", "True"),
+                    _cond("Executed", "True"),
+                    _cond("Verified", "False", "RetryingExecution"),
+                ],
+                SPEC_FULL,
+                None,
+            ),
+            # Completed — last expected step True
+            (
+                [_cond("Analyzed", "True")],
+                SPEC_ANALYSIS_ONLY,
+                TerminalOutcome.COMPLETED,
+            ),
+            (
+                [_cond("Analyzed", "True"), _cond("Executed", "True")],
+                SPEC_WITH_EXEC,
+                TerminalOutcome.COMPLETED,
+            ),
+            (
+                [
+                    _cond("Analyzed", "True"),
+                    _cond("Executed", "True"),
+                    _cond("Verified", "True"),
+                ],
+                SPEC_FULL,
+                TerminalOutcome.COMPLETED,
+            ),
+            # Failed — any condition False (no RetryingExecution)
+            (
+                [_cond("Analyzed", "False")],
+                SPEC_FULL,
+                TerminalOutcome.FAILED,
+            ),
+            (
+                [_cond("Analyzed", "True"), _cond("Executed", "False")],
+                SPEC_FULL,
+                TerminalOutcome.FAILED,
+            ),
+            (
+                [
+                    _cond("Analyzed", "True"),
+                    _cond("Executed", "True"),
+                    _cond("Verified", "False"),
+                ],
+                SPEC_FULL,
+                TerminalOutcome.FAILED,
+            ),
+            # Denied — priority over other conditions
+            ([_cond("Denied", "True")], SPEC_FULL, TerminalOutcome.DENIED),
+            (
+                [_cond("Denied", "True"), _cond("Analyzed", "True")],
+                SPEC_FULL,
+                TerminalOutcome.DENIED,
+            ),
+            # Escalated — priority over other conditions
+            (
+                [_cond("Escalated", "True")],
+                SPEC_FULL,
+                TerminalOutcome.ESCALATED,
+            ),
+            (
+                [_cond("Escalated", "True"), _cond("Verified", "True")],
+                SPEC_FULL,
+                TerminalOutcome.ESCALATED,
+            ),
+        ],
+        ids=[
+            "no-conditions",
+            "analyzing",
+            "analyzed-not-terminal-full",
+            "executing",
+            "executed-not-terminal-full",
+            "verifying",
+            "retrying-execution",
+            "completed-analysis-only",
+            "completed-with-exec",
+            "completed-full",
+            "failed-analysis",
+            "failed-execution",
+            "failed-verification",
+            "denied",
+            "denied-beats-analyzed",
+            "escalated",
+            "escalated-beats-verified",
+        ],
+    )
+    def test_is_terminal(
+        self,
+        conditions: list[dict[str, Any]],
+        spec: dict[str, Any],
+        expected: TerminalOutcome | None,
+    ) -> None:
+        """Test _is_terminal returns correct terminal outcome."""
+        assert SubprocessDriver._is_terminal(conditions, spec) == expected
+
+
+# ── CR manifest building ────────────────────────────────────────────
+
+
+class TestBuildCR:
+    """Unit tests for CR manifest construction."""
+
+    def _make_driver(self, mocker: MockerFixture) -> SubprocessDriver:
+        """Create a driver with mocked CLI resolution."""
+        mocker.patch(f"{MODULE}.shutil").which.return_value = "/usr/bin/oc"
+        return SubprocessDriver(VALID_CONFIG)
+
+    def test_proposal_cr_query_only(self, mocker: MockerFixture) -> None:
+        """Test Proposal CR with query only, no proposal_spec."""
+        driver = self._make_driver(mocker)
+        turn = TurnData(turn_id="t1", query="Pod is crash looping")
+        cr = driver._build_proposal_cr(turn, "eval-abc123")
+
+        assert cr["apiVersion"] == "agentic.openshift.io/v1alpha1"
+        assert cr["kind"] == "Proposal"
+        assert cr["metadata"]["name"] == "eval-abc123"
+        assert cr["metadata"]["namespace"] == "openshift-lightspeed"
+        assert cr["spec"]["request"] == "Pod is crash looping"
+        assert cr["spec"]["analysis"] == {}
+
+    def test_proposal_cr_with_spec(self, mocker: MockerFixture) -> None:
+        """Test Proposal CR with full proposal_spec."""
+        driver = self._make_driver(mocker)
+        turn = TurnData(
+            turn_id="t1",
+            query="Pod is crash looping",
+            proposal_spec={
+                "targetNamespaces": ["production"],
+                "analysis": {"agent": "smart"},
+                "execution": {"agent": "default"},
+            },
+        )
+        cr = driver._build_proposal_cr(turn, "eval-abc123")
+
+        assert cr["spec"]["request"] == "Pod is crash looping"
+        assert cr["spec"]["targetNamespaces"] == ["production"]
+        assert cr["spec"]["analysis"] == {"agent": "smart"}
+        assert cr["spec"]["execution"] == {"agent": "default"}
+
+    def test_proposal_cr_defaults_analysis(self, mocker: MockerFixture) -> None:
+        """Test Proposal CR defaults analysis to empty dict."""
+        driver = self._make_driver(mocker)
+        turn = TurnData(
+            turn_id="t1",
+            query="Q",
+            proposal_spec={"execution": {}},
+        )
+        cr = driver._build_proposal_cr(turn, "eval-x")
+
+        assert cr["spec"]["analysis"] == {}
+
+    def test_approval_cr_analysis_only(self, mocker: MockerFixture) -> None:
+        """Test ProposalApproval with analysis-only spec."""
+        driver = self._make_driver(mocker)
+        cr = driver._build_approval_cr("eval-abc", SPEC_ANALYSIS_ONLY)
+
+        assert cr["kind"] == "ProposalApproval"
+        assert len(cr["spec"]["stages"]) == 1
+        assert cr["spec"]["stages"][0]["type"] == "Analysis"
+        assert cr["spec"]["stages"][0]["decision"] == "Approved"
+
+    def test_approval_cr_full(self, mocker: MockerFixture) -> None:
+        """Test ProposalApproval with all three stages."""
+        driver = self._make_driver(mocker)
+        cr = driver._build_approval_cr("eval-abc", SPEC_FULL)
+
+        assert len(cr["spec"]["stages"]) == 3
+        types = [s["type"] for s in cr["spec"]["stages"]]
+        assert types == ["Analysis", "Execution", "Verification"]
+        assert cr["spec"]["stages"][1]["execution"] == {"option": 0}
+
+
+# ── Extract summary ─────────────────────────────────────────────────
+
+
+class TestExtractSummary:
+    """Unit tests for SubprocessDriver._extract_summary."""
+
+    def test_with_messages(self) -> None:
+        """Test summary from condition messages."""
+        status = {
+            "conditions": [
+                _cond("Analyzed", "True", message="Analysis done"),
+                _cond("Executed", "True", message="Execution ok"),
+            ]
+        }
+        result = SubprocessDriver._extract_summary(status)
+        assert result == "Analysis done; Execution ok"
+
+    def test_no_messages(self) -> None:
+        """Test fallback when conditions have no messages."""
+        status = {"conditions": [_cond("Analyzed", "True")]}
+        assert SubprocessDriver._extract_summary(status) == "No summary available"
+
+    def test_empty_status(self) -> None:
+        """Test fallback for empty status dict."""
+        assert SubprocessDriver._extract_summary({}) == "No summary available"
+
+
+# ── Driver lifecycle ─────────────────────────────────────────────────
+
+
+class TestSubprocessDriver:
+    """Unit tests for SubprocessDriver init, validate_config, enabled, close."""
+
+    def test_validate_config_with_oc(self, mocker: MockerFixture) -> None:
+        """Test driver resolves oc as primary CLI."""
+        mock_shutil = mocker.patch(f"{MODULE}.shutil")
+        mock_shutil.which.side_effect = lambda cmd: (
+            "/usr/bin/oc" if cmd == "oc" else None
+        )
+        driver = SubprocessDriver(VALID_CONFIG)
+        assert driver._cli == "/usr/bin/oc"
+
+    def test_validate_config_kubectl_fallback(self, mocker: MockerFixture) -> None:
+        """Test driver falls back to kubectl when oc not found."""
+        mock_shutil = mocker.patch(f"{MODULE}.shutil")
+        mock_shutil.which.side_effect = lambda cmd: (
+            "/usr/bin/kubectl" if cmd == "kubectl" else None
+        )
+        driver = SubprocessDriver(VALID_CONFIG)
+        assert driver._cli == "/usr/bin/kubectl"
+
+    def test_validate_config_neither_found(self, mocker: MockerFixture) -> None:
+        """Test ConfigurationError when neither oc nor kubectl found."""
+        mocker.patch(f"{MODULE}.shutil").which.return_value = None
+        with pytest.raises(ConfigurationError, match="Neither 'oc' nor 'kubectl'"):
+            SubprocessDriver(VALID_CONFIG)
+
+    def test_validate_config_invalid(self, mocker: MockerFixture) -> None:
+        """Test ValidationError for missing required fields."""
+        mocker.patch(f"{MODULE}.shutil").which.return_value = "/usr/bin/oc"
+        with pytest.raises(ValidationError):
+            SubprocessDriver({"type": "subprocess"})
+
+    def test_enabled_always_true(self, mocker: MockerFixture) -> None:
+        """Test enabled property defaults to True (inherited)."""
+        mocker.patch(f"{MODULE}.shutil").which.return_value = "/usr/bin/oc"
+        driver = SubprocessDriver(VALID_CONFIG)
+        assert driver.enabled is True
+
+    def test_close_is_noop(self, mocker: MockerFixture) -> None:
+        """Test close does nothing (no persistent connections)."""
+        mocker.patch(f"{MODULE}.shutil").which.return_value = "/usr/bin/oc"
+        driver = SubprocessDriver(VALID_CONFIG)
+        driver.close()
+
+
+# ── Cleanup ──────────────────────────────────────────────────────────
+
+
+class TestCleanup:
+    """Unit tests for SubprocessDriver._cleanup."""
+
+    def test_cleanup_calls_delete(self, mocker: MockerFixture) -> None:
+        """Test cleanup delegates to _delete when enabled."""
+        mocker.patch(f"{MODULE}.shutil").which.return_value = "/usr/bin/oc"
+        driver = SubprocessDriver(VALID_CONFIG)
+        mock_delete = mocker.patch.object(driver, "_delete")
+
+        driver._cleanup("eval-test")
+
+        mock_delete.assert_called_once_with("eval-test")
+
+    def test_cleanup_disabled(self, mocker: MockerFixture) -> None:
+        """Test cleanup skips _delete when cleanup_proposals=False."""
+        mocker.patch(f"{MODULE}.shutil").which.return_value = "/usr/bin/oc"
+        driver = SubprocessDriver({**VALID_CONFIG, "cleanup_proposals": False})
+        mock_delete = mocker.patch.object(driver, "_delete")
+
+        driver._cleanup("eval-test")
+
+        mock_delete.assert_not_called()
+
+    def test_cleanup_failure_logged(self, mocker: MockerFixture) -> None:
+        """Test cleanup logs warning on _delete failure."""
+        mocker.patch(f"{MODULE}.shutil").which.return_value = "/usr/bin/oc"
+        driver = SubprocessDriver(VALID_CONFIG)
+        mocker.patch.object(driver, "_delete", side_effect=Exception("boom"))
+        mock_logger = mocker.patch(f"{MODULE}.logger")
+
+        driver._cleanup("eval-test")
+
+        mock_logger.warning.assert_called_once()
+
+
+# ── execute_turn ─────────────────────────────────────────────────────
+
+
+class TestExecuteTurn:
+    """Unit tests for SubprocessDriver.execute_turn."""
+
+    @pytest.fixture()
+    def driver(self, mocker: MockerFixture) -> SubprocessDriver:
+        """Create a driver with mocked shutil and uuid."""
+        mocker.patch(f"{MODULE}.shutil").which.return_value = "/usr/bin/oc"
+        mocker.patch(f"{MODULE}.uuid").uuid4.return_value.hex = "abcd1234"
+        return SubprocessDriver({**VALID_CONFIG, "timeout": 10, "poll_interval": 1})
+
+    def test_happy_path_completed(
+        self, mocker: MockerFixture, driver: SubprocessDriver
+    ) -> None:
+        """Test successful full lifecycle returns no error."""
+        mock_time = mocker.patch(f"{MODULE}.time")
+        mock_time.monotonic.side_effect = [0.0, 1.0]
+
+        mock_apply = mocker.patch.object(driver, "_apply")
+        mock_apply.return_value = mocker.Mock(returncode=0)
+
+        terminal_status: dict[str, Any] = {
+            "conditions": [
+                _cond("Analyzed", "True", message="Analysis done"),
+                _cond("Executed", "True"),
+                _cond("Verified", "True", message="Passed"),
+            ]
+        }
+        mocker.patch.object(driver, "_get_status", return_value=(terminal_status, None))
+        mocker.patch.object(driver, "_cleanup")
+
+        turn = TurnData(turn_id="t1", query="Fix pod", proposal_spec=SPEC_FULL)
+        error, conv_id = driver.execute_turn(turn)
+
+        assert error is None
+        assert conv_id is None
+        assert turn.response == "Analysis done; Passed"
+        assert turn.proposal_status == terminal_status
+        driver._cleanup.assert_called_once_with("eval-abcd1234")
+
+    def test_apply_failure(
+        self, mocker: MockerFixture, driver: SubprocessDriver
+    ) -> None:
+        """Test apply failure returns error without polling."""
+        mock_apply = mocker.patch.object(driver, "_apply")
+        mock_apply.return_value = mocker.Mock(returncode=1, stderr="connection refused")
+
+        turn = TurnData(turn_id="t1", query="Q")
+        error, conv_id = driver.execute_turn(turn)
+
+        assert error == "Failed to apply Proposal CR: connection refused"
+        assert conv_id is None
+
+    def test_timeout(self, mocker: MockerFixture, driver: SubprocessDriver) -> None:
+        """Test timeout returns error when no terminal condition reached."""
+        mock_time = mocker.patch(f"{MODULE}.time")
+        mock_time.monotonic.side_effect = [0.0, 1.0, 11.0]
+
+        mock_apply = mocker.patch.object(driver, "_apply")
+        mock_apply.return_value = mocker.Mock(returncode=0)
+
+        non_terminal: dict[str, Any] = {"conditions": [_cond("Analyzed", "Unknown")]}
+        mocker.patch.object(driver, "_get_status", return_value=(non_terminal, None))
+        mocker.patch.object(driver, "_cleanup")
+
+        turn = TurnData(turn_id="t1", query="Q", proposal_spec=SPEC_FULL)
+        error, conv_id = driver.execute_turn(turn)
+
+        assert error is not None
+        assert "Timeout after 10s" in error
+        assert conv_id is None
+        driver._cleanup.assert_called_once()
+
+    def test_auto_approve(
+        self, mocker: MockerFixture, driver: SubprocessDriver
+    ) -> None:
+        """Test auto-approve triggers on Analyzed=True."""
+        mock_time = mocker.patch(f"{MODULE}.time")
+        mock_time.monotonic.side_effect = [0.0, 1.0, 2.0]
+
+        mock_apply = mocker.patch.object(driver, "_apply")
+        mock_apply.return_value = mocker.Mock(returncode=0)
+
+        poll_1: dict[str, Any] = {"conditions": [_cond("Analyzed", "True")]}
+        poll_2: dict[str, Any] = {
+            "conditions": [
+                _cond("Analyzed", "True"),
+                _cond("Executed", "True"),
+                _cond("Verified", "True"),
+            ]
+        }
+        mocker.patch.object(
+            driver,
+            "_get_status",
+            side_effect=[(poll_1, None), (poll_2, None)],
+        )
+        mocker.patch.object(driver, "_cleanup")
+
+        turn = TurnData(turn_id="t1", query="Q", proposal_spec=SPEC_FULL)
+        error, _ = driver.execute_turn(turn)
+
+        assert error is None
+        assert mock_apply.call_count == 2
+
+    def test_auto_approve_disabled(
+        self, mocker: MockerFixture, driver: SubprocessDriver
+    ) -> None:
+        """Test auto-approve skipped when disabled."""
+        driver._config.auto_approve = False
+
+        mock_time = mocker.patch(f"{MODULE}.time")
+        mock_time.monotonic.side_effect = [0.0, 1.0]
+
+        mock_apply = mocker.patch.object(driver, "_apply")
+        mock_apply.return_value = mocker.Mock(returncode=0)
+
+        status: dict[str, Any] = {"conditions": [_cond("Analyzed", "True")]}
+        mocker.patch.object(driver, "_get_status", return_value=(status, None))
+        mocker.patch.object(driver, "_cleanup")
+
+        turn = TurnData(turn_id="t1", query="Q")
+        driver.execute_turn(turn)
+
+        assert mock_apply.call_count == 1
+
+    def test_failed_terminal(
+        self, mocker: MockerFixture, driver: SubprocessDriver
+    ) -> None:
+        """Test failed proposal returns error with outcome."""
+        mock_time = mocker.patch(f"{MODULE}.time")
+        mock_time.monotonic.side_effect = [0.0, 1.0]
+
+        mock_apply = mocker.patch.object(driver, "_apply")
+        mock_apply.return_value = mocker.Mock(returncode=0)
+
+        status: dict[str, Any] = {
+            "conditions": [_cond("Analyzed", "False", message="LLM error")]
+        }
+        mocker.patch.object(driver, "_get_status", return_value=(status, None))
+        mocker.patch.object(driver, "_cleanup")
+
+        turn = TurnData(turn_id="t1", query="Q", proposal_spec=SPEC_FULL)
+        error, _ = driver.execute_turn(turn)
+
+        assert error is not None
+        assert "Failed" in error
+        assert turn.proposal_status == status
+        assert turn.response == "LLM error"
+
+    def test_denied_terminal(
+        self, mocker: MockerFixture, driver: SubprocessDriver
+    ) -> None:
+        """Test denied proposal returns error."""
+        mock_time = mocker.patch(f"{MODULE}.time")
+        mock_time.monotonic.side_effect = [0.0, 1.0]
+
+        mock_apply = mocker.patch.object(driver, "_apply")
+        mock_apply.return_value = mocker.Mock(returncode=0)
+
+        status: dict[str, Any] = {
+            "conditions": [_cond("Denied", "True", message="User denied")]
+        }
+        mocker.patch.object(driver, "_get_status", return_value=(status, None))
+        mocker.patch.object(driver, "_cleanup")
+
+        turn = TurnData(turn_id="t1", query="Q", proposal_spec=SPEC_FULL)
+        error, _ = driver.execute_turn(turn)
+
+        assert error is not None
+        assert "Denied" in error
+
+    def test_get_status_error(
+        self, mocker: MockerFixture, driver: SubprocessDriver
+    ) -> None:
+        """Test get_status error triggers cleanup and returns error."""
+        mock_time = mocker.patch(f"{MODULE}.time")
+        mock_time.monotonic.side_effect = [0.0, 1.0]
+
+        mock_apply = mocker.patch.object(driver, "_apply")
+        mock_apply.return_value = mocker.Mock(returncode=0)
+
+        mocker.patch.object(
+            driver,
+            "_get_status",
+            return_value=({}, "Failed to get status for 'eval-abcd1234'"),
+        )
+        mocker.patch.object(driver, "_cleanup")
+
+        turn = TurnData(turn_id="t1", query="Q")
+        error, _ = driver.execute_turn(turn)
+
+        assert error is not None
+        assert "Failed to get status" in error
+        driver._cleanup.assert_called_once()
+
+    def test_conversation_id_in_cr_name(
+        self, mocker: MockerFixture, driver: SubprocessDriver
+    ) -> None:
+        """Test conversation_id is included in CR name."""
+        mock_time = mocker.patch(f"{MODULE}.time")
+        mock_time.monotonic.side_effect = [0.0, 1.0]
+
+        mock_apply = mocker.patch.object(driver, "_apply")
+        mock_apply.return_value = mocker.Mock(returncode=0)
+
+        status: dict[str, Any] = {"conditions": [_cond("Analyzed", "True")]}
+        mocker.patch.object(driver, "_get_status", return_value=(status, None))
+        mocker.patch.object(driver, "_cleanup")
+
+        turn = TurnData(turn_id="t1", query="Q")
+        driver.execute_turn(turn, conversation_id="conv-42")
+
+        driver._cleanup.assert_called_once_with("eval-conv-42-abcd1234")

From dc1247fb0c975ca06a133fb703a026823343b780 Mon Sep 17 00:00:00 2001
From: rioloc <criolo@redhat.com>
Date: Fri, 15 May 2026 11:58:28 +0200
Subject: [PATCH 2/6] feat: integrate driver and rename to ProposalDriver

- Wire ProposalDriver into the evaluation pipeline: add "subprocess"
to SUPPORTED_AGENT_TYPES, move SubprocessAgentConfig to core/models,
register in AgentDriverRegistry via new registry module, and update
all exports. Fixes ProposalDriver.__init__ to accept enabled kwarg.
- Extend unit tests
- Add agentic integration tests

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 Makefile                                      |   3 +
 pytest.ini                                    |   3 +-
 src/lightspeed_evaluation/core/constants.py   |   2 +-
 .../core/metrics/custom/__init__.py           |   4 +
 .../core/metrics/custom/custom.py             |   4 +
 .../core/metrics/custom/proposal_eval.py      | 209 ++++++++
 .../core/models/__init__.py                   |   2 +
 .../core/models/agents.py                     |  15 +-
 src/lightspeed_evaluation/core/models/data.py |   2 +-
 .../core/system/validator.py                  |   4 +
 .../pipeline/evaluation/__init__.py           |   9 +-
 .../pipeline/evaluation/driver.py             | 336 ++++++++++++-
 .../pipeline/evaluation/pipeline.py           |   6 +-
 .../pipeline/evaluation/registry.py           |  41 ++
 .../pipeline/evaluation/subprocess_driver.py  | 296 ------------
 .../agentic/fixtures/namespace.yaml           |   4 +
 .../agentic/fixtures/oomkill-demo.yaml        |  24 +
 ...eanup_subprocess_fixtures-claude-vertex.sh |  26 +
 .../cleanup_subprocess_fixtures-openai.sh     |  27 ++
 ...setup_subprocess_fixtures-claude-vertex.sh | 138 ++++++
 .../setup_subprocess_fixtures-openai.sh       | 133 ++++++
 .../system-config-agents-subprocess.yaml      |  24 +
 .../test_evaluation_data_subprocess.yaml      |  62 +++
 .../integration/test_subprocess_evaluation.py | 293 ++++++++++++
 .../core/metrics/custom/test_proposal_eval.py | 452 ++++++++++++++++++
 tests/unit/pipeline/evaluation/test_driver.py |  19 +-
 ...cess_driver.py => test_proposal_driver.py} | 173 ++++---
 27 files changed, 1892 insertions(+), 419 deletions(-)
 create mode 100644 src/lightspeed_evaluation/core/metrics/custom/proposal_eval.py
 create mode 100644 src/lightspeed_evaluation/pipeline/evaluation/registry.py
 delete mode 100644 src/lightspeed_evaluation/pipeline/evaluation/subprocess_driver.py
 create mode 100644 tests/integration/agentic/fixtures/namespace.yaml
 create mode 100644 tests/integration/agentic/fixtures/oomkill-demo.yaml
 create mode 100755 tests/integration/agentic/scripts/cleanup_subprocess_fixtures-claude-vertex.sh
 create mode 100755 tests/integration/agentic/scripts/cleanup_subprocess_fixtures-openai.sh
 create mode 100755 tests/integration/agentic/scripts/setup_subprocess_fixtures-claude-vertex.sh
 create mode 100755 tests/integration/agentic/scripts/setup_subprocess_fixtures-openai.sh
 create mode 100644 tests/integration/system-config-agents-subprocess.yaml
 create mode 100644 tests/integration/test_evaluation_data_subprocess.yaml
 create mode 100644 tests/integration/test_subprocess_evaluation.py
 create mode 100644 tests/unit/core/metrics/custom/test_proposal_eval.py
 rename tests/unit/pipeline/evaluation/{test_subprocess_driver.py => test_proposal_driver.py} (81%)

diff --git a/Makefile b/Makefile
index 70b93793..44dfb228 100644
--- a/Makefile
+++ b/Makefile
@@ -91,6 +91,9 @@ test: install-deps-test ## Execute tests with Pytest
 e2e_tests: install-deps-test
 	uv run pytest tests/integration -v -m integration
 
+e2e_tests_agentic: install-deps-test ## Run agentic integration tests (requires live OpenShift cluster)
+	uv run pytest tests/integration -v -m agentic
+
 e2e_tests_lcore: e2e_tests
 	# May be changed in the future to different test suite
 	echo "LCORE e2e tests done"
diff --git a/pytest.ini b/pytest.ini
index e42afcb8..b1711c81 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -13,11 +13,12 @@ addopts =
     --tb=short
     --strict-markers
     --color=yes
-    -m "not integration"
+    -m "not integration and not agentic"
 
 # Markers
 markers =
     integration: Integration tests that require real config files and external dependencies
+    agentic: Agentic integration tests that require a live OpenShift cluster with agentic CRDs
 
 # Minimum version
 minversion = 6.0
diff --git a/src/lightspeed_evaluation/core/constants.py b/src/lightspeed_evaluation/core/constants.py
index 9d4c2071..b0214b7e 100644
--- a/src/lightspeed_evaluation/core/constants.py
+++ b/src/lightspeed_evaluation/core/constants.py
@@ -66,7 +66,7 @@
 
 # Agent Constants
 DEFAULT_AGENT_TYPE = "http_api"
-SUPPORTED_AGENT_TYPES = ["http_api"]
+SUPPORTED_AGENT_TYPES = ["http_api", "proposal"]
 
 # Frameworks that don't require judge LLM (NLP, script-based evaluations)
 NON_LLM_FRAMEWORKS = frozenset({"nlp", "script"})
diff --git a/src/lightspeed_evaluation/core/metrics/custom/__init__.py b/src/lightspeed_evaluation/core/metrics/custom/__init__.py
index 112dafa5..d081dd77 100644
--- a/src/lightspeed_evaluation/core/metrics/custom/__init__.py
+++ b/src/lightspeed_evaluation/core/metrics/custom/__init__.py
@@ -6,11 +6,15 @@
     ANSWER_CORRECTNESS_PROMPT,
     INTENT_EVALUATION_PROMPT,
 )
+from lightspeed_evaluation.core.metrics.custom.proposal_eval import (
+    evaluate_proposal_status,
+)
 from lightspeed_evaluation.core.metrics.custom.tool_eval import evaluate_tool_calls
 
 __all__ = [
     "CustomMetrics",
     "evaluate_keywords",
+    "evaluate_proposal_status",
     "evaluate_tool_calls",
     # Prompts
     "ANSWER_CORRECTNESS_PROMPT",
diff --git a/src/lightspeed_evaluation/core/metrics/custom/custom.py b/src/lightspeed_evaluation/core/metrics/custom/custom.py
index a3cd69a4..ad1fca8c 100644
--- a/src/lightspeed_evaluation/core/metrics/custom/custom.py
+++ b/src/lightspeed_evaluation/core/metrics/custom/custom.py
@@ -10,6 +10,9 @@
     INTENT_EVALUATION_PROMPT,
 )
 from lightspeed_evaluation.core.metrics.custom.keywords_eval import evaluate_keywords
+from lightspeed_evaluation.core.metrics.custom.proposal_eval import (
+    evaluate_proposal_status,
+)
 from lightspeed_evaluation.core.metrics.custom.tool_eval import evaluate_tool_calls
 from lightspeed_evaluation.core.models import EvaluationScope, TurnData
 from lightspeed_evaluation.core.system.exceptions import LLMError
@@ -44,6 +47,7 @@ def __init__(
             "answer_correctness": self._evaluate_answer_correctness,
             "intent_eval": self._evaluate_intent,
             "tool_eval": self._evaluate_tool_calls,
+            "proposal_status": evaluate_proposal_status,
         }
 
         print(f"✅ Custom Metrics initialized: {self.llm.model_name}")
diff --git a/src/lightspeed_evaluation/core/metrics/custom/proposal_eval.py b/src/lightspeed_evaluation/core/metrics/custom/proposal_eval.py
new file mode 100644
index 00000000..874b8129
--- /dev/null
+++ b/src/lightspeed_evaluation/core/metrics/custom/proposal_eval.py
@@ -0,0 +1,209 @@
+"""Proposal status evaluation for CRD-based agent workflows."""
+
+from typing import Any, Optional
+
+from lightspeed_evaluation.core.models import TurnData
+
+
+def _derive_phase(
+    conditions: list[dict[str, Any]],
+    proposal_spec: Optional[dict[str, Any]] = None,
+) -> str:
+    """Derive the terminal phase from CRD conditions.
+
+    Args:
+        conditions: List of condition dicts from proposal_status.
+        proposal_spec: Proposal spec to determine the last expected step.
+
+    Returns:
+        Phase string: Completed, Failed, Denied, Escalated, or InProgress.
+    """
+    by_type = {c["type"]: c for c in conditions}
+
+    if by_type.get("Denied", {}).get("status") == "True":
+        return "Denied"
+    if by_type.get("Escalated", {}).get("status") == "True":
+        return "Escalated"
+
+    for c in conditions:
+        if c.get("status") == "False" and c.get("reason") != "RetryingExecution":
+            return "Failed"
+
+    step_to_condition = {"verification": "Verified", "execution": "Executed"}
+    if proposal_spec:
+        last = next(
+            (cond for step, cond in step_to_condition.items() if step in proposal_spec),
+            "Analyzed",
+        )
+    else:
+        last = "Analyzed"
+        for step in ("Verified", "Executed", "Analyzed"):
+            if by_type.get(step, {}).get("status") == "True":
+                last = step
+                break
+
+    if by_type.get(last, {}).get("status") == "True":
+        return "Completed"
+
+    return "InProgress"
+
+
+def _check_phase(
+    expected: dict[str, Any],
+    conditions: list[dict[str, Any]],
+    proposal_spec: Optional[dict[str, Any]],
+) -> Optional[tuple[bool, str]]:
+    """Check exact phase match."""
+    phase = expected.get("phase")
+    if phase is None:
+        return None
+
+    actual = _derive_phase(conditions, proposal_spec)
+    if actual == phase:
+        return True, f"Phase matches: {actual}"
+    return False, f"Phase mismatch: expected '{phase}', got '{actual}'"
+
+
+def _check_phase_in(
+    expected: dict[str, Any],
+    conditions: list[dict[str, Any]],
+    proposal_spec: Optional[dict[str, Any]],
+) -> Optional[tuple[bool, str]]:
+    """Check phase membership in a list."""
+    phase_in = expected.get("phase_in")
+    if phase_in is None:
+        return None
+
+    actual = _derive_phase(conditions, proposal_spec)
+    if actual in phase_in:
+        return True, f"Phase '{actual}' in {phase_in}"
+    return False, f"Phase '{actual}' not in {phase_in}"
+
+
+def _check_conditions(
+    expected: dict[str, Any],
+    conditions: list[dict[str, Any]],
+) -> Optional[tuple[bool, str]]:
+    """Check specific condition assertions."""
+    expected_conditions = expected.get("conditions")
+    if expected_conditions is None:
+        return None
+
+    by_type = {c["type"]: c for c in conditions}
+
+    for exp_cond in expected_conditions:
+        cond_type = exp_cond.get("type")
+        if cond_type is None:
+            return False, "Condition assertion missing 'type' field"
+
+        actual_cond = by_type.get(cond_type)
+        if actual_cond is None:
+            return False, f"Condition '{cond_type}' not found in proposal status"
+
+        exp_status = exp_cond.get("status")
+        if exp_status is not None and actual_cond.get("status") != exp_status:
+            return (
+                False,
+                f"Condition '{cond_type}' status: "
+                f"expected '{exp_status}', got '{actual_cond.get('status')}'",
+            )
+
+        exp_reason = exp_cond.get("reason")
+        if exp_reason is not None and actual_cond.get("reason") != exp_reason:
+            return (
+                False,
+                f"Condition '{cond_type}' reason: "
+                f"expected '{exp_reason}', got '{actual_cond.get('reason')}'",
+            )
+
+    return True, "All condition assertions passed"
+
+
+def _check_verification(
+    expected: dict[str, Any],
+    conditions: list[dict[str, Any]],
+) -> Optional[tuple[bool, str]]:
+    """Check verification-specific assertions."""
+    verification = expected.get("verification")
+    if verification is None:
+        return None
+
+    by_type = {c["type"]: c for c in conditions}
+    verified = by_type.get("Verified")
+
+    if verified is None:
+        return False, "Verified condition not found in proposal status"
+
+    passed = verification.get("passed")
+    if passed is not None:
+        actual_passed = verified.get("status") == "True"
+        if actual_passed != passed:
+            return (
+                False,
+                f"Verification passed: expected {passed}, got {actual_passed}",
+            )
+
+    summary_contains = verification.get("summary_contains")
+    if summary_contains is not None:
+        message = verified.get("message", "")
+        if summary_contains.lower() not in message.lower():
+            return (
+                False,
+                f"Verification summary does not contain '{summary_contains}': "
+                f"got '{message[:200]}'",
+            )
+
+    return True, "Verification assertions passed"
+
+
+def evaluate_proposal_status(
+    _conv_data: Any,
+    _turn_idx: Optional[int],
+    turn_data: Optional[TurnData],
+    is_conversation: bool,
+) -> tuple[Optional[float], str]:
+    """Evaluate proposal status against expected assertions.
+
+    Args:
+        _conv_data: Conversation data (unused).
+        _turn_idx: Turn index (unused).
+        turn_data: Turn data with proposal_status and expected_proposal_status.
+        is_conversation: Whether this is conversation-level evaluation.
+
+    Returns:
+        Tuple of (score, reason). Score is 1.0 if all checks pass, 0.0 on
+        first failure, None if metric should be skipped.
+    """
+    if is_conversation:
+        return None, "Proposal status is a turn-level metric"
+
+    if turn_data is None:
+        return None, "TurnData is required for proposal status evaluation"
+
+    if not turn_data.expected_proposal_status:
+        return None, "No expected_proposal_status provided"
+
+    if not turn_data.proposal_status:
+        return 0.0, "proposal_status not populated by driver"
+
+    expected = turn_data.expected_proposal_status
+    conditions = turn_data.proposal_status.get("conditions", [])
+    proposal_spec = turn_data.proposal_spec
+
+    checks = [
+        _check_phase(expected, conditions, proposal_spec),
+        _check_phase_in(expected, conditions, proposal_spec),
+        _check_conditions(expected, conditions),
+        _check_verification(expected, conditions),
+    ]
+
+    reasons: list[str] = []
+    for result in checks:
+        if result is None:
+            continue
+        passed, reason = result
+        if not passed:
+            return 0.0, reason
+        reasons.append(reason)
+
+    return 1.0, "; ".join(reasons) if reasons else "All checks passed"
diff --git a/src/lightspeed_evaluation/core/models/__init__.py b/src/lightspeed_evaluation/core/models/__init__.py
index 588478c8..1af55c5e 100644
--- a/src/lightspeed_evaluation/core/models/__init__.py
+++ b/src/lightspeed_evaluation/core/models/__init__.py
@@ -6,6 +6,7 @@
     HttpApiAgentConfig,
     MCPHeadersConfig,
     MCPServerConfig,
+    ProposalAgentConfig,
 )
 from lightspeed_evaluation.core.models.api import (
     APIRequest,
@@ -57,6 +58,7 @@
     "HttpApiAgentConfig",
     "MCPHeadersConfig",
     "MCPServerConfig",
+    "ProposalAgentConfig",
     # Data models
     "TurnData",
     "EvaluationData",
diff --git a/src/lightspeed_evaluation/core/models/agents.py b/src/lightspeed_evaluation/core/models/agents.py
index 7ad20129..2b621f89 100644
--- a/src/lightspeed_evaluation/core/models/agents.py
+++ b/src/lightspeed_evaluation/core/models/agents.py
@@ -134,9 +134,22 @@ class HttpApiAgentConfig(HttpApiBaseFields):
     )
 
 
+class ProposalAgentConfig(BaseModel):
+    """Configuration for a Proposal CRD-based agent."""
+
+    model_config = ConfigDict(extra="forbid")
+
+    type: Literal["proposal"] = "proposal"
+    namespace: str
+    auto_approve: bool = True
+    cleanup_proposals: bool = True
+    timeout: int = Field(default=900, gt=0)
+    poll_interval: int = Field(default=2, gt=0)
+
+
 # Discriminated union of all agent config types; extend by adding new
 # config classes to support additional agent types.
-AgentDefinition = Union[HttpApiAgentConfig]
+AgentDefinition = Union[HttpApiAgentConfig, ProposalAgentConfig]
 
 
 class AgentDefaultConfig(BaseModel):
diff --git a/src/lightspeed_evaluation/core/models/data.py b/src/lightspeed_evaluation/core/models/data.py
index 1509f989..cc4489ee 100644
--- a/src/lightspeed_evaluation/core/models/data.py
+++ b/src/lightspeed_evaluation/core/models/data.py
@@ -116,7 +116,7 @@ class TurnData(StreamingMetricsMixin):
         description="Expected proposal status for assertion metrics",
     )
     proposal_status: Optional[dict[str, Any]] = Field(
-        default=None, description="Raw CRD status populated by SubprocessDriver"
+        default=None, description="Raw CRD status populated by ProposalDriver"
     )
 
     # Set of turn metrics that don't pass the validation to ignore them later
diff --git a/src/lightspeed_evaluation/core/system/validator.py b/src/lightspeed_evaluation/core/system/validator.py
index 4c797897..25f10d17 100644
--- a/src/lightspeed_evaluation/core/system/validator.py
+++ b/src/lightspeed_evaluation/core/system/validator.py
@@ -58,6 +58,10 @@
             "with 'tool_name', 'arguments', and optional 'result'"
         ),
     },
+    "custom:proposal_status": {
+        "required_fields": ["expected_proposal_status"],
+        "description": "requires 'expected_proposal_status' field",
+    },
     "script:action_eval": {
         "required_fields": ["verify_script"],
         "description": "requires 'verify_script' field",
diff --git a/src/lightspeed_evaluation/pipeline/evaluation/__init__.py b/src/lightspeed_evaluation/pipeline/evaluation/__init__.py
index 56d5afea..54ef9bf5 100644
--- a/src/lightspeed_evaluation/pipeline/evaluation/__init__.py
+++ b/src/lightspeed_evaluation/pipeline/evaluation/__init__.py
@@ -9,8 +9,9 @@
     from lightspeed_evaluation.pipeline.evaluation.amender import APIDataAmender
     from lightspeed_evaluation.pipeline.evaluation.driver import (
         AgentDriver,
-        AgentDriverRegistry,
+        ProposalDriver,
     )
+    from lightspeed_evaluation.pipeline.evaluation.registry import AgentDriverRegistry
     from lightspeed_evaluation.pipeline.evaluation.errors import EvaluationErrorHandler
     from lightspeed_evaluation.pipeline.evaluation.evaluator import MetricsEvaluator
     from lightspeed_evaluation.pipeline.evaluation.pipeline import EvaluationPipeline
@@ -32,7 +33,7 @@
         "AgentDriver",
     ),
     "AgentDriverRegistry": (
-        "lightspeed_evaluation.pipeline.evaluation.driver",
+        "lightspeed_evaluation.pipeline.evaluation.registry",
         "AgentDriverRegistry",
     ),
     "ConversationProcessor": (
@@ -47,6 +48,10 @@
         "lightspeed_evaluation.pipeline.evaluation.evaluator",
         "MetricsEvaluator",
     ),
+    "ProposalDriver": (
+        "lightspeed_evaluation.pipeline.evaluation.driver",
+        "ProposalDriver",
+    ),
 }
 
 __getattr__ = create_lazy_getattr(_LAZY_IMPORTS, __name__)
diff --git a/src/lightspeed_evaluation/pipeline/evaluation/driver.py b/src/lightspeed_evaluation/pipeline/evaluation/driver.py
index 7c86b9ae..35a15380 100644
--- a/src/lightspeed_evaluation/pipeline/evaluation/driver.py
+++ b/src/lightspeed_evaluation/pipeline/evaluation/driver.py
@@ -1,13 +1,26 @@
-"""Agent driver architecture for evaluation pipeline."""
+"""Agent driver implementations for evaluation pipeline."""
 
 from __future__ import annotations
 
+import json
 import logging
+import os
+import re
+import shutil
+import subprocess
+import time
+import uuid
 from abc import ABC, abstractmethod
+from enum import StrEnum
 from typing import Any, Optional, cast
 
 from lightspeed_evaluation.core.api import APIClient
-from lightspeed_evaluation.core.models import APIConfig, HttpApiAgentConfig, TurnData
+from lightspeed_evaluation.core.models import (
+    APIConfig,
+    HttpApiAgentConfig,
+    ProposalAgentConfig,
+    TurnData,
+)
 from lightspeed_evaluation.core.system.exceptions import ConfigurationError
 from lightspeed_evaluation.pipeline.evaluation.amender import APIDataAmender
 
@@ -84,30 +97,307 @@ def _create_api_client(self, config: HttpApiAgentConfig) -> Optional[APIClient]:
         return APIClient(api_config)
 
 
-AGENT_DRIVERS: dict[str, type[AgentDriver]] = {
-    "http_api": HttpApiDriver,
-}
+# ---------------------------------------------------------------------------
+# Proposal driver — CRD-based agent lifecycle via oc/kubectl CLI
+# ---------------------------------------------------------------------------
 
 
-class AgentDriverRegistry:  # pylint: disable=too-few-public-methods
-    """Registry for creating agent drivers."""
+class TerminalOutcome(StrEnum):
+    """Terminal outcomes for a Proposal CR lifecycle.
 
-    def __init__(self, drivers: Optional[dict[str, type[AgentDriver]]] = None) -> None:
-        """Initialize the driver registry."""
-        self._drivers = drivers or AGENT_DRIVERS
+    These are driver-level labels, not CRD API values. The CRD exposes
+    conditions with statuses that the driver interprets:
 
-    def create_driver(
-        self, agent_config: dict[str, Any], *, enabled: bool = True
-    ) -> AgentDriver:
-        """Create a driver instance from resolved agent configuration."""
-        agent_type = agent_config.get("type")
-        if not agent_type:
-            raise ConfigurationError("Agent config missing required 'type' field")
+    - Analyzed:  True = analysis succeeded, False = failed, Unknown = in progress
+    - Executed:  True = execution succeeded, False = failed, Unknown = in progress
+    - Verified:  True = verification passed, False = failed, Unknown = in progress
+    - Denied:    True = user denied a step (terminal)
+    - Escalated: True = escalation complete (terminal), False = failed, Unknown = in progress
 
-        driver_cls = self._drivers.get(agent_type)
-        if not driver_cls:
-            raise ConfigurationError(
-                f"Unsupported agent type '{agent_type}'. "
-                f"Supported types: {sorted(self._drivers)}"
+    Special reason: RetryingExecution (Verified=False triggers retry, not failure).
+    """
+
+    COMPLETED = "Completed"
+    FAILED = "Failed"
+    DENIED = "Denied"
+    ESCALATED = "Escalated"
+
+
+CRD_GROUP = "agentic.openshift.io"
+CRD_VERSION = "v1alpha1"
+CRD_KIND = "Proposal"
+CRD_PLURAL = "proposals"
+CRD_API_VERSION = f"{CRD_GROUP}/{CRD_VERSION}"
+
+CLI_COMMAND_TIMEOUT = 30
+
+
+class ProposalDriver(AgentDriver):
+    """Driver that manages Proposal CR lifecycle via oc/kubectl CLI."""
+
+    def __init__(self, config: dict[str, Any], *, enabled: bool = True) -> None:
+        """Initialize the proposal driver."""
+        super().__init__(config, enabled=enabled)
+        self._config = ProposalAgentConfig.model_validate(config)
+        self._cli = self._resolve_cli()
+
+    def validate_config(self, config: dict[str, Any]) -> None:
+        """Validate proposal driver configuration."""
+        ProposalAgentConfig.model_validate(config)
+        if not shutil.which("oc") and not shutil.which("kubectl"):
+            raise ConfigurationError("Neither 'oc' nor 'kubectl' found on PATH")
+
+    def execute_turn(
+        self, turn_data: TurnData, conversation_id: Optional[str] = None
+    ) -> tuple[Optional[str], Optional[str]]:
+        """Execute a Proposal CR lifecycle for a single turn."""
+        # Proposal CR lifecycle:
+        # 1. Build Proposal CR from TurnData fields
+        # 2. Apply CR to cluster via oc/kubectl
+        # 3. Poll status — read .status.conditions each interval
+        # 4. Auto-approve — create ProposalApproval when Analyzed=True
+        # 5. Terminal detection — break on completed/failed/denied/escalated
+        # 6. Amend TurnData — set response and proposal_status in-place
+        # 7. Cleanup — delete Proposal CR if cleanup_proposals enabled
+        suffix = uuid.uuid4().hex[:8]
+        safe_id = (
+            re.sub(r"[^a-z0-9-]", "", conversation_id.lower())[:50]
+            if conversation_id
+            else ""
+        )
+        cr_name = f"eval-{safe_id}-{suffix}" if safe_id else f"eval-{suffix}"
+        proposal_spec = turn_data.proposal_spec or {}
+        manifest = self._build_proposal_cr(turn_data, cr_name)
+
+        result = self._apply(manifest)
+        if result.returncode != 0:
+            return (
+                f"Failed to apply Proposal CR: {result.stderr.strip()}",
+                None,
+            )
+
+        if self._config.auto_approve:
+            err = self._approve_when_ready(cr_name, proposal_spec)
+            if err:
+                self._cleanup(cr_name)
+                return (err, None)
+
+        outcome: Optional[TerminalOutcome] = None
+        status_dict: dict[str, Any] = {}
+        start = time.monotonic()
+
+        while time.monotonic() - start < self._config.timeout:
+            time.sleep(self._config.poll_interval)
+            status_dict, err = self._get_status(cr_name)
+            if err:
+                self._cleanup(cr_name)
+                return (err, None)
+            conditions = status_dict.get("conditions", [])
+
+            outcome = self._is_terminal(conditions, proposal_spec)
+            if outcome is not None:
+                break
+        else:
+            self._cleanup(cr_name)
+            return (
+                f"Timeout after {self._config.timeout}s for '{cr_name}'",
+                None,
             )
-        return driver_cls(agent_config, enabled=enabled)
+
+        turn_data.response = self._extract_summary(status_dict)
+        turn_data.proposal_status = status_dict
+        self._cleanup(cr_name)
+
+        if outcome == TerminalOutcome.COMPLETED:
+            return (None, None)
+        return (
+            f"Proposal '{cr_name}' terminated with outcome: {outcome}",
+            None,
+        )
+
+    @staticmethod
+    def _resolve_cli() -> str:
+        """Resolve oc or kubectl binary path."""
+        return shutil.which("oc") or shutil.which("kubectl") or ""
+
+    def _run_cli(
+        self,
+        args: list[str],
+        stdin: Optional[str] = None,
+    ) -> subprocess.CompletedProcess[str]:
+        """Run a CLI command and return the result."""
+        return subprocess.run(
+            [self._cli, *args],
+            input=stdin,
+            text=True,
+            capture_output=True,
+            env=os.environ.copy(),
+            timeout=CLI_COMMAND_TIMEOUT,
+            check=False,
+        )
+
+    def _apply(self, manifest: dict[str, Any]) -> subprocess.CompletedProcess[str]:
+        """Apply a CR manifest via stdin."""
+        return self._run_cli(["apply", "-f", "-"], stdin=json.dumps(manifest))
+
+    def _get_status(self, cr_name: str) -> tuple[dict[str, Any], Optional[str]]:
+        """Get Proposal CR status."""
+        result = self._run_cli(
+            [
+                "get",
+                CRD_PLURAL,
+                cr_name,
+                "-n",
+                self._config.namespace,
+                "-o",
+                "json",
+            ]
+        )
+        if result.returncode != 0:
+            return {}, f"Failed to get status for '{cr_name}': {result.stderr.strip()}"
+        try:
+            cr = json.loads(result.stdout)
+        except json.JSONDecodeError as exc:
+            return {}, f"Failed to parse status JSON for '{cr_name}': {exc}"
+        return cr.get("status", {}), None
+
+    def _delete(self, cr_name: str) -> None:
+        """Delete a Proposal CR."""
+        self._run_cli(
+            [
+                "delete",
+                CRD_PLURAL,
+                cr_name,
+                "-n",
+                self._config.namespace,
+                "--ignore-not-found",
+            ]
+        )
+
+    def _cleanup(self, cr_name: str) -> None:
+        """Delete the Proposal CR if cleanup is enabled."""
+        if not self._config.cleanup_proposals:
+            return
+        try:
+            self._delete(cr_name)
+            logger.info("Cleaned up Proposal CR '%s'", cr_name)
+        except Exception:  # pylint: disable=broad-exception-caught
+            logger.warning("Failed to clean up Proposal CR '%s'", cr_name)
+
+    def _build_proposal_cr(self, turn_data: TurnData, cr_name: str) -> dict[str, Any]:
+        """Build Proposal CR manifest from TurnData."""
+        spec: dict[str, Any] = {"request": turn_data.query}
+        if turn_data.proposal_spec:
+            spec.update(turn_data.proposal_spec)
+        spec.setdefault("analysis", {})
+        return {
+            "apiVersion": CRD_API_VERSION,
+            "kind": CRD_KIND,
+            "metadata": {
+                "name": cr_name,
+                "namespace": self._config.namespace,
+            },
+            "spec": spec,
+        }
+
+    def _build_approval_cr(
+        self, cr_name: str, proposal_spec: dict[str, Any]
+    ) -> dict[str, Any]:
+        """Build ProposalApproval CR manifest."""
+        analysis_params: dict[str, Any] = {}
+        if "analysis" in proposal_spec and isinstance(proposal_spec["analysis"], dict):
+            agent = proposal_spec["analysis"].get("agent")
+            if agent:
+                analysis_params["agent"] = agent
+        if not analysis_params:
+            analysis_params["agent"] = "default"
+
+        stages: list[dict[str, Any]] = [
+            {"type": "Analysis", "decision": "Approved", "analysis": analysis_params},
+        ]
+        if "execution" in proposal_spec:
+            exec_params: dict[str, Any] = {"option": 0}
+            if isinstance(proposal_spec["execution"], dict):
+                agent = proposal_spec["execution"].get("agent")
+                if agent:
+                    exec_params["agent"] = agent
+            stages.append(
+                {
+                    "type": "Execution",
+                    "decision": "Approved",
+                    "execution": exec_params,
+                }
+            )
+        if "verification" in proposal_spec:
+            verif_params: dict[str, Any] = {}
+            if isinstance(proposal_spec["verification"], dict):
+                agent = proposal_spec["verification"].get("agent")
+                if agent:
+                    verif_params["agent"] = agent
+            if not verif_params:
+                verif_params["agent"] = "default"
+            stages.append(
+                {
+                    "type": "Verification",
+                    "decision": "Approved",
+                    "verification": verif_params,
+                }
+            )
+        return {
+            "apiVersion": CRD_API_VERSION,
+            "kind": "ProposalApproval",
+            "metadata": {
+                "name": cr_name,
+                "namespace": self._config.namespace,
+            },
+            "spec": {"stages": stages},
+        }
+
+    def _approve_when_ready(
+        self, cr_name: str, proposal_spec: dict[str, Any]
+    ) -> Optional[str]:
+        """Wait for Proposal CR to exist on the cluster, then approve all stages."""
+        start = time.monotonic()
+        while time.monotonic() - start < self._config.timeout:
+            _, err = self._get_status(cr_name)
+            if err is None:
+                break
+            time.sleep(self._config.poll_interval)
+        else:
+            return f"Proposal '{cr_name}' not found within {self._config.timeout}s"
+
+        approval = self._build_approval_cr(cr_name, proposal_spec)
+        result = self._apply(approval)
+        if result.returncode != 0:
+            return f"Failed to apply ProposalApproval: {result.stderr.strip()}"
+        return None
+
+    @staticmethod
+    def _is_terminal(
+        conditions: list[dict[str, Any]], proposal_spec: dict[str, Any]
+    ) -> Optional[TerminalOutcome]:
+        """Check if conditions indicate a terminal state."""
+        by_type = {c["type"]: c for c in conditions}
+        if by_type.get("Denied", {}).get("status") == "True":
+            return TerminalOutcome.DENIED
+        if by_type.get("Escalated", {}).get("status") == "True":
+            return TerminalOutcome.ESCALATED
+        for c in conditions:
+            if c.get("status") == "False" and c.get("reason") != "RetryingExecution":
+                return TerminalOutcome.FAILED
+        if "verification" in proposal_spec:
+            last = "Verified"
+        elif "execution" in proposal_spec:
+            last = "Executed"
+        else:
+            last = "Analyzed"
+        if by_type.get(last, {}).get("status") == "True":
+            return TerminalOutcome.COMPLETED
+        return None
+
+    @staticmethod
+    def _extract_summary(status_dict: dict[str, Any]) -> str:
+        """Extract a human-readable summary from analysis results."""
+        conditions = status_dict.get("conditions", [])
+        messages = [c["message"] for c in conditions if c.get("message")]
+        return "; ".join(messages) if messages else "No summary available"
diff --git a/src/lightspeed_evaluation/pipeline/evaluation/pipeline.py b/src/lightspeed_evaluation/pipeline/evaluation/pipeline.py
index 98f10d29..d211c560 100644
--- a/src/lightspeed_evaluation/pipeline/evaluation/pipeline.py
+++ b/src/lightspeed_evaluation/pipeline/evaluation/pipeline.py
@@ -28,10 +28,8 @@
     ConfigurationError,
     StorageError,
 )
-from lightspeed_evaluation.pipeline.evaluation.driver import (
-    AgentDriver,
-    AgentDriverRegistry,
-)
+from lightspeed_evaluation.pipeline.evaluation.driver import AgentDriver
+from lightspeed_evaluation.pipeline.evaluation.registry import AgentDriverRegistry
 from lightspeed_evaluation.pipeline.evaluation.errors import EvaluationErrorHandler
 from lightspeed_evaluation.pipeline.evaluation.evaluator import MetricsEvaluator
 from lightspeed_evaluation.pipeline.evaluation.processor import (
diff --git a/src/lightspeed_evaluation/pipeline/evaluation/registry.py b/src/lightspeed_evaluation/pipeline/evaluation/registry.py
new file mode 100644
index 00000000..96c0e867
--- /dev/null
+++ b/src/lightspeed_evaluation/pipeline/evaluation/registry.py
@@ -0,0 +1,41 @@
+"""Agent driver registry for evaluation pipeline."""
+
+from __future__ import annotations
+
+from typing import Any, Optional
+
+from lightspeed_evaluation.core.system.exceptions import ConfigurationError
+from lightspeed_evaluation.pipeline.evaluation.driver import (
+    AgentDriver,
+    HttpApiDriver,
+    ProposalDriver,
+)
+
+AGENT_DRIVERS: dict[str, type[AgentDriver]] = {
+    "http_api": HttpApiDriver,
+    "proposal": ProposalDriver,
+}
+
+
+class AgentDriverRegistry:  # pylint: disable=too-few-public-methods
+    """Registry for creating agent drivers."""
+
+    def __init__(self, drivers: Optional[dict[str, type[AgentDriver]]] = None) -> None:
+        """Initialize the driver registry."""
+        self._drivers = drivers or AGENT_DRIVERS
+
+    def create_driver(
+        self, agent_config: dict[str, Any], *, enabled: bool = True
+    ) -> AgentDriver:
+        """Create a driver instance from resolved agent configuration."""
+        agent_type = agent_config.get("type")
+        if not agent_type:
+            raise ConfigurationError("Agent config missing required 'type' field")
+
+        driver_cls = self._drivers.get(agent_type)
+        if not driver_cls:
+            raise ConfigurationError(
+                f"Unsupported agent type '{agent_type}'. "
+                f"Supported types: {sorted(self._drivers)}"
+            )
+        return driver_cls(agent_config, enabled=enabled)
diff --git a/src/lightspeed_evaluation/pipeline/evaluation/subprocess_driver.py b/src/lightspeed_evaluation/pipeline/evaluation/subprocess_driver.py
deleted file mode 100644
index 72694eba..00000000
--- a/src/lightspeed_evaluation/pipeline/evaluation/subprocess_driver.py
+++ /dev/null
@@ -1,296 +0,0 @@
-"""Subprocess-based agent driver for CRD lifecycle management."""
-
-from __future__ import annotations
-
-import json
-import logging
-import os
-import shutil
-import subprocess
-import time
-import uuid
-from enum import StrEnum
-from typing import Any, Literal, Optional
-
-from pydantic import BaseModel, ConfigDict, Field
-
-from lightspeed_evaluation.core.models import TurnData
-from lightspeed_evaluation.core.system.exceptions import ConfigurationError
-from lightspeed_evaluation.pipeline.evaluation.driver import AgentDriver
-
-logger = logging.getLogger(__name__)
-
-
-class TerminalOutcome(StrEnum):
-    """Terminal outcomes for a Proposal CR lifecycle.
-
-    These are driver-level labels, not CRD API values. The CRD exposes
-    conditions with statuses that the driver interprets:
-
-    - Analyzed:  True = analysis succeeded, False = failed, Unknown = in progress
-    - Executed:  True = execution succeeded, False = failed, Unknown = in progress
-    - Verified:  True = verification passed, False = failed, Unknown = in progress
-    - Denied:    True = user denied a step (terminal)
-    - Escalated: True = escalation complete (terminal), False = failed, Unknown = in progress
-
-    Special reason: RetryingExecution (Verified=False triggers retry, not failure).
-    """
-
-    COMPLETED = "Completed"
-    FAILED = "Failed"
-    DENIED = "Denied"
-    ESCALATED = "Escalated"
-
-
-CRD_GROUP = "agentic.openshift.io"
-CRD_VERSION = "v1alpha1"
-CRD_KIND = "Proposal"
-CRD_PLURAL = "proposals"
-CRD_API_VERSION = f"{CRD_GROUP}/{CRD_VERSION}"
-
-CLI_COMMAND_TIMEOUT = 30
-
-
-class SubprocessAgentConfig(BaseModel):
-    """Configuration for a subprocess-based CRD agent."""
-
-    model_config = ConfigDict(extra="forbid")
-
-    type: Literal["subprocess"] = "subprocess"
-    namespace: str
-    auto_approve: bool = True
-    cleanup_proposals: bool = True
-    timeout: int = Field(default=900, gt=0)
-    poll_interval: int = Field(default=2, gt=0)
-
-
-class SubprocessDriver(AgentDriver):
-    """Driver that manages Proposal CR lifecycle via oc/kubectl CLI."""
-
-    def __init__(self, config: dict[str, Any]) -> None:
-        """Initialize the subprocess driver."""
-        super().__init__(config)
-        self._config = SubprocessAgentConfig.model_validate(config)
-        self._cli = self._resolve_cli()
-
-    def validate_config(self, config: dict[str, Any]) -> None:
-        """Validate subprocess driver configuration."""
-        SubprocessAgentConfig.model_validate(config)
-        if not shutil.which("oc") and not shutil.which("kubectl"):
-            raise ConfigurationError("Neither 'oc' nor 'kubectl' found on PATH")
-
-    def execute_turn(
-        self, turn_data: TurnData, conversation_id: Optional[str] = None
-    ) -> tuple[Optional[str], Optional[str]]:
-        """Execute a Proposal CR lifecycle for a single turn."""
-        # Proposal CR lifecycle:
-        # 1. Build Proposal CR from TurnData fields
-        # 2. Apply CR to cluster via oc/kubectl
-        # 3. Poll status — read .status.conditions each interval
-        # 4. Auto-approve — create ProposalApproval when Analyzed=True
-        # 5. Terminal detection — break on completed/failed/denied/escalated
-        # 6. Amend TurnData — set response and proposal_status in-place
-        # 7. Cleanup — delete Proposal CR if cleanup_proposals enabled
-        suffix = uuid.uuid4().hex[:8]
-        cr_name = (
-            f"eval-{conversation_id}-{suffix}" if conversation_id else f"eval-{suffix}"
-        )
-        proposal_spec = turn_data.proposal_spec or {}
-        manifest = self._build_proposal_cr(turn_data, cr_name)
-
-        result = self._apply(manifest)
-        if result.returncode != 0:
-            return (
-                f"Failed to apply Proposal CR: {result.stderr.strip()}",
-                None,
-            )
-
-        approved = False
-        outcome: Optional[TerminalOutcome] = None
-        status_dict: dict[str, Any] = {}
-        start = time.monotonic()
-
-        while time.monotonic() - start < self._config.timeout:
-            time.sleep(self._config.poll_interval)
-            status_dict, err = self._get_status(cr_name)
-            if err:
-                self._cleanup(cr_name)
-                return (err, None)
-            conditions = status_dict.get("conditions", [])
-
-            if (
-                self._config.auto_approve
-                and not approved
-                and self._should_approve(conditions)
-            ):
-                self._apply(self._build_approval_cr(cr_name, proposal_spec))
-                approved = True
-
-            outcome = self._is_terminal(conditions, proposal_spec)
-            if outcome is not None:
-                break
-        else:
-            self._cleanup(cr_name)
-            return (
-                f"Timeout after {self._config.timeout}s for '{cr_name}'",
-                None,
-            )
-
-        turn_data.response = self._extract_summary(status_dict)
-        turn_data.proposal_status = status_dict
-        self._cleanup(cr_name)
-
-        if outcome == TerminalOutcome.COMPLETED:
-            return (None, None)
-        return (
-            f"Proposal '{cr_name}' terminated with outcome: {outcome}",
-            None,
-        )
-
-    @staticmethod
-    def _resolve_cli() -> str:
-        """Resolve oc or kubectl binary path."""
-        return shutil.which("oc") or shutil.which("kubectl") or ""
-
-    def _run_cli(
-        self,
-        args: list[str],
-        stdin: Optional[str] = None,
-    ) -> subprocess.CompletedProcess[str]:
-        """Run a CLI command and return the result."""
-        return subprocess.run(
-            [self._cli, *args],
-            input=stdin,
-            text=True,
-            capture_output=True,
-            env=os.environ.copy(),
-            timeout=CLI_COMMAND_TIMEOUT,
-            check=False,
-        )
-
-    def _apply(self, manifest: dict[str, Any]) -> subprocess.CompletedProcess[str]:
-        """Apply a CR manifest via stdin."""
-        return self._run_cli(["apply", "-f", "-"], stdin=json.dumps(manifest))
-
-    def _get_status(self, cr_name: str) -> tuple[dict[str, Any], Optional[str]]:
-        """Get Proposal CR status."""
-        result = self._run_cli(
-            [
-                "get",
-                CRD_PLURAL,
-                cr_name,
-                "-n",
-                self._config.namespace,
-                "-o",
-                "json",
-            ]
-        )
-        if result.returncode != 0:
-            return {}, f"Failed to get status for '{cr_name}': {result.stderr.strip()}"
-        try:
-            cr = json.loads(result.stdout)
-        except json.JSONDecodeError as exc:
-            return {}, f"Failed to parse status JSON for '{cr_name}': {exc}"
-        return cr.get("status", {}), None
-
-    def _delete(self, cr_name: str) -> None:
-        """Delete a Proposal CR."""
-        self._run_cli(
-            [
-                "delete",
-                CRD_PLURAL,
-                cr_name,
-                "-n",
-                self._config.namespace,
-                "--ignore-not-found",
-            ]
-        )
-
-    def _cleanup(self, cr_name: str) -> None:
-        """Delete the Proposal CR if cleanup is enabled."""
-        if not self._config.cleanup_proposals:
-            return
-        try:
-            self._delete(cr_name)
-        except Exception:  # pylint: disable=broad-exception-caught
-            logger.warning("Failed to clean up Proposal CR '%s'", cr_name)
-
-    def _build_proposal_cr(self, turn_data: TurnData, cr_name: str) -> dict[str, Any]:
-        """Build Proposal CR manifest from TurnData."""
-        spec: dict[str, Any] = {"request": turn_data.query}
-        if turn_data.proposal_spec:
-            spec.update(turn_data.proposal_spec)
-        spec.setdefault("analysis", {})
-        return {
-            "apiVersion": CRD_API_VERSION,
-            "kind": CRD_KIND,
-            "metadata": {
-                "name": cr_name,
-                "namespace": self._config.namespace,
-            },
-            "spec": spec,
-        }
-
-    def _build_approval_cr(
-        self, cr_name: str, proposal_spec: dict[str, Any]
-    ) -> dict[str, Any]:
-        """Build ProposalApproval CR manifest."""
-        stages: list[dict[str, Any]] = [
-            {"type": "Analysis", "decision": "Approved"},
-        ]
-        if "execution" in proposal_spec:
-            stages.append(
-                {
-                    "type": "Execution",
-                    "decision": "Approved",
-                    "execution": {"option": 0},
-                }
-            )
-        if "verification" in proposal_spec:
-            stages.append({"type": "Verification", "decision": "Approved"})
-        return {
-            "apiVersion": CRD_API_VERSION,
-            "kind": "ProposalApproval",
-            "metadata": {
-                "name": cr_name,
-                "namespace": self._config.namespace,
-            },
-            "spec": {"stages": stages},
-        }
-
-    @staticmethod
-    def _should_approve(conditions: list[dict[str, Any]]) -> bool:
-        """Check if conditions indicate the proposal is ready for approval."""
-        by_type = {c["type"]: c for c in conditions}
-        analyzed = by_type.get("Analyzed")
-        return analyzed is not None and analyzed.get("status") == "True"
-
-    @staticmethod
-    def _is_terminal(
-        conditions: list[dict[str, Any]], proposal_spec: dict[str, Any]
-    ) -> Optional[TerminalOutcome]:
-        """Check if conditions indicate a terminal state."""
-        by_type = {c["type"]: c for c in conditions}
-        if by_type.get("Denied", {}).get("status") == "True":
-            return TerminalOutcome.DENIED
-        if by_type.get("Escalated", {}).get("status") == "True":
-            return TerminalOutcome.ESCALATED
-        for c in conditions:
-            if c.get("status") == "False" and c.get("reason") != "RetryingExecution":
-                return TerminalOutcome.FAILED
-        if "verification" in proposal_spec:
-            last = "Verified"
-        elif "execution" in proposal_spec:
-            last = "Executed"
-        else:
-            last = "Analyzed"
-        if by_type.get(last, {}).get("status") == "True":
-            return TerminalOutcome.COMPLETED
-        return None
-
-    @staticmethod
-    def _extract_summary(status_dict: dict[str, Any]) -> str:
-        """Extract a human-readable summary from analysis results."""
-        conditions = status_dict.get("conditions", [])
-        messages = [c["message"] for c in conditions if c.get("message")]
-        return "; ".join(messages) if messages else "No summary available"
diff --git a/tests/integration/agentic/fixtures/namespace.yaml b/tests/integration/agentic/fixtures/namespace.yaml
new file mode 100644
index 00000000..a39be1c9
--- /dev/null
+++ b/tests/integration/agentic/fixtures/namespace.yaml
@@ -0,0 +1,4 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: lightspeed-evaluation-test
diff --git a/tests/integration/agentic/fixtures/oomkill-demo.yaml b/tests/integration/agentic/fixtures/oomkill-demo.yaml
new file mode 100644
index 00000000..28420036
--- /dev/null
+++ b/tests/integration/agentic/fixtures/oomkill-demo.yaml
@@ -0,0 +1,24 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: oomkill-demo
+  namespace: lightspeed-evaluation-test
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: oomkill-demo
+  template:
+    metadata:
+      labels:
+        app: oomkill-demo
+    spec:
+      containers:
+        - name: stress
+          image: polinux/stress-ng
+          command: ["stress-ng", "--vm", "1", "--vm-bytes", "256M", "--vm-hang", "0"]
+          resources:
+            requests:
+              memory: "64Mi"
+            limits:
+              memory: "64Mi"
diff --git a/tests/integration/agentic/scripts/cleanup_subprocess_fixtures-claude-vertex.sh b/tests/integration/agentic/scripts/cleanup_subprocess_fixtures-claude-vertex.sh
new file mode 100755
index 00000000..4f84d111
--- /dev/null
+++ b/tests/integration/agentic/scripts/cleanup_subprocess_fixtures-claude-vertex.sh
@@ -0,0 +1,26 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Tear down integration test resources. Only deletes "eval-" prefixed operator
+# resources to avoid touching anything that isn't ours.
+
+OPERATOR_NS="openshift-lightspeed"
+TEST_NS="lightspeed-evaluation-test"
+
+echo "Cleaning up integration test resources..."
+
+# Delete test workload + any leftover Proposals in test namespace
+oc delete deployment oomkill-demo -n "$TEST_NS" --ignore-not-found
+oc delete proposals --all -n "$TEST_NS" --ignore-not-found
+oc delete proposalapprovals --all -n "$TEST_NS" --ignore-not-found
+
+# Delete prefixed operator resources (reverse order of creation)
+oc delete sandboxtemplate eval-lightspeed-agent -n "$OPERATOR_NS" --ignore-not-found
+oc delete agent eval-default --ignore-not-found
+oc delete llmprovider eval-vertex-ai --ignore-not-found
+oc delete secret eval-llm-credentials -n "$OPERATOR_NS" --ignore-not-found
+
+# Delete test namespace (cascades remaining namespaced resources)
+oc delete namespace "$TEST_NS" --ignore-not-found
+
+echo "Cleanup complete."
diff --git a/tests/integration/agentic/scripts/cleanup_subprocess_fixtures-openai.sh b/tests/integration/agentic/scripts/cleanup_subprocess_fixtures-openai.sh
new file mode 100755
index 00000000..ad4b77b7
--- /dev/null
+++ b/tests/integration/agentic/scripts/cleanup_subprocess_fixtures-openai.sh
@@ -0,0 +1,27 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Tear down integration test resources deployed by setup_subprocess_fixtures.sh
+# (OpenAI provider). Only deletes "eval-" prefixed operator resources to avoid
+# touching anything that isn't ours.
+
+OPERATOR_NS="openshift-lightspeed"
+TEST_NS="lightspeed-evaluation-test"
+
+echo "Cleaning up integration test resources..."
+
+# Delete test workload + any leftover Proposals in test namespace
+oc delete deployment oomkill-demo -n "$TEST_NS" --ignore-not-found
+oc delete proposals --all -n "$TEST_NS" --ignore-not-found
+oc delete proposalapprovals --all -n "$TEST_NS" --ignore-not-found
+
+# Delete prefixed operator resources (reverse order of creation)
+oc delete sandboxtemplate eval-lightspeed-agent -n "$OPERATOR_NS" --ignore-not-found
+oc delete agent eval-default --ignore-not-found
+oc delete llmprovider eval-openai --ignore-not-found
+oc delete secret eval-llm-credentials -n "$OPERATOR_NS" --ignore-not-found
+
+# Delete test namespace (cascades remaining namespaced resources)
+oc delete namespace "$TEST_NS" --ignore-not-found
+
+echo "Cleanup complete."
diff --git a/tests/integration/agentic/scripts/setup_subprocess_fixtures-claude-vertex.sh b/tests/integration/agentic/scripts/setup_subprocess_fixtures-claude-vertex.sh
new file mode 100755
index 00000000..f7e46a1e
--- /dev/null
+++ b/tests/integration/agentic/scripts/setup_subprocess_fixtures-claude-vertex.sh
@@ -0,0 +1,138 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Deploy agentic infrastructure + OOMKill test workload for integration tests.
+# All operator-level resources use an "eval-" prefix to avoid conflicts with
+# existing cluster resources.
+#
+# Required env vars:
+#   GCP_CREDENTIALS_FILE          — path to GCP credentials JSON file
+#                                   (default: ~/.config/gcloud/application_default_credentials.json)
+#   ANTHROPIC_VERTEX_PROJECT_ID   — GCP project ID for Vertex AI
+#   SANDBOX_IMAGE                 — sandbox container image URL
+#
+# Optional env vars:
+#   CLOUD_ML_REGION  — default: global
+#   AGENT_MODEL      — default: claude-opus-4-6
+
+OPERATOR_NS="openshift-lightspeed"
+TEST_NS="lightspeed-evaluation-test"
+GCP_CREDENTIALS_FILE="${GCP_CREDENTIALS_FILE:-$HOME/.config/gcloud/application_default_credentials.json}"
+CLOUD_ML_REGION="${CLOUD_ML_REGION:-global}"
+AGENT_MODEL="${AGENT_MODEL:-claude-opus-4-6}"
+
+for var in ANTHROPIC_VERTEX_PROJECT_ID SANDBOX_IMAGE; do
+  if [ -z "${!var:-}" ]; then
+    echo "ERROR: $var is not set" >&2
+    exit 1
+  fi
+done
+
+if [ ! -f "$GCP_CREDENTIALS_FILE" ]; then
+  echo "ERROR: GCP credentials file not found: $GCP_CREDENTIALS_FILE" >&2
+  exit 1
+fi
+
+# 1. Test namespace + OOMKill workload (static fixtures, already test-scoped)
+oc apply -f ../fixtures/namespace.yaml
+oc apply -f ../fixtures/oomkill-demo.yaml
+
+# 2. Secret (GCP credentials) — prefixed name in operator namespace
+oc create secret generic eval-llm-credentials \
+  --from-file=credentials.json="$GCP_CREDENTIALS_FILE" \
+  --from-literal=ANTHROPIC_VERTEX_PROJECT_ID="$ANTHROPIC_VERTEX_PROJECT_ID" \
+  --from-literal=CLOUD_ML_REGION="$CLOUD_ML_REGION" \
+  -n "$OPERATOR_NS" --dry-run=client -o yaml | oc apply -f -
+
+# 3. LLMProvider — references prefixed secret
+cat <<EOF | oc apply -f -
+apiVersion: agentic.openshift.io/v1alpha1
+kind: LLMProvider
+metadata:
+  name: eval-vertex-ai
+spec:
+  type: GoogleCloudVertex
+  googleCloudVertex:
+    credentialsSecret:
+      name: eval-llm-credentials
+    projectID: $ANTHROPIC_VERTEX_PROJECT_ID
+    region: $CLOUD_ML_REGION
+EOF
+
+# 4. Agent — references prefixed LLMProvider
+cat <<EOF | oc apply -f -
+apiVersion: agentic.openshift.io/v1alpha1
+kind: Agent
+metadata:
+  name: eval-default
+spec:
+  llmProvider:
+    name: eval-vertex-ai
+  model: $AGENT_MODEL
+  timeouts:
+    analysisSeconds: 300
+    executionSeconds: 600
+    verificationSeconds: 300
+  maxTurns: 200
+EOF
+
+# 5. SandboxTemplate
+cat <<EOF | oc apply -f -
+apiVersion: extensions.agents.x-k8s.io/v1alpha1
+kind: SandboxTemplate
+metadata:
+  name: eval-lightspeed-agent
+  namespace: $OPERATOR_NS
+spec:
+  networkPolicyManagement: Unmanaged
+  podTemplate:
+    spec:
+      serviceAccountName: lightspeed-agent
+      automountServiceAccountToken: true
+      containers:
+      - name: agent
+        image: $SANDBOX_IMAGE
+        imagePullPolicy: Always
+        ports:
+          - containerPort: 8080
+            protocol: TCP
+        env:
+          - name: LIGHTSPEED_SKILLS_DIR
+            value: /app/skills
+        volumeMounts:
+          - name: home
+            mountPath: /home/agent
+          - name: tmp
+            mountPath: /tmp
+        livenessProbe:
+          httpGet:
+            path: /health
+            port: 8080
+          initialDelaySeconds: 30
+          periodSeconds: 30
+        readinessProbe:
+          httpGet:
+            path: /health
+            port: 8080
+          initialDelaySeconds: 10
+          periodSeconds: 10
+        resources:
+          requests:
+            cpu: 500m
+            memory: 1Gi
+          limits:
+            cpu: "4"
+            memory: 4Gi
+      volumes:
+      - name: home
+        emptyDir: {}
+      - name: tmp
+        emptyDir: {}
+EOF
+
+# 6. Wait for OOMKill workload to start crashing
+echo "Waiting for oomkill-demo pod to appear..."
+oc wait --for=condition=Available=false deployment/oomkill-demo \
+  -n "$TEST_NS" --timeout=60s 2>/dev/null || true
+sleep 10
+echo "Setup complete."
diff --git a/tests/integration/agentic/scripts/setup_subprocess_fixtures-openai.sh b/tests/integration/agentic/scripts/setup_subprocess_fixtures-openai.sh
new file mode 100755
index 00000000..0ed8f523
--- /dev/null
+++ b/tests/integration/agentic/scripts/setup_subprocess_fixtures-openai.sh
@@ -0,0 +1,133 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+# Deploy agentic infrastructure + OOMKill test workload for integration tests
+# using OpenAI as the LLM provider.
+# All operator-level resources use an "eval-" prefix to avoid conflicts with
+# existing cluster resources.
+#
+# Required env vars:
+#   OPENAI_API_KEY   — OpenAI API key
+#   SANDBOX_IMAGE    — sandbox container image URL
+#
+# Optional env vars:
+#   AGENT_MODEL      — default: gpt-4o
+#
+# To test with Claude via Vertex AI instead, use
+# setup_subprocess_fixtures-claude-vertex.sh and set the corresponding env vars
+# (GCP_CREDENTIALS_FILE, ANTHROPIC_VERTEX_PROJECT_ID, SANDBOX_IMAGE).
+
+OPERATOR_NS="openshift-lightspeed"
+TEST_NS="lightspeed-evaluation-test"
+AGENT_MODEL="${AGENT_MODEL:-gpt-5.2}"
+
+for var in OPENAI_API_KEY SANDBOX_IMAGE; do
+  if [ -z "${!var:-}" ]; then
+    echo "ERROR: $var is not set" >&2
+    exit 1
+  fi
+done
+
+# 1. Test namespace + OOMKill workload (static fixtures, already test-scoped)
+oc apply -f ../fixtures/namespace.yaml
+oc apply -f ../fixtures/oomkill-demo.yaml
+
+# 2. Secret (OpenAI API key + provider override) — prefixed name in operator namespace
+# LIGHTSPEED_AGENT_PROVIDER and OPENAI_MODEL are injected via envFrom so the
+# sandbox picks the OpenAI provider instead of defaulting to claude.
+oc create secret generic eval-llm-credentials \
+  --from-literal=OPENAI_API_KEY="$OPENAI_API_KEY" \
+  --from-literal=LIGHTSPEED_AGENT_PROVIDER="openai" \
+  --from-literal=OPENAI_MODEL="$AGENT_MODEL" \
+  -n "$OPERATOR_NS" --dry-run=client -o yaml | oc apply -f -
+
+# 3. LLMProvider — references prefixed secret
+cat <<EOF | oc apply -f -
+apiVersion: agentic.openshift.io/v1alpha1
+kind: LLMProvider
+metadata:
+  name: eval-openai
+spec:
+  type: OpenAI
+  openAI:
+    credentialsSecret:
+      name: eval-llm-credentials
+EOF
+
+# 4. Agent — references prefixed LLMProvider
+cat <<EOF | oc apply -f -
+apiVersion: agentic.openshift.io/v1alpha1
+kind: Agent
+metadata:
+  name: eval-default
+spec:
+  llmProvider:
+    name: eval-openai
+  model: $AGENT_MODEL
+  timeouts:
+    analysisSeconds: 300
+    executionSeconds: 600
+    verificationSeconds: 300
+  maxTurns: 200
+EOF
+
+# 5. SandboxTemplate
+cat <<EOF | oc apply -f -
+apiVersion: extensions.agents.x-k8s.io/v1alpha1
+kind: SandboxTemplate
+metadata:
+  name: eval-lightspeed-agent
+  namespace: $OPERATOR_NS
+spec:
+  networkPolicyManagement: Unmanaged
+  podTemplate:
+    spec:
+      serviceAccountName: lightspeed-agent
+      automountServiceAccountToken: true
+      containers:
+      - name: agent
+        image: $SANDBOX_IMAGE
+        imagePullPolicy: Always
+        ports:
+          - containerPort: 8080
+            protocol: TCP
+        env:
+          - name: LIGHTSPEED_SKILLS_DIR
+            value: /app/skills
+        volumeMounts:
+          - name: home
+            mountPath: /home/agent
+          - name: tmp
+            mountPath: /tmp
+        livenessProbe:
+          httpGet:
+            path: /health
+            port: 8080
+          initialDelaySeconds: 30
+          periodSeconds: 30
+        readinessProbe:
+          httpGet:
+            path: /health
+            port: 8080
+          initialDelaySeconds: 10
+          periodSeconds: 10
+        resources:
+          requests:
+            cpu: 500m
+            memory: 1Gi
+          limits:
+            cpu: "4"
+            memory: 4Gi
+      volumes:
+      - name: home
+        emptyDir: {}
+      - name: tmp
+        emptyDir: {}
+EOF
+
+# 6. Wait for OOMKill workload to start crashing
+echo "Waiting for oomkill-demo pod to appear..."
+oc wait --for=condition=Available=false deployment/oomkill-demo \
+  -n "$TEST_NS" --timeout=60s 2>/dev/null || true
+sleep 10
+echo "Setup complete."
diff --git a/tests/integration/system-config-agents-subprocess.yaml b/tests/integration/system-config-agents-subprocess.yaml
new file mode 100644
index 00000000..d0902a23
--- /dev/null
+++ b/tests/integration/system-config-agents-subprocess.yaml
@@ -0,0 +1,24 @@
+core:
+  max_threads: 1
+  fail_on_invalid_data: true
+  skip_on_failure: false
+
+agents:
+  enabled: true
+  default:
+    agent: proposal_agent
+  proposal_agent:
+    type: proposal
+    namespace: lightspeed-evaluation-test
+    auto_approve: true
+    cleanup_proposals: true
+    timeout: 900
+    poll_interval: 5
+
+storage:
+  - type: file
+    output_dir: ./eval_output
+    enabled_outputs: [json]
+
+environment:
+  LITELLM_LOG: ERROR
diff --git a/tests/integration/test_evaluation_data_subprocess.yaml b/tests/integration/test_evaluation_data_subprocess.yaml
new file mode 100644
index 00000000..70eebb75
--- /dev/null
+++ b/tests/integration/test_evaluation_data_subprocess.yaml
@@ -0,0 +1,62 @@
+- conversation_group_id: subprocess_full_lifecycle
+  description: Full lifecycle — analysis, execution, verification
+  tag: subprocess
+  setup_script: agentic/scripts/setup_subprocess_fixtures-openai.sh
+  cleanup_script: agentic/scripts/cleanup_subprocess_fixtures-openai.sh
+  conversation_metrics: []
+  conversation_metrics_metadata: {}
+  turns:
+    - turn_id: turn_1
+      query: >-
+        A pod named oomkill-demo in namespace lightspeed-evaluation-test
+        is in CrashLoopBackOff due to OOMKill. Analyze the root cause,
+        fix the memory configuration, and verify the fix.
+      response: null
+      proposal_spec:
+        targetNamespaces:
+          - lightspeed-evaluation-test
+        tools:
+          skills:
+            - image: quay.io/harpatil/agentic-skills:latest
+              paths:
+                - /skills/find-token
+        analysis:
+          agent: eval-default
+        execution:
+          agent: eval-default
+        verification:
+          agent: eval-default
+      expected_proposal_status:
+        phase: Completed
+      turn_metrics:
+        - custom:proposal_status
+      turn_metrics_metadata: {}
+
+- conversation_group_id: subprocess_analysis_only
+  description: Analysis-only — no execution or verification
+  tag: subprocess
+  setup_script: agentic/scripts/setup_subprocess_fixtures-openai.sh
+  cleanup_script: agentic/scripts/cleanup_subprocess_fixtures-openai.sh
+  conversation_metrics: []
+  conversation_metrics_metadata: {}
+  turns:
+    - turn_id: turn_1
+      query: >-
+        A pod named oomkill-demo in namespace lightspeed-evaluation-test
+        is in CrashLoopBackOff. Analyze the root cause.
+      response: null
+      proposal_spec:
+        targetNamespaces:
+          - lightspeed-evaluation-test
+        tools:
+          skills:
+            - image: quay.io/harpatil/agentic-skills:latest
+              paths:
+                - /skills/find-token
+        analysis:
+          agent: eval-default
+      expected_proposal_status:
+        phase: Completed
+      turn_metrics:
+        - custom:proposal_status
+      turn_metrics_metadata: {}
diff --git a/tests/integration/test_subprocess_evaluation.py b/tests/integration/test_subprocess_evaluation.py
new file mode 100644
index 00000000..61f8f24f
--- /dev/null
+++ b/tests/integration/test_subprocess_evaluation.py
@@ -0,0 +1,293 @@
+# pylint: disable=redefined-outer-name,too-many-arguments,too-many-positional-arguments,import-outside-toplevel
+"""Integration tests for ProposalDriver-based evaluation.
+
+These tests run the evaluation pipeline against a live OpenShift cluster
+using the ProposalDriver to create and manage Proposal CRs.
+
+Prerequisites:
+    - oc CLI authenticated against a cluster with agentic CRDs installed
+    - OPENAI_API_KEY, SANDBOX_IMAGE env vars set
+    - Network connectivity to the cluster API
+
+The default setup/cleanup scripts use OpenAI as the LLM provider.
+To test with a different provider (e.g. Claude via Vertex AI), set the
+corresponding env vars and point the eval data at the provider-specific
+scripts (e.g. setup_subprocess_fixtures-claude-vertex.sh).
+
+Run with: pytest tests/integration/test_subprocess_evaluation.py -v -m agentic
+"""
+
+import os
+import shutil
+import subprocess
+from pathlib import Path
+
+import pytest
+
+from lightspeed_evaluation import ConfigLoader, evaluate
+from lightspeed_evaluation.core.storage import FileBackendConfig
+
+
+def check_cli_available() -> bool:
+    """Check if oc or kubectl CLI is available."""
+    return bool(shutil.which("oc") or shutil.which("kubectl"))
+
+
+def check_cluster_reachable() -> bool:
+    """Check if the cluster is reachable via oc whoami."""
+    try:
+        result = subprocess.run(
+            ["oc", "whoami"],
+            capture_output=True,
+            text=True,
+            timeout=10,
+            check=False,
+        )
+        return result.returncode == 0
+    except (FileNotFoundError, subprocess.TimeoutExpired):
+        return False
+
+
+def check_crd_installed() -> bool:
+    """Check if the Proposal CRD is installed on the cluster."""
+    try:
+        result = subprocess.run(
+            ["oc", "get", "crd", "proposals.agentic.openshift.io"],
+            capture_output=True,
+            text=True,
+            timeout=10,
+            check=False,
+        )
+        return result.returncode == 0
+    except (FileNotFoundError, subprocess.TimeoutExpired):
+        return False
+
+
+def check_env_vars_set() -> bool:
+    """Check if required environment variables are set."""
+    return all(os.getenv(var) for var in ("OPENAI_API_KEY", "SANDBOX_IMAGE"))
+
+
+pytestmark = pytest.mark.agentic
+
+
+@pytest.fixture
+def integration_test_dir() -> Path:
+    """Get the integration test directory path."""
+    return Path(__file__).parent
+
+
+@pytest.fixture
+def subprocess_config_path(integration_test_dir: Path) -> Path:
+    """Get path to subprocess agent system config file."""
+    return integration_test_dir / "system-config-agents-subprocess.yaml"
+
+
+@pytest.fixture
+def subprocess_eval_data_path(integration_test_dir: Path) -> Path:
+    """Get path to subprocess evaluation data file."""
+    return integration_test_dir / "test_evaluation_data_subprocess.yaml"
+
+
+class TestProposalPrerequisites:
+    """Verify prerequisites for subprocess integration tests."""
+
+    def test_cli_available(self) -> None:
+        """Verify that oc or kubectl CLI is available."""
+        assert check_cli_available(), "oc or kubectl CLI must be installed and in PATH"
+
+    def test_cluster_reachable(self) -> None:
+        """Verify that the cluster is reachable."""
+        assert check_cluster_reachable(), "Cluster must be reachable via 'oc whoami'"
+
+    def test_crd_installed(self) -> None:
+        """Verify that Proposal CRD is installed on the cluster."""
+        assert (
+            check_crd_installed()
+        ), "proposals.agentic.openshift.io CRD must be installed"
+
+    def test_env_vars_configured(self) -> None:
+        """Verify that required environment variables are set."""
+        assert check_env_vars_set(), (
+            "OPENAI_API_KEY and SANDBOX_IMAGE " "environment variables must be set"
+        )
+
+
+class TestProposalDriverEvaluation:
+    """End-to-end tests for ProposalDriver evaluation pipeline."""
+
+    @pytest.mark.timeout(1200)
+    def test_full_lifecycle(
+        self,
+        subprocess_config_path: Path,
+        subprocess_eval_data_path: Path,
+        tmp_path: Path,
+    ) -> None:
+        """Test full Proposal lifecycle: analysis, execution, verification.
+
+        Verifies:
+        - Setup script deploys infrastructure and test workload
+        - ProposalDriver creates Proposal CR and auto-approves stages
+        - Pipeline completes without errors
+        - TurnData is enriched with response and proposal_status
+        - At least the Analyzed condition reaches status True
+        - Cleanup script removes test resources
+        """
+        loader = ConfigLoader()
+        system_config = loader.load_system_config(str(subprocess_config_path))
+        system_config.storage = [
+            FileBackendConfig(output_dir=str(tmp_path / "eval_output"))
+        ]
+
+        from lightspeed_evaluation.core.system import DataValidator
+
+        validator = DataValidator(
+            api_enabled=True,
+            fail_on_invalid_data=system_config.core.fail_on_invalid_data,
+        )
+        all_data = validator.load_evaluation_data(str(subprocess_eval_data_path))
+        eval_data = [
+            d
+            for d in all_data
+            if d.conversation_group_id == "subprocess_full_lifecycle"
+        ]
+        assert len(eval_data) == 1, "Should find subprocess_full_lifecycle data"
+
+        evaluate(system_config, eval_data)
+
+        turn = eval_data[0].turns[0]
+        assert (
+            turn.response and turn.response.strip()
+        ), "Response should be populated by ProposalDriver"
+        assert isinstance(
+            turn.proposal_status, dict
+        ), "proposal_status should be populated"
+        assert (
+            "conditions" in turn.proposal_status
+        ), "proposal_status should contain conditions"
+
+        conditions = turn.proposal_status["conditions"]
+        by_type = {c["type"]: c for c in conditions}
+        assert "Analyzed" in by_type, "Should have Analyzed condition"
+        assert (
+            by_type["Analyzed"].get("status") == "True"
+        ), "Analyzed condition should be True"
+
+    @pytest.mark.timeout(600)
+    def test_analysis_only(
+        self,
+        subprocess_config_path: Path,
+        subprocess_eval_data_path: Path,
+        tmp_path: Path,
+    ) -> None:
+        """Test analysis-only Proposal (no execution or verification).
+
+        Verifies:
+        - Only analysis stage runs
+        - No Executed or Verified conditions present
+        """
+        loader = ConfigLoader()
+        system_config = loader.load_system_config(str(subprocess_config_path))
+        system_config.storage = [
+            FileBackendConfig(output_dir=str(tmp_path / "eval_output"))
+        ]
+
+        from lightspeed_evaluation.core.system import DataValidator
+
+        validator = DataValidator(
+            api_enabled=True,
+            fail_on_invalid_data=system_config.core.fail_on_invalid_data,
+        )
+        all_data = validator.load_evaluation_data(str(subprocess_eval_data_path))
+        eval_data = [
+            d for d in all_data if d.conversation_group_id == "subprocess_analysis_only"
+        ]
+        assert len(eval_data) == 1, "Should find subprocess_analysis_only data"
+
+        evaluate(system_config, eval_data)
+
+        turn = eval_data[0].turns[0]
+        assert (
+            turn.response and turn.response.strip()
+        ), "Response should be populated by ProposalDriver"
+        assert isinstance(
+            turn.proposal_status, dict
+        ), "proposal_status should be populated"
+
+        conditions = turn.proposal_status.get("conditions", [])
+        by_type = {c["type"]: c for c in conditions}
+        assert "Analyzed" in by_type, "Should have Analyzed condition"
+        assert (
+            by_type["Analyzed"].get("status") == "True"
+        ), "Analyzed condition should be True"
+        if "Executed" in by_type:
+            assert by_type["Executed"].get("reason") == "Skipped", (
+                "Analysis-only Executed condition should be Skipped, "
+                f"got reason={by_type['Executed'].get('reason')}"
+            )
+        if "Verified" in by_type:
+            assert by_type["Verified"].get("reason") == "Skipped", (
+                "Analysis-only Verified condition should be Skipped, "
+                f"got reason={by_type['Verified'].get('reason')}"
+            )
+
+    @pytest.mark.timeout(120)
+    def test_timeout_handling(
+        self,
+        subprocess_config_path: Path,
+        subprocess_eval_data_path: Path,
+        tmp_path: Path,
+    ) -> None:
+        """Test that a very short timeout is handled gracefully.
+
+        Verifies:
+        - Pipeline does not crash on timeout
+        - Proposal CRs are cleaned up after timeout
+        """
+        loader = ConfigLoader()
+        system_config = loader.load_system_config(str(subprocess_config_path))
+        system_config.storage = [
+            FileBackendConfig(output_dir=str(tmp_path / "eval_output"))
+        ]
+
+        assert system_config.agents is not None
+        agent_cfg = system_config.agents.agents["proposal_agent"]
+        agent_cfg.timeout = 5
+        agent_cfg.poll_interval = 1
+
+        from lightspeed_evaluation.core.system import DataValidator
+
+        validator = DataValidator(
+            api_enabled=True,
+            fail_on_invalid_data=system_config.core.fail_on_invalid_data,
+        )
+        all_data = validator.load_evaluation_data(str(subprocess_eval_data_path))
+        eval_data = [
+            d for d in all_data if d.conversation_group_id == "subprocess_analysis_only"
+        ]
+
+        evaluate(system_config, eval_data)
+
+        result = subprocess.run(
+            [
+                "oc",
+                "get",
+                "proposals",
+                "-n",
+                "lightspeed-evaluation-test",
+                "-o",
+                "name",
+            ],
+            capture_output=True,
+            text=True,
+            timeout=10,
+            check=False,
+        )
+        lines = [
+            line
+            for line in result.stdout.strip().splitlines()
+            if line.startswith("proposal.agentic.openshift.io/eval-")
+        ]
+        assert len(lines) == 0, (
+            f"Proposal CRs should be cleaned up after timeout, " f"but found: {lines}"
+        )
diff --git a/tests/unit/core/metrics/custom/test_proposal_eval.py b/tests/unit/core/metrics/custom/test_proposal_eval.py
new file mode 100644
index 00000000..2c3875aa
--- /dev/null
+++ b/tests/unit/core/metrics/custom/test_proposal_eval.py
@@ -0,0 +1,452 @@
+"""Unit tests for proposal status evaluation metric."""
+
+from typing import Any, Optional
+
+from lightspeed_evaluation.core.metrics.custom.proposal_eval import (
+    _derive_phase,
+    evaluate_proposal_status,
+)
+from lightspeed_evaluation.core.models import TurnData
+
+
+def _make_turn(
+    expected_proposal_status: Optional[dict[str, Any]] = None,
+    proposal_status: Optional[dict[str, Any]] = None,
+    proposal_spec: Optional[dict[str, Any]] = None,
+) -> TurnData:
+    """Build a minimal TurnData for testing."""
+    return TurnData(
+        turn_id="t1",
+        query="test query",
+        expected_proposal_status=expected_proposal_status,
+        proposal_status=proposal_status,
+        proposal_spec=proposal_spec,
+    )
+
+
+class TestValidation:
+    """Input validation guards."""
+
+    def test_conversation_level_returns_none(self) -> None:
+        """Conversation-level evaluation returns None (turn-level only)."""
+        turn = _make_turn(expected_proposal_status={"phase": "Completed"})
+        score, reason = evaluate_proposal_status(None, None, turn, True)
+        assert score is None
+        assert "turn-level" in reason
+
+    def test_none_turn_data_returns_none(self) -> None:
+        """Missing turn_data returns None."""
+        score, reason = evaluate_proposal_status(None, None, None, False)
+        assert score is None
+        assert "TurnData" in reason
+
+    def test_missing_expected_returns_none(self) -> None:
+        """Missing expected_proposal_status returns None (skip)."""
+        turn = _make_turn()
+        score, reason = evaluate_proposal_status(None, None, turn, False)
+        assert score is None
+        assert "expected_proposal_status" in reason
+
+    def test_missing_proposal_status_returns_zero(self) -> None:
+        """Missing proposal_status (driver didn't populate) returns 0.0."""
+        turn = _make_turn(expected_proposal_status={"phase": "Completed"})
+        score, reason = evaluate_proposal_status(None, None, turn, False)
+        assert score == 0.0
+        assert "not populated" in reason
+
+
+class TestDerivePhase:
+    """Phase derivation from CRD conditions."""
+
+    def test_completed_analysis_only(self) -> None:
+        """Analysis-only with Analyzed=True derives Completed."""
+        conditions = [{"type": "Analyzed", "status": "True"}]
+        assert _derive_phase(conditions, {"analysis": {}}) == "Completed"
+
+    def test_completed_full_lifecycle(self) -> None:
+        """Full lifecycle with all conditions True derives Completed."""
+        conditions = [
+            {"type": "Analyzed", "status": "True"},
+            {"type": "Executed", "status": "True"},
+            {"type": "Verified", "status": "True"},
+        ]
+        spec: dict[str, Any] = {"analysis": {}, "execution": {}, "verification": {}}
+        assert _derive_phase(conditions, spec) == "Completed"
+
+    def test_completed_execution_no_verification(self) -> None:
+        """Execution without verification with Executed=True derives Completed."""
+        conditions = [
+            {"type": "Analyzed", "status": "True"},
+            {"type": "Executed", "status": "True"},
+        ]
+        spec: dict[str, Any] = {"analysis": {}, "execution": {}}
+        assert _derive_phase(conditions, spec) == "Completed"
+
+    def test_failed_condition(self) -> None:
+        """Any condition with status False derives Failed."""
+        conditions = [
+            {"type": "Analyzed", "status": "True"},
+            {"type": "Executed", "status": "False", "reason": "Error"},
+        ]
+        assert _derive_phase(conditions) == "Failed"
+
+    def test_retrying_execution_not_failed(self) -> None:
+        """RetryingExecution reason does not count as failure."""
+        conditions = [
+            {"type": "Analyzed", "status": "True"},
+            {"type": "Verified", "status": "False", "reason": "RetryingExecution"},
+        ]
+        spec: dict[str, Any] = {"analysis": {}, "execution": {}, "verification": {}}
+        assert _derive_phase(conditions, spec) == "InProgress"
+
+    def test_denied(self) -> None:
+        """Denied=True derives Denied."""
+        conditions = [{"type": "Denied", "status": "True"}]
+        assert _derive_phase(conditions) == "Denied"
+
+    def test_escalated(self) -> None:
+        """Escalated=True derives Escalated."""
+        conditions = [{"type": "Escalated", "status": "True"}]
+        assert _derive_phase(conditions) == "Escalated"
+
+    def test_in_progress(self) -> None:
+        """Unknown status derives InProgress."""
+        conditions = [{"type": "Analyzed", "status": "Unknown"}]
+        assert _derive_phase(conditions) == "InProgress"
+
+    def test_no_proposal_spec_infers_last_step(self) -> None:
+        """Without proposal_spec, infers last step from conditions."""
+        conditions = [
+            {"type": "Analyzed", "status": "True"},
+            {"type": "Executed", "status": "True"},
+            {"type": "Verified", "status": "True"},
+        ]
+        assert _derive_phase(conditions, None) == "Completed"
+
+
+class TestPhaseCheck:
+    """Phase exact match assertion."""
+
+    def test_phase_match_pass(self) -> None:
+        """Exact phase match returns 1.0."""
+        turn = _make_turn(
+            expected_proposal_status={"phase": "Completed"},
+            proposal_status={
+                "conditions": [{"type": "Analyzed", "status": "True"}],
+            },
+            proposal_spec={"analysis": {}},
+        )
+        score, reason = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 1.0
+        assert "Phase matches" in reason
+
+    def test_phase_match_fail(self) -> None:
+        """Phase mismatch returns 0.0 with details."""
+        turn = _make_turn(
+            expected_proposal_status={"phase": "Completed"},
+            proposal_status={
+                "conditions": [
+                    {"type": "Analyzed", "status": "False", "reason": "Error"},
+                ],
+            },
+            proposal_spec={"analysis": {}},
+        )
+        score, reason = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 0.0
+        assert "Phase mismatch" in reason
+        assert "'Completed'" in reason
+        assert "'Failed'" in reason
+
+
+class TestPhaseInCheck:
+    """Phase membership assertion."""
+
+    def test_phase_in_pass(self) -> None:
+        """Phase in allowed list returns 1.0."""
+        turn = _make_turn(
+            expected_proposal_status={"phase_in": ["Completed", "Escalated"]},
+            proposal_status={
+                "conditions": [{"type": "Analyzed", "status": "True"}],
+            },
+            proposal_spec={"analysis": {}},
+        )
+        score, _ = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 1.0
+
+    def test_phase_in_fail(self) -> None:
+        """Phase not in allowed list returns 0.0."""
+        turn = _make_turn(
+            expected_proposal_status={"phase_in": ["Completed"]},
+            proposal_status={
+                "conditions": [{"type": "Denied", "status": "True"}],
+            },
+        )
+        score, reason = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 0.0
+        assert "not in" in reason
+
+
+class TestConditionsCheck:
+    """Specific condition assertions."""
+
+    def test_condition_status_pass(self) -> None:
+        """Condition status match returns 1.0."""
+        turn = _make_turn(
+            expected_proposal_status={
+                "conditions": [{"type": "Analyzed", "status": "True"}],
+            },
+            proposal_status={
+                "conditions": [
+                    {"type": "Analyzed", "status": "True", "reason": "Done"},
+                ],
+            },
+        )
+        score, reason = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 1.0
+        assert "condition assertions passed" in reason
+
+    def test_condition_status_fail(self) -> None:
+        """Condition status mismatch returns 0.0."""
+        turn = _make_turn(
+            expected_proposal_status={
+                "conditions": [{"type": "Analyzed", "status": "True"}],
+            },
+            proposal_status={
+                "conditions": [{"type": "Analyzed", "status": "False"}],
+            },
+        )
+        score, reason = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 0.0
+        assert "Analyzed" in reason
+        assert "status" in reason
+
+    def test_condition_reason_pass(self) -> None:
+        """Condition reason match returns 1.0."""
+        turn = _make_turn(
+            expected_proposal_status={
+                "conditions": [
+                    {"type": "Executed", "status": "True", "reason": "Skipped"},
+                ],
+            },
+            proposal_status={
+                "conditions": [
+                    {"type": "Executed", "status": "True", "reason": "Skipped"},
+                ],
+            },
+        )
+        score, _ = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 1.0
+
+    def test_condition_reason_fail(self) -> None:
+        """Condition reason mismatch returns 0.0."""
+        turn = _make_turn(
+            expected_proposal_status={
+                "conditions": [
+                    {"type": "Executed", "reason": "Skipped"},
+                ],
+            },
+            proposal_status={
+                "conditions": [
+                    {"type": "Executed", "status": "True", "reason": "Done"},
+                ],
+            },
+        )
+        score, reason = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 0.0
+        assert "reason" in reason
+
+    def test_condition_not_found(self) -> None:
+        """Missing condition type returns 0.0."""
+        turn = _make_turn(
+            expected_proposal_status={
+                "conditions": [{"type": "Verified", "status": "True"}],
+            },
+            proposal_status={
+                "conditions": [{"type": "Analyzed", "status": "True"}],
+            },
+        )
+        score, reason = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 0.0
+        assert "not found" in reason
+
+    def test_condition_missing_type_field(self) -> None:
+        """Condition assertion without type field returns 0.0."""
+        turn = _make_turn(
+            expected_proposal_status={
+                "conditions": [{"status": "True"}],
+            },
+            proposal_status={
+                "conditions": [{"type": "Analyzed", "status": "True"}],
+            },
+        )
+        score, reason = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 0.0
+        assert "missing 'type'" in reason
+
+
+class TestVerificationCheck:
+    """Verification-specific assertions."""
+
+    def test_verification_passed_true(self) -> None:
+        """Verification passed=True with Verified=True returns 1.0."""
+        turn = _make_turn(
+            expected_proposal_status={"verification": {"passed": True}},
+            proposal_status={
+                "conditions": [{"type": "Verified", "status": "True"}],
+            },
+        )
+        score, _ = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 1.0
+
+    def test_verification_passed_false(self) -> None:
+        """Verification passed=True with Verified=False returns 0.0."""
+        turn = _make_turn(
+            expected_proposal_status={"verification": {"passed": True}},
+            proposal_status={
+                "conditions": [
+                    {"type": "Verified", "status": "False", "reason": "Error"},
+                ],
+            },
+        )
+        score, reason = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 0.0
+        assert "Verification passed" in reason
+
+    def test_verification_summary_contains_pass(self) -> None:
+        """Verification summary_contains match returns 1.0."""
+        turn = _make_turn(
+            expected_proposal_status={
+                "verification": {"summary_contains": "replicas running"},
+            },
+            proposal_status={
+                "conditions": [
+                    {
+                        "type": "Verified",
+                        "status": "True",
+                        "message": "3 replicas running successfully",
+                    },
+                ],
+            },
+        )
+        score, _ = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 1.0
+
+    def test_verification_summary_contains_fail(self) -> None:
+        """Verification summary_contains mismatch returns 0.0."""
+        turn = _make_turn(
+            expected_proposal_status={
+                "verification": {"summary_contains": "replicas running"},
+            },
+            proposal_status={
+                "conditions": [
+                    {
+                        "type": "Verified",
+                        "status": "True",
+                        "message": "Pod restarted",
+                    },
+                ],
+            },
+        )
+        score, reason = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 0.0
+        assert "does not contain" in reason
+
+    def test_verification_condition_missing(self) -> None:
+        """Verification check with missing Verified condition returns 0.0."""
+        turn = _make_turn(
+            expected_proposal_status={"verification": {"passed": True}},
+            proposal_status={
+                "conditions": [{"type": "Analyzed", "status": "True"}],
+            },
+        )
+        score, reason = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 0.0
+        assert "not found" in reason
+
+    def test_verification_summary_case_insensitive(self) -> None:
+        """Verification summary_contains is case-insensitive."""
+        turn = _make_turn(
+            expected_proposal_status={
+                "verification": {"summary_contains": "REPLICAS"},
+            },
+            proposal_status={
+                "conditions": [
+                    {
+                        "type": "Verified",
+                        "status": "True",
+                        "message": "3 replicas running",
+                    },
+                ],
+            },
+        )
+        score, _ = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 1.0
+
+
+class TestMultipleChecks:
+    """Combined assertions."""
+
+    def test_all_checks_pass(self) -> None:
+        """All checks passing returns 1.0 with combined reasons."""
+        turn = _make_turn(
+            expected_proposal_status={
+                "phase": "Completed",
+                "conditions": [{"type": "Analyzed", "status": "True"}],
+                "verification": {"passed": True},
+            },
+            proposal_status={
+                "conditions": [
+                    {"type": "Analyzed", "status": "True"},
+                    {"type": "Executed", "status": "True"},
+                    {"type": "Verified", "status": "True"},
+                ],
+            },
+            proposal_spec={"analysis": {}, "execution": {}, "verification": {}},
+        )
+        score, reason = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 1.0
+        assert "Phase matches" in reason
+        assert "condition assertions passed" in reason
+        assert "Verification assertions passed" in reason
+
+    def test_fail_fast_on_first_failure(self) -> None:
+        """First failing check returns 0.0 without running subsequent checks."""
+        turn = _make_turn(
+            expected_proposal_status={
+                "phase": "Completed",
+                "conditions": [{"type": "Analyzed", "status": "True"}],
+            },
+            proposal_status={
+                "conditions": [
+                    {"type": "Analyzed", "status": "False", "reason": "Error"},
+                ],
+            },
+            proposal_spec={"analysis": {}},
+        )
+        score, reason = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 0.0
+        assert "Phase mismatch" in reason
+        assert "condition" not in reason.lower()
+
+    def test_empty_expected_skips(self) -> None:
+        """Empty expected_proposal_status dict is treated as no expectations."""
+        turn = _make_turn(
+            expected_proposal_status={},
+            proposal_status={
+                "conditions": [{"type": "Analyzed", "status": "True"}],
+            },
+        )
+        score, reason = evaluate_proposal_status(None, 0, turn, False)
+        assert score is None
+        assert "expected_proposal_status" in reason
+
+    def test_empty_conditions_list(self) -> None:
+        """Empty conditions list with phase check fails."""
+        turn = _make_turn(
+            expected_proposal_status={"phase": "Completed"},
+            proposal_status={"conditions": []},
+            proposal_spec={"analysis": {}},
+        )
+        score, reason = evaluate_proposal_status(None, 0, turn, False)
+        assert score == 0.0
+        assert "Phase mismatch" in reason
diff --git a/tests/unit/pipeline/evaluation/test_driver.py b/tests/unit/pipeline/evaluation/test_driver.py
index 994ecfd6..78a85f5d 100644
--- a/tests/unit/pipeline/evaluation/test_driver.py
+++ b/tests/unit/pipeline/evaluation/test_driver.py
@@ -12,9 +12,10 @@
 from lightspeed_evaluation.core.system.exceptions import ConfigurationError
 from lightspeed_evaluation.pipeline.evaluation.driver import (
     AgentDriver,
-    AgentDriverRegistry,
     HttpApiDriver,
+    ProposalDriver,
 )
+from lightspeed_evaluation.pipeline.evaluation.registry import AgentDriverRegistry
 
 
 class TestAgentDriverRegistry:
@@ -51,6 +52,22 @@ def test_default_registry_contains_http_api(self) -> None:
         registry = AgentDriverRegistry()
         assert "http_api" in registry._drivers
 
+    def test_default_registry_contains_proposal(self) -> None:
+        """Test default registry includes proposal driver."""
+        registry = AgentDriverRegistry()
+        assert "proposal" in registry._drivers
+
+    def test_create_proposal_driver(self, mocker: MockerFixture) -> None:
+        """Test registry creates ProposalDriver for type 'proposal'."""
+        mocker.patch("shutil.which", return_value="/usr/bin/oc")
+        registry = AgentDriverRegistry()
+        driver = registry.create_driver(
+            {"type": "proposal", "namespace": "test-ns"}, enabled=True
+        )
+
+        assert isinstance(driver, ProposalDriver)
+        assert driver.enabled is True
+
 
 class TestHttpApiDriver:
     """Unit tests for HttpApiDriver."""
diff --git a/tests/unit/pipeline/evaluation/test_subprocess_driver.py b/tests/unit/pipeline/evaluation/test_proposal_driver.py
similarity index 81%
rename from tests/unit/pipeline/evaluation/test_subprocess_driver.py
rename to tests/unit/pipeline/evaluation/test_proposal_driver.py
index e2176c90..7992f27a 100644
--- a/tests/unit/pipeline/evaluation/test_subprocess_driver.py
+++ b/tests/unit/pipeline/evaluation/test_proposal_driver.py
@@ -1,6 +1,6 @@
 # pylint: disable=protected-access
 
-"""Unit tests for subprocess agent driver module."""
+"""Unit tests for proposal agent driver module."""
 
 from typing import Any
 
@@ -8,18 +8,17 @@
 from pydantic import ValidationError
 from pytest_mock import MockerFixture
 
-from lightspeed_evaluation.core.models import TurnData
+from lightspeed_evaluation.core.models import ProposalAgentConfig, TurnData
 from lightspeed_evaluation.core.system.exceptions import ConfigurationError
-from lightspeed_evaluation.pipeline.evaluation.subprocess_driver import (
-    SubprocessAgentConfig,
-    SubprocessDriver,
+from lightspeed_evaluation.pipeline.evaluation.driver import (
+    ProposalDriver,
     TerminalOutcome,
 )
 
-MODULE = "lightspeed_evaluation.pipeline.evaluation.subprocess_driver"
+MODULE = "lightspeed_evaluation.pipeline.evaluation.driver"
 
 VALID_CONFIG: dict[str, Any] = {
-    "type": "subprocess",
+    "type": "proposal",
     "namespace": "openshift-lightspeed",
 }
 
@@ -46,14 +45,14 @@ def _cond(
 # ── Config validation ────────────────────────────────────────────────
 
 
-class TestSubprocessAgentConfig:
-    """Unit tests for SubprocessAgentConfig Pydantic model."""
+class TestProposalAgentConfig:
+    """Unit tests for ProposalAgentConfig Pydantic model."""
 
     def test_valid_config_all_fields(self) -> None:
         """Test config with all fields explicit."""
-        config = SubprocessAgentConfig.model_validate(
+        config = ProposalAgentConfig.model_validate(
             {
-                "type": "subprocess",
+                "type": "proposal",
                 "namespace": "ns",
                 "auto_approve": False,
                 "cleanup_proposals": False,
@@ -69,7 +68,7 @@ def test_valid_config_all_fields(self) -> None:
 
     def test_valid_config_defaults(self) -> None:
         """Test config with only required fields uses defaults."""
-        config = SubprocessAgentConfig.model_validate(VALID_CONFIG)
+        config = ProposalAgentConfig.model_validate(VALID_CONFIG)
         assert config.auto_approve is True
         assert config.cleanup_proposals is True
         assert config.timeout == 900
@@ -78,57 +77,34 @@ def test_valid_config_defaults(self) -> None:
     def test_missing_namespace(self) -> None:
         """Test missing required namespace raises ValidationError."""
         with pytest.raises(ValidationError):
-            SubprocessAgentConfig.model_validate({"type": "subprocess"})
+            ProposalAgentConfig.model_validate({"type": "proposal"})
 
     def test_extra_field_rejected(self) -> None:
         """Test extra fields raise ValidationError."""
         with pytest.raises(ValidationError):
-            SubprocessAgentConfig.model_validate({**VALID_CONFIG, "extra": "bad"})
+            ProposalAgentConfig.model_validate({**VALID_CONFIG, "extra": "bad"})
 
     def test_invalid_timeout_zero(self) -> None:
         """Test timeout=0 raises ValidationError."""
         with pytest.raises(ValidationError):
-            SubprocessAgentConfig.model_validate({**VALID_CONFIG, "timeout": 0})
+            ProposalAgentConfig.model_validate({**VALID_CONFIG, "timeout": 0})
 
     def test_invalid_timeout_negative(self) -> None:
         """Test negative timeout raises ValidationError."""
         with pytest.raises(ValidationError):
-            SubprocessAgentConfig.model_validate({**VALID_CONFIG, "timeout": -1})
+            ProposalAgentConfig.model_validate({**VALID_CONFIG, "timeout": -1})
 
     def test_invalid_poll_interval_zero(self) -> None:
         """Test poll_interval=0 raises ValidationError."""
         with pytest.raises(ValidationError):
-            SubprocessAgentConfig.model_validate({**VALID_CONFIG, "poll_interval": 0})
+            ProposalAgentConfig.model_validate({**VALID_CONFIG, "poll_interval": 0})
 
 
 # ── Condition helpers ────────────────────────────────────────────────
 
 
-class TestConditionHelpers:
-    """Unit tests for SubprocessDriver._should_approve and _is_terminal."""
-
-    @pytest.mark.parametrize(
-        "conditions, expected",
-        [
-            ([], False),
-            ([_cond("Analyzed", "Unknown")], False),
-            ([_cond("Analyzed", "True")], True),
-            ([_cond("Analyzed", "False")], False),
-            ([_cond("Executed", "True")], False),
-        ],
-        ids=[
-            "no-conditions",
-            "analyzing",
-            "analyzed",
-            "analysis-failed",
-            "wrong-condition-type",
-        ],
-    )
-    def test_should_approve(
-        self, conditions: list[dict[str, Any]], expected: bool
-    ) -> None:
-        """Test _should_approve returns correct result."""
-        assert SubprocessDriver._should_approve(conditions) == expected
+class TestIsTerminal:  # pylint: disable=too-few-public-methods
+    """Unit tests for ProposalDriver._is_terminal."""
 
     @pytest.mark.parametrize(
         "conditions, spec, expected",
@@ -252,7 +228,7 @@ def test_is_terminal(
         expected: TerminalOutcome | None,
     ) -> None:
         """Test _is_terminal returns correct terminal outcome."""
-        assert SubprocessDriver._is_terminal(conditions, spec) == expected
+        assert ProposalDriver._is_terminal(conditions, spec) == expected
 
 
 # ── CR manifest building ────────────────────────────────────────────
@@ -261,10 +237,10 @@ def test_is_terminal(
 class TestBuildCR:
     """Unit tests for CR manifest construction."""
 
-    def _make_driver(self, mocker: MockerFixture) -> SubprocessDriver:
+    def _make_driver(self, mocker: MockerFixture) -> ProposalDriver:
         """Create a driver with mocked CLI resolution."""
         mocker.patch(f"{MODULE}.shutil").which.return_value = "/usr/bin/oc"
-        return SubprocessDriver(VALID_CONFIG)
+        return ProposalDriver(VALID_CONFIG)
 
     def test_proposal_cr_query_only(self, mocker: MockerFixture) -> None:
         """Test Proposal CR with query only, no proposal_spec."""
@@ -319,6 +295,7 @@ def test_approval_cr_analysis_only(self, mocker: MockerFixture) -> None:
         assert len(cr["spec"]["stages"]) == 1
         assert cr["spec"]["stages"][0]["type"] == "Analysis"
         assert cr["spec"]["stages"][0]["decision"] == "Approved"
+        assert cr["spec"]["stages"][0]["analysis"] == {"agent": "default"}
 
     def test_approval_cr_full(self, mocker: MockerFixture) -> None:
         """Test ProposalApproval with all three stages."""
@@ -328,14 +305,30 @@ def test_approval_cr_full(self, mocker: MockerFixture) -> None:
         assert len(cr["spec"]["stages"]) == 3
         types = [s["type"] for s in cr["spec"]["stages"]]
         assert types == ["Analysis", "Execution", "Verification"]
+        assert cr["spec"]["stages"][0]["analysis"] == {"agent": "default"}
         assert cr["spec"]["stages"][1]["execution"] == {"option": 0}
+        assert cr["spec"]["stages"][2]["verification"] == {"agent": "default"}
+
+    def test_approval_cr_with_agent_refs(self, mocker: MockerFixture) -> None:
+        """Test ProposalApproval passes agent names from proposal_spec."""
+        driver = self._make_driver(mocker)
+        spec: dict[str, Any] = {
+            "analysis": {"agent": "eval-default"},
+            "execution": {"agent": "eval-default"},
+            "verification": {"agent": "eval-default"},
+        }
+        cr = driver._build_approval_cr("eval-abc", spec)
+
+        assert cr["spec"]["stages"][0]["analysis"] == {"agent": "eval-default"}
+        assert cr["spec"]["stages"][1]["execution"]["agent"] == "eval-default"
+        assert cr["spec"]["stages"][2]["verification"] == {"agent": "eval-default"}
 
 
 # ── Extract summary ─────────────────────────────────────────────────
 
 
 class TestExtractSummary:
-    """Unit tests for SubprocessDriver._extract_summary."""
+    """Unit tests for ProposalDriver._extract_summary."""
 
     def test_with_messages(self) -> None:
         """Test summary from condition messages."""
@@ -345,24 +338,24 @@ def test_with_messages(self) -> None:
                 _cond("Executed", "True", message="Execution ok"),
             ]
         }
-        result = SubprocessDriver._extract_summary(status)
+        result = ProposalDriver._extract_summary(status)
         assert result == "Analysis done; Execution ok"
 
     def test_no_messages(self) -> None:
         """Test fallback when conditions have no messages."""
         status = {"conditions": [_cond("Analyzed", "True")]}
-        assert SubprocessDriver._extract_summary(status) == "No summary available"
+        assert ProposalDriver._extract_summary(status) == "No summary available"
 
     def test_empty_status(self) -> None:
         """Test fallback for empty status dict."""
-        assert SubprocessDriver._extract_summary({}) == "No summary available"
+        assert ProposalDriver._extract_summary({}) == "No summary available"
 
 
 # ── Driver lifecycle ─────────────────────────────────────────────────
 
 
-class TestSubprocessDriver:
-    """Unit tests for SubprocessDriver init, validate_config, enabled, close."""
+class TestProposalDriver:
+    """Unit tests for ProposalDriver init, validate_config, enabled, close."""
 
     def test_validate_config_with_oc(self, mocker: MockerFixture) -> None:
         """Test driver resolves oc as primary CLI."""
@@ -370,7 +363,7 @@ def test_validate_config_with_oc(self, mocker: MockerFixture) -> None:
         mock_shutil.which.side_effect = lambda cmd: (
             "/usr/bin/oc" if cmd == "oc" else None
         )
-        driver = SubprocessDriver(VALID_CONFIG)
+        driver = ProposalDriver(VALID_CONFIG)
         assert driver._cli == "/usr/bin/oc"
 
     def test_validate_config_kubectl_fallback(self, mocker: MockerFixture) -> None:
@@ -379,31 +372,31 @@ def test_validate_config_kubectl_fallback(self, mocker: MockerFixture) -> None:
         mock_shutil.which.side_effect = lambda cmd: (
             "/usr/bin/kubectl" if cmd == "kubectl" else None
         )
-        driver = SubprocessDriver(VALID_CONFIG)
+        driver = ProposalDriver(VALID_CONFIG)
         assert driver._cli == "/usr/bin/kubectl"
 
     def test_validate_config_neither_found(self, mocker: MockerFixture) -> None:
         """Test ConfigurationError when neither oc nor kubectl found."""
         mocker.patch(f"{MODULE}.shutil").which.return_value = None
         with pytest.raises(ConfigurationError, match="Neither 'oc' nor 'kubectl'"):
-            SubprocessDriver(VALID_CONFIG)
+            ProposalDriver(VALID_CONFIG)
 
     def test_validate_config_invalid(self, mocker: MockerFixture) -> None:
         """Test ValidationError for missing required fields."""
         mocker.patch(f"{MODULE}.shutil").which.return_value = "/usr/bin/oc"
         with pytest.raises(ValidationError):
-            SubprocessDriver({"type": "subprocess"})
+            ProposalDriver({"type": "proposal"})
 
     def test_enabled_always_true(self, mocker: MockerFixture) -> None:
         """Test enabled property defaults to True (inherited)."""
         mocker.patch(f"{MODULE}.shutil").which.return_value = "/usr/bin/oc"
-        driver = SubprocessDriver(VALID_CONFIG)
+        driver = ProposalDriver(VALID_CONFIG)
         assert driver.enabled is True
 
     def test_close_is_noop(self, mocker: MockerFixture) -> None:
         """Test close does nothing (no persistent connections)."""
         mocker.patch(f"{MODULE}.shutil").which.return_value = "/usr/bin/oc"
-        driver = SubprocessDriver(VALID_CONFIG)
+        driver = ProposalDriver(VALID_CONFIG)
         driver.close()
 
 
@@ -411,12 +404,12 @@ def test_close_is_noop(self, mocker: MockerFixture) -> None:
 
 
 class TestCleanup:
-    """Unit tests for SubprocessDriver._cleanup."""
+    """Unit tests for ProposalDriver._cleanup."""
 
     def test_cleanup_calls_delete(self, mocker: MockerFixture) -> None:
         """Test cleanup delegates to _delete when enabled."""
         mocker.patch(f"{MODULE}.shutil").which.return_value = "/usr/bin/oc"
-        driver = SubprocessDriver(VALID_CONFIG)
+        driver = ProposalDriver(VALID_CONFIG)
         mock_delete = mocker.patch.object(driver, "_delete")
 
         driver._cleanup("eval-test")
@@ -426,7 +419,7 @@ def test_cleanup_calls_delete(self, mocker: MockerFixture) -> None:
     def test_cleanup_disabled(self, mocker: MockerFixture) -> None:
         """Test cleanup skips _delete when cleanup_proposals=False."""
         mocker.patch(f"{MODULE}.shutil").which.return_value = "/usr/bin/oc"
-        driver = SubprocessDriver({**VALID_CONFIG, "cleanup_proposals": False})
+        driver = ProposalDriver({**VALID_CONFIG, "cleanup_proposals": False})
         mock_delete = mocker.patch.object(driver, "_delete")
 
         driver._cleanup("eval-test")
@@ -436,7 +429,7 @@ def test_cleanup_disabled(self, mocker: MockerFixture) -> None:
     def test_cleanup_failure_logged(self, mocker: MockerFixture) -> None:
         """Test cleanup logs warning on _delete failure."""
         mocker.patch(f"{MODULE}.shutil").which.return_value = "/usr/bin/oc"
-        driver = SubprocessDriver(VALID_CONFIG)
+        driver = ProposalDriver(VALID_CONFIG)
         mocker.patch.object(driver, "_delete", side_effect=Exception("boom"))
         mock_logger = mocker.patch(f"{MODULE}.logger")
 
@@ -449,21 +442,21 @@ def test_cleanup_failure_logged(self, mocker: MockerFixture) -> None:
 
 
 class TestExecuteTurn:
-    """Unit tests for SubprocessDriver.execute_turn."""
+    """Unit tests for ProposalDriver.execute_turn."""
 
     @pytest.fixture()
-    def driver(self, mocker: MockerFixture) -> SubprocessDriver:
+    def driver(self, mocker: MockerFixture) -> ProposalDriver:
         """Create a driver with mocked shutil and uuid."""
         mocker.patch(f"{MODULE}.shutil").which.return_value = "/usr/bin/oc"
         mocker.patch(f"{MODULE}.uuid").uuid4.return_value.hex = "abcd1234"
-        return SubprocessDriver({**VALID_CONFIG, "timeout": 10, "poll_interval": 1})
+        return ProposalDriver({**VALID_CONFIG, "timeout": 10, "poll_interval": 1})
 
     def test_happy_path_completed(
-        self, mocker: MockerFixture, driver: SubprocessDriver
+        self, mocker: MockerFixture, driver: ProposalDriver
     ) -> None:
         """Test successful full lifecycle returns no error."""
         mock_time = mocker.patch(f"{MODULE}.time")
-        mock_time.monotonic.side_effect = [0.0, 1.0]
+        mock_time.monotonic.side_effect = [0.0, 0.0, 0.0, 1.0]
 
         mock_apply = mocker.patch.object(driver, "_apply")
         mock_apply.return_value = mocker.Mock(returncode=0)
@@ -487,9 +480,7 @@ def test_happy_path_completed(
         assert turn.proposal_status == terminal_status
         driver._cleanup.assert_called_once_with("eval-abcd1234")
 
-    def test_apply_failure(
-        self, mocker: MockerFixture, driver: SubprocessDriver
-    ) -> None:
+    def test_apply_failure(self, mocker: MockerFixture, driver: ProposalDriver) -> None:
         """Test apply failure returns error without polling."""
         mock_apply = mocker.patch.object(driver, "_apply")
         mock_apply.return_value = mocker.Mock(returncode=1, stderr="connection refused")
@@ -500,10 +491,10 @@ def test_apply_failure(
         assert error == "Failed to apply Proposal CR: connection refused"
         assert conv_id is None
 
-    def test_timeout(self, mocker: MockerFixture, driver: SubprocessDriver) -> None:
+    def test_timeout(self, mocker: MockerFixture, driver: ProposalDriver) -> None:
         """Test timeout returns error when no terminal condition reached."""
         mock_time = mocker.patch(f"{MODULE}.time")
-        mock_time.monotonic.side_effect = [0.0, 1.0, 11.0]
+        mock_time.monotonic.side_effect = [0.0, 0.0, 0.0, 1.0, 11.0]
 
         mock_apply = mocker.patch.object(driver, "_apply")
         mock_apply.return_value = mocker.Mock(returncode=0)
@@ -520,18 +511,16 @@ def test_timeout(self, mocker: MockerFixture, driver: SubprocessDriver) -> None:
         assert conv_id is None
         driver._cleanup.assert_called_once()
 
-    def test_auto_approve(
-        self, mocker: MockerFixture, driver: SubprocessDriver
-    ) -> None:
-        """Test auto-approve triggers on Analyzed=True."""
+    def test_auto_approve(self, mocker: MockerFixture, driver: ProposalDriver) -> None:
+        """Test auto-approve sends approval before polling."""
         mock_time = mocker.patch(f"{MODULE}.time")
-        mock_time.monotonic.side_effect = [0.0, 1.0, 2.0]
+        mock_time.monotonic.side_effect = [0.0, 0.0, 0.0, 1.0]
 
         mock_apply = mocker.patch.object(driver, "_apply")
         mock_apply.return_value = mocker.Mock(returncode=0)
 
-        poll_1: dict[str, Any] = {"conditions": [_cond("Analyzed", "True")]}
-        poll_2: dict[str, Any] = {
+        status_ready: dict[str, Any] = {"conditions": []}
+        status_terminal: dict[str, Any] = {
             "conditions": [
                 _cond("Analyzed", "True"),
                 _cond("Executed", "True"),
@@ -541,7 +530,10 @@ def test_auto_approve(
         mocker.patch.object(
             driver,
             "_get_status",
-            side_effect=[(poll_1, None), (poll_2, None)],
+            side_effect=[
+                (status_ready, None),
+                (status_terminal, None),
+            ],
         )
         mocker.patch.object(driver, "_cleanup")
 
@@ -552,7 +544,7 @@ def test_auto_approve(
         assert mock_apply.call_count == 2
 
     def test_auto_approve_disabled(
-        self, mocker: MockerFixture, driver: SubprocessDriver
+        self, mocker: MockerFixture, driver: ProposalDriver
     ) -> None:
         """Test auto-approve skipped when disabled."""
         driver._config.auto_approve = False
@@ -573,11 +565,11 @@ def test_auto_approve_disabled(
         assert mock_apply.call_count == 1
 
     def test_failed_terminal(
-        self, mocker: MockerFixture, driver: SubprocessDriver
+        self, mocker: MockerFixture, driver: ProposalDriver
     ) -> None:
         """Test failed proposal returns error with outcome."""
         mock_time = mocker.patch(f"{MODULE}.time")
-        mock_time.monotonic.side_effect = [0.0, 1.0]
+        mock_time.monotonic.side_effect = [0.0, 0.0, 0.0, 1.0]
 
         mock_apply = mocker.patch.object(driver, "_apply")
         mock_apply.return_value = mocker.Mock(returncode=0)
@@ -597,11 +589,11 @@ def test_failed_terminal(
         assert turn.response == "LLM error"
 
     def test_denied_terminal(
-        self, mocker: MockerFixture, driver: SubprocessDriver
+        self, mocker: MockerFixture, driver: ProposalDriver
     ) -> None:
         """Test denied proposal returns error."""
         mock_time = mocker.patch(f"{MODULE}.time")
-        mock_time.monotonic.side_effect = [0.0, 1.0]
+        mock_time.monotonic.side_effect = [0.0, 0.0, 0.0, 1.0]
 
         mock_apply = mocker.patch.object(driver, "_apply")
         mock_apply.return_value = mocker.Mock(returncode=0)
@@ -619,11 +611,11 @@ def test_denied_terminal(
         assert "Denied" in error
 
     def test_get_status_error(
-        self, mocker: MockerFixture, driver: SubprocessDriver
+        self, mocker: MockerFixture, driver: ProposalDriver
     ) -> None:
         """Test get_status error triggers cleanup and returns error."""
         mock_time = mocker.patch(f"{MODULE}.time")
-        mock_time.monotonic.side_effect = [0.0, 1.0]
+        mock_time.monotonic.side_effect = [0.0, 0.0, 0.0, 1.0]
 
         mock_apply = mocker.patch.object(driver, "_apply")
         mock_apply.return_value = mocker.Mock(returncode=0)
@@ -631,7 +623,10 @@ def test_get_status_error(
         mocker.patch.object(
             driver,
             "_get_status",
-            return_value=({}, "Failed to get status for 'eval-abcd1234'"),
+            side_effect=[
+                ({}, None),
+                ({}, "Failed to get status for 'eval-abcd1234'"),
+            ],
         )
         mocker.patch.object(driver, "_cleanup")
 
@@ -643,11 +638,11 @@ def test_get_status_error(
         driver._cleanup.assert_called_once()
 
     def test_conversation_id_in_cr_name(
-        self, mocker: MockerFixture, driver: SubprocessDriver
+        self, mocker: MockerFixture, driver: ProposalDriver
     ) -> None:
         """Test conversation_id is included in CR name."""
         mock_time = mocker.patch(f"{MODULE}.time")
-        mock_time.monotonic.side_effect = [0.0, 1.0]
+        mock_time.monotonic.side_effect = [0.0, 0.0, 0.0, 1.0]
 
         mock_apply = mocker.patch.object(driver, "_apply")
         mock_apply.return_value = mocker.Mock(returncode=0)

From d4dc8781ae8443cfc6b6c9cce2be3e058fb376bf Mon Sep 17 00:00:00 2001
From: rioloc <criolo@redhat.com>
Date: Tue, 26 May 2026 15:50:38 +0200
Subject: [PATCH 3/6] fix: address CodeRabbit review feedback on PR #232

- Narrow failure detection in _derive_phase to step conditions only
  (Analyzed/Executed/Verified) instead of all conditions
- Make by_type construction safe against missing 'type' keys
- Add namespace validation (min_length=1) to ProposalAgentConfig
- Gate CLI binary check behind driver enablement flag
- Narrow exception handling in _cleanup to SubprocessError/OSError
- Refactor _is_terminal to delegate to _derive_phase (dedup)
- Use explicit None check in AgentDriverRegistry init
- Make cleanup script deletions best-effort (|| true)
- Fix SCRIPT_DIR for fixture paths in setup script
- Align model default comment with runtime value (gpt-5.2)
- Use stringData manifest for secret creation (avoid process args)
- Remove module-level pylint disable in integration tests
- Align CLI prerequisite checks to oc-only
- Assert oc returncode before checking proposal cleanup
- Add TODO for pinning agentic-skills image to stable version

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../core/metrics/custom/proposal_eval.py      | 12 ++-
 .../core/models/agents.py                     |  7 +-
 .../pipeline/evaluation/driver.py             | 35 ++++----
 .../pipeline/evaluation/registry.py           |  2 +-
 .../cleanup_subprocess_fixtures-openai.sh     |  4 +-
 .../setup_subprocess_fixtures-openai.sh       | 24 ++++--
 .../test_evaluation_data_subprocess.yaml      |  2 +
 .../integration/test_subprocess_evaluation.py | 81 ++++++-------------
 .../evaluation/test_proposal_driver.py        |  2 +-
 9 files changed, 77 insertions(+), 92 deletions(-)

diff --git a/src/lightspeed_evaluation/core/metrics/custom/proposal_eval.py b/src/lightspeed_evaluation/core/metrics/custom/proposal_eval.py
index 874b8129..c29ee235 100644
--- a/src/lightspeed_evaluation/core/metrics/custom/proposal_eval.py
+++ b/src/lightspeed_evaluation/core/metrics/custom/proposal_eval.py
@@ -18,7 +18,7 @@ def _derive_phase(
     Returns:
         Phase string: Completed, Failed, Denied, Escalated, or InProgress.
     """
-    by_type = {c["type"]: c for c in conditions}
+    by_type = {c["type"]: c for c in conditions if isinstance(c, dict) and "type" in c}
 
     if by_type.get("Denied", {}).get("status") == "True":
         return "Denied"
@@ -26,7 +26,11 @@ def _derive_phase(
         return "Escalated"
 
     for c in conditions:
-        if c.get("status") == "False" and c.get("reason") != "RetryingExecution":
+        if (
+            c.get("type") in {"Analyzed", "Executed", "Verified"}
+            and c.get("status") == "False"
+            and c.get("reason") != "RetryingExecution"
+        ):
             return "Failed"
 
     step_to_condition = {"verification": "Verified", "execution": "Executed"}
@@ -89,7 +93,7 @@ def _check_conditions(
     if expected_conditions is None:
         return None
 
-    by_type = {c["type"]: c for c in conditions}
+    by_type = {c["type"]: c for c in conditions if isinstance(c, dict) and "type" in c}
 
     for exp_cond in expected_conditions:
         cond_type = exp_cond.get("type")
@@ -128,7 +132,7 @@ def _check_verification(
     if verification is None:
         return None
 
-    by_type = {c["type"]: c for c in conditions}
+    by_type = {c["type"]: c for c in conditions if isinstance(c, dict) and "type" in c}
     verified = by_type.get("Verified")
 
     if verified is None:
diff --git a/src/lightspeed_evaluation/core/models/agents.py b/src/lightspeed_evaluation/core/models/agents.py
index 2b621f89..4f4831bb 100644
--- a/src/lightspeed_evaluation/core/models/agents.py
+++ b/src/lightspeed_evaluation/core/models/agents.py
@@ -140,7 +140,12 @@ class ProposalAgentConfig(BaseModel):
     model_config = ConfigDict(extra="forbid")
 
     type: Literal["proposal"] = "proposal"
-    namespace: str
+    namespace: str = Field(
+        ...,
+        min_length=1,
+        pattern=r"\S+",
+        description="Kubernetes namespace containing Proposal resources",
+    )
     auto_approve: bool = True
     cleanup_proposals: bool = True
     timeout: int = Field(default=900, gt=0)
diff --git a/src/lightspeed_evaluation/pipeline/evaluation/driver.py b/src/lightspeed_evaluation/pipeline/evaluation/driver.py
index 35a15380..6f95d3eb 100644
--- a/src/lightspeed_evaluation/pipeline/evaluation/driver.py
+++ b/src/lightspeed_evaluation/pipeline/evaluation/driver.py
@@ -21,6 +21,9 @@
     ProposalAgentConfig,
     TurnData,
 )
+from lightspeed_evaluation.core.metrics.custom.proposal_eval import (
+    _derive_phase,
+)
 from lightspeed_evaluation.core.system.exceptions import ConfigurationError
 from lightspeed_evaluation.pipeline.evaluation.amender import APIDataAmender
 
@@ -144,7 +147,7 @@ def __init__(self, config: dict[str, Any], *, enabled: bool = True) -> None:
     def validate_config(self, config: dict[str, Any]) -> None:
         """Validate proposal driver configuration."""
         ProposalAgentConfig.model_validate(config)
-        if not shutil.which("oc") and not shutil.which("kubectl"):
+        if self._enabled and not shutil.which("oc") and not shutil.which("kubectl"):
             raise ConfigurationError("Neither 'oc' nor 'kubectl' found on PATH")
 
     def execute_turn(
@@ -281,8 +284,8 @@ def _cleanup(self, cr_name: str) -> None:
         try:
             self._delete(cr_name)
             logger.info("Cleaned up Proposal CR '%s'", cr_name)
-        except Exception:  # pylint: disable=broad-exception-caught
-            logger.warning("Failed to clean up Proposal CR '%s'", cr_name)
+        except (subprocess.SubprocessError, OSError) as exc:
+            logger.warning("Failed to clean up Proposal CR '%s': %s", cr_name, exc)
 
     def _build_proposal_cr(self, turn_data: TurnData, cr_name: str) -> dict[str, Any]:
         """Build Proposal CR manifest from TurnData."""
@@ -372,28 +375,20 @@ def _approve_when_ready(
             return f"Failed to apply ProposalApproval: {result.stderr.strip()}"
         return None
 
+    _PHASE_TO_OUTCOME: dict[str, TerminalOutcome] = {
+        "Completed": TerminalOutcome.COMPLETED,
+        "Failed": TerminalOutcome.FAILED,
+        "Denied": TerminalOutcome.DENIED,
+        "Escalated": TerminalOutcome.ESCALATED,
+    }
+
     @staticmethod
     def _is_terminal(
         conditions: list[dict[str, Any]], proposal_spec: dict[str, Any]
     ) -> Optional[TerminalOutcome]:
         """Check if conditions indicate a terminal state."""
-        by_type = {c["type"]: c for c in conditions}
-        if by_type.get("Denied", {}).get("status") == "True":
-            return TerminalOutcome.DENIED
-        if by_type.get("Escalated", {}).get("status") == "True":
-            return TerminalOutcome.ESCALATED
-        for c in conditions:
-            if c.get("status") == "False" and c.get("reason") != "RetryingExecution":
-                return TerminalOutcome.FAILED
-        if "verification" in proposal_spec:
-            last = "Verified"
-        elif "execution" in proposal_spec:
-            last = "Executed"
-        else:
-            last = "Analyzed"
-        if by_type.get(last, {}).get("status") == "True":
-            return TerminalOutcome.COMPLETED
-        return None
+        phase = _derive_phase(conditions, proposal_spec or None)
+        return ProposalDriver._PHASE_TO_OUTCOME.get(phase)
 
     @staticmethod
     def _extract_summary(status_dict: dict[str, Any]) -> str:
diff --git a/src/lightspeed_evaluation/pipeline/evaluation/registry.py b/src/lightspeed_evaluation/pipeline/evaluation/registry.py
index 96c0e867..0dd97453 100644
--- a/src/lightspeed_evaluation/pipeline/evaluation/registry.py
+++ b/src/lightspeed_evaluation/pipeline/evaluation/registry.py
@@ -22,7 +22,7 @@ class AgentDriverRegistry:  # pylint: disable=too-few-public-methods
 
     def __init__(self, drivers: Optional[dict[str, type[AgentDriver]]] = None) -> None:
         """Initialize the driver registry."""
-        self._drivers = drivers or AGENT_DRIVERS
+        self._drivers = AGENT_DRIVERS if drivers is None else drivers
 
     def create_driver(
         self, agent_config: dict[str, Any], *, enabled: bool = True
diff --git a/tests/integration/agentic/scripts/cleanup_subprocess_fixtures-openai.sh b/tests/integration/agentic/scripts/cleanup_subprocess_fixtures-openai.sh
index ad4b77b7..2cb308b9 100755
--- a/tests/integration/agentic/scripts/cleanup_subprocess_fixtures-openai.sh
+++ b/tests/integration/agentic/scripts/cleanup_subprocess_fixtures-openai.sh
@@ -12,8 +12,8 @@ echo "Cleaning up integration test resources..."
 
 # Delete test workload + any leftover Proposals in test namespace
 oc delete deployment oomkill-demo -n "$TEST_NS" --ignore-not-found
-oc delete proposals --all -n "$TEST_NS" --ignore-not-found
-oc delete proposalapprovals --all -n "$TEST_NS" --ignore-not-found
+oc delete proposals --all -n "$TEST_NS" --ignore-not-found || true
+oc delete proposalapprovals --all -n "$TEST_NS" --ignore-not-found || true
 
 # Delete prefixed operator resources (reverse order of creation)
 oc delete sandboxtemplate eval-lightspeed-agent -n "$OPERATOR_NS" --ignore-not-found
diff --git a/tests/integration/agentic/scripts/setup_subprocess_fixtures-openai.sh b/tests/integration/agentic/scripts/setup_subprocess_fixtures-openai.sh
index 0ed8f523..dea157bd 100755
--- a/tests/integration/agentic/scripts/setup_subprocess_fixtures-openai.sh
+++ b/tests/integration/agentic/scripts/setup_subprocess_fixtures-openai.sh
@@ -11,7 +11,7 @@ set -euo pipefail
 #   SANDBOX_IMAGE    — sandbox container image URL
 #
 # Optional env vars:
-#   AGENT_MODEL      — default: gpt-4o
+#   AGENT_MODEL      — default: gpt-5.2
 #
 # To test with Claude via Vertex AI instead, use
 # setup_subprocess_fixtures-claude-vertex.sh and set the corresponding env vars
@@ -21,6 +21,8 @@ OPERATOR_NS="openshift-lightspeed"
 TEST_NS="lightspeed-evaluation-test"
 AGENT_MODEL="${AGENT_MODEL:-gpt-5.2}"
 
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+
 for var in OPENAI_API_KEY SANDBOX_IMAGE; do
   if [ -z "${!var:-}" ]; then
     echo "ERROR: $var is not set" >&2
@@ -29,17 +31,23 @@ for var in OPENAI_API_KEY SANDBOX_IMAGE; do
 done
 
 # 1. Test namespace + OOMKill workload (static fixtures, already test-scoped)
-oc apply -f ../fixtures/namespace.yaml
-oc apply -f ../fixtures/oomkill-demo.yaml
+oc apply -f "$SCRIPT_DIR/../fixtures/namespace.yaml"
+oc apply -f "$SCRIPT_DIR/../fixtures/oomkill-demo.yaml"
 
 # 2. Secret (OpenAI API key + provider override) — prefixed name in operator namespace
 # LIGHTSPEED_AGENT_PROVIDER and OPENAI_MODEL are injected via envFrom so the
 # sandbox picks the OpenAI provider instead of defaulting to claude.
-oc create secret generic eval-llm-credentials \
-  --from-literal=OPENAI_API_KEY="$OPENAI_API_KEY" \
-  --from-literal=LIGHTSPEED_AGENT_PROVIDER="openai" \
-  --from-literal=OPENAI_MODEL="$AGENT_MODEL" \
-  -n "$OPERATOR_NS" --dry-run=client -o yaml | oc apply -f -
+cat <<EOF | oc apply -n "$OPERATOR_NS" -f -
+apiVersion: v1
+kind: Secret
+metadata:
+  name: eval-llm-credentials
+type: Opaque
+stringData:
+  OPENAI_API_KEY: ${OPENAI_API_KEY}
+  LIGHTSPEED_AGENT_PROVIDER: openai
+  OPENAI_MODEL: ${AGENT_MODEL}
+EOF
 
 # 3. LLMProvider — references prefixed secret
 cat <<EOF | oc apply -f -
diff --git a/tests/integration/test_evaluation_data_subprocess.yaml b/tests/integration/test_evaluation_data_subprocess.yaml
index 70eebb75..66d571e3 100644
--- a/tests/integration/test_evaluation_data_subprocess.yaml
+++ b/tests/integration/test_evaluation_data_subprocess.yaml
@@ -17,6 +17,7 @@
           - lightspeed-evaluation-test
         tools:
           skills:
+            # TODO(rioloc): replace with a stable image
             - image: quay.io/harpatil/agentic-skills:latest
               paths:
                 - /skills/find-token
@@ -50,6 +51,7 @@
           - lightspeed-evaluation-test
         tools:
           skills:
+            # TODO(rioloc): replace with a stable image
             - image: quay.io/harpatil/agentic-skills:latest
               paths:
                 - /skills/find-token
diff --git a/tests/integration/test_subprocess_evaluation.py b/tests/integration/test_subprocess_evaluation.py
index 61f8f24f..c3415338 100644
--- a/tests/integration/test_subprocess_evaluation.py
+++ b/tests/integration/test_subprocess_evaluation.py
@@ -1,4 +1,3 @@
-# pylint: disable=redefined-outer-name,too-many-arguments,too-many-positional-arguments,import-outside-toplevel
 """Integration tests for ProposalDriver-based evaluation.
 
 These tests run the evaluation pipeline against a live OpenShift cluster
@@ -26,11 +25,12 @@
 
 from lightspeed_evaluation import ConfigLoader, evaluate
 from lightspeed_evaluation.core.storage import FileBackendConfig
+from lightspeed_evaluation.core.system import DataValidator
 
 
 def check_cli_available() -> bool:
-    """Check if oc or kubectl CLI is available."""
-    return bool(shutil.which("oc") or shutil.which("kubectl"))
+    """Check if oc CLI is available."""
+    return bool(shutil.which("oc"))
 
 
 def check_cluster_reachable() -> bool:
@@ -70,31 +70,19 @@ def check_env_vars_set() -> bool:
 
 pytestmark = pytest.mark.agentic
 
-
-@pytest.fixture
-def integration_test_dir() -> Path:
-    """Get the integration test directory path."""
-    return Path(__file__).parent
-
-
-@pytest.fixture
-def subprocess_config_path(integration_test_dir: Path) -> Path:
-    """Get path to subprocess agent system config file."""
-    return integration_test_dir / "system-config-agents-subprocess.yaml"
-
-
-@pytest.fixture
-def subprocess_eval_data_path(integration_test_dir: Path) -> Path:
-    """Get path to subprocess evaluation data file."""
-    return integration_test_dir / "test_evaluation_data_subprocess.yaml"
+INTEGRATION_TEST_DIR = Path(__file__).parent
+SUBPROCESS_CONFIG_PATH = INTEGRATION_TEST_DIR / "system-config-agents-subprocess.yaml"
+SUBPROCESS_EVAL_DATA_PATH = (
+    INTEGRATION_TEST_DIR / "test_evaluation_data_subprocess.yaml"
+)
 
 
 class TestProposalPrerequisites:
     """Verify prerequisites for subprocess integration tests."""
 
     def test_cli_available(self) -> None:
-        """Verify that oc or kubectl CLI is available."""
-        assert check_cli_available(), "oc or kubectl CLI must be installed and in PATH"
+        """Verify that oc CLI is available."""
+        assert check_cli_available(), "oc CLI must be installed and in PATH"
 
     def test_cluster_reachable(self) -> None:
         """Verify that the cluster is reachable."""
@@ -117,12 +105,7 @@ class TestProposalDriverEvaluation:
     """End-to-end tests for ProposalDriver evaluation pipeline."""
 
     @pytest.mark.timeout(1200)
-    def test_full_lifecycle(
-        self,
-        subprocess_config_path: Path,
-        subprocess_eval_data_path: Path,
-        tmp_path: Path,
-    ) -> None:
+    def test_full_lifecycle(self, tmp_path: Path) -> None:
         """Test full Proposal lifecycle: analysis, execution, verification.
 
         Verifies:
@@ -134,18 +117,16 @@ def test_full_lifecycle(
         - Cleanup script removes test resources
         """
         loader = ConfigLoader()
-        system_config = loader.load_system_config(str(subprocess_config_path))
+        system_config = loader.load_system_config(str(SUBPROCESS_CONFIG_PATH))
         system_config.storage = [
             FileBackendConfig(output_dir=str(tmp_path / "eval_output"))
         ]
 
-        from lightspeed_evaluation.core.system import DataValidator
-
         validator = DataValidator(
             api_enabled=True,
             fail_on_invalid_data=system_config.core.fail_on_invalid_data,
         )
-        all_data = validator.load_evaluation_data(str(subprocess_eval_data_path))
+        all_data = validator.load_evaluation_data(str(SUBPROCESS_EVAL_DATA_PATH))
         eval_data = [
             d
             for d in all_data
@@ -174,12 +155,7 @@ def test_full_lifecycle(
         ), "Analyzed condition should be True"
 
     @pytest.mark.timeout(600)
-    def test_analysis_only(
-        self,
-        subprocess_config_path: Path,
-        subprocess_eval_data_path: Path,
-        tmp_path: Path,
-    ) -> None:
+    def test_analysis_only(self, tmp_path: Path) -> None:
         """Test analysis-only Proposal (no execution or verification).
 
         Verifies:
@@ -187,18 +163,16 @@ def test_analysis_only(
         - No Executed or Verified conditions present
         """
         loader = ConfigLoader()
-        system_config = loader.load_system_config(str(subprocess_config_path))
+        system_config = loader.load_system_config(str(SUBPROCESS_CONFIG_PATH))
         system_config.storage = [
             FileBackendConfig(output_dir=str(tmp_path / "eval_output"))
         ]
 
-        from lightspeed_evaluation.core.system import DataValidator
-
         validator = DataValidator(
             api_enabled=True,
             fail_on_invalid_data=system_config.core.fail_on_invalid_data,
         )
-        all_data = validator.load_evaluation_data(str(subprocess_eval_data_path))
+        all_data = validator.load_evaluation_data(str(SUBPROCESS_EVAL_DATA_PATH))
         eval_data = [
             d for d in all_data if d.conversation_group_id == "subprocess_analysis_only"
         ]
@@ -232,12 +206,7 @@ def test_analysis_only(
             )
 
     @pytest.mark.timeout(120)
-    def test_timeout_handling(
-        self,
-        subprocess_config_path: Path,
-        subprocess_eval_data_path: Path,
-        tmp_path: Path,
-    ) -> None:
+    def test_timeout_handling(self, tmp_path: Path) -> None:
         """Test that a very short timeout is handled gracefully.
 
         Verifies:
@@ -245,7 +214,7 @@ def test_timeout_handling(
         - Proposal CRs are cleaned up after timeout
         """
         loader = ConfigLoader()
-        system_config = loader.load_system_config(str(subprocess_config_path))
+        system_config = loader.load_system_config(str(SUBPROCESS_CONFIG_PATH))
         system_config.storage = [
             FileBackendConfig(output_dir=str(tmp_path / "eval_output"))
         ]
@@ -255,13 +224,11 @@ def test_timeout_handling(
         agent_cfg.timeout = 5
         agent_cfg.poll_interval = 1
 
-        from lightspeed_evaluation.core.system import DataValidator
-
         validator = DataValidator(
             api_enabled=True,
             fail_on_invalid_data=system_config.core.fail_on_invalid_data,
         )
-        all_data = validator.load_evaluation_data(str(subprocess_eval_data_path))
+        all_data = validator.load_evaluation_data(str(SUBPROCESS_EVAL_DATA_PATH))
         eval_data = [
             d for d in all_data if d.conversation_group_id == "subprocess_analysis_only"
         ]
@@ -283,11 +250,15 @@ def test_timeout_handling(
             timeout=10,
             check=False,
         )
+        assert result.returncode == 0, (
+            "Failed to list proposals during timeout cleanup validation: "
+            f"{result.stderr.strip()}"
+        )
         lines = [
             line
             for line in result.stdout.strip().splitlines()
             if line.startswith("proposal.agentic.openshift.io/eval-")
         ]
-        assert len(lines) == 0, (
-            f"Proposal CRs should be cleaned up after timeout, " f"but found: {lines}"
-        )
+        assert (
+            len(lines) == 0
+        ), f"Proposal CRs should be cleaned up after timeout, but found: {lines}"
diff --git a/tests/unit/pipeline/evaluation/test_proposal_driver.py b/tests/unit/pipeline/evaluation/test_proposal_driver.py
index 7992f27a..32d5fb22 100644
--- a/tests/unit/pipeline/evaluation/test_proposal_driver.py
+++ b/tests/unit/pipeline/evaluation/test_proposal_driver.py
@@ -430,7 +430,7 @@ def test_cleanup_failure_logged(self, mocker: MockerFixture) -> None:
         """Test cleanup logs warning on _delete failure."""
         mocker.patch(f"{MODULE}.shutil").which.return_value = "/usr/bin/oc"
         driver = ProposalDriver(VALID_CONFIG)
-        mocker.patch.object(driver, "_delete", side_effect=Exception("boom"))
+        mocker.patch.object(driver, "_delete", side_effect=OSError("boom"))
         mock_logger = mocker.patch(f"{MODULE}.logger")
 
         driver._cleanup("eval-test")

From 41a786febf39f3f400a288f43080aee1508ee28a Mon Sep 17 00:00:00 2001
From: rioloc <criolo@redhat.com>
Date: Tue, 26 May 2026 16:24:24 +0200
Subject: [PATCH 4/6] feat: make query optional if proposal_spec.request is
 provided

TurnData.query is now Optional with a model_validator that auto-populates
it from proposal_spec["request"] when absent, removing the need to
duplicate the query text in both fields for CRD-based agent workflows.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../core/metrics/custom/proposal_eval.py      |  2 +-
 .../core/metrics/ragas.py                     |  2 +-
 src/lightspeed_evaluation/core/models/data.py | 24 ++++++++++++--
 .../pipeline/evaluation/amender.py            |  2 +-
 .../pipeline/evaluation/errors.py             |  6 ++--
 .../pipeline/evaluation/evaluator.py          | 18 +++++------
 .../pipeline/evaluation/processor.py          |  2 +-
 .../test_evaluation_data_subprocess.yaml      | 16 +++++-----
 tests/unit/core/models/test_data.py           | 32 +++++++++++++++++++
 9 files changed, 78 insertions(+), 26 deletions(-)

diff --git a/src/lightspeed_evaluation/core/metrics/custom/proposal_eval.py b/src/lightspeed_evaluation/core/metrics/custom/proposal_eval.py
index c29ee235..bbb3cabe 100644
--- a/src/lightspeed_evaluation/core/metrics/custom/proposal_eval.py
+++ b/src/lightspeed_evaluation/core/metrics/custom/proposal_eval.py
@@ -26,7 +26,7 @@ def _derive_phase(
         return "Escalated"
 
     for c in conditions:
-        if (
+        if isinstance(c, dict) and (
             c.get("type") in {"Analyzed", "Executed", "Verified"}
             and c.get("status") == "False"
             and c.get("reason") != "RetryingExecution"
diff --git a/src/lightspeed_evaluation/core/metrics/ragas.py b/src/lightspeed_evaluation/core/metrics/ragas.py
index 613943b5..0f8fd280 100644
--- a/src/lightspeed_evaluation/core/metrics/ragas.py
+++ b/src/lightspeed_evaluation/core/metrics/ragas.py
@@ -92,7 +92,7 @@ def _extract_turn_data(
         if turn_data is None:
             return "", "", []
 
-        query = turn_data.query
+        query = turn_data.query or ""
         response = turn_data.response
         contexts = [
             ctx.get("content", str(ctx)) if isinstance(ctx, dict) else str(ctx)
diff --git a/src/lightspeed_evaluation/core/models/data.py b/src/lightspeed_evaluation/core/models/data.py
index cc4489ee..c45c1317 100644
--- a/src/lightspeed_evaluation/core/models/data.py
+++ b/src/lightspeed_evaluation/core/models/data.py
@@ -4,7 +4,7 @@
 from pathlib import Path
 from typing import Any, Optional, Union
 
-from pydantic import BaseModel, ConfigDict, Field, field_validator
+from pydantic import BaseModel, ConfigDict, Field, field_validator, model_validator
 
 from lightspeed_evaluation.core.constants import SUPPORTED_RESULT_STATUSES
 from lightspeed_evaluation.core.models.mixins import StreamingMetricsMixin
@@ -39,7 +39,11 @@ class TurnData(StreamingMetricsMixin):
     model_config = ConfigDict(extra="forbid")
 
     turn_id: str = Field(..., min_length=1, description="Turn ID (alphanumeric)")
-    query: str = Field(..., min_length=1, description="Query")
+    query: Optional[str] = Field(
+        default=None,
+        min_length=1,
+        description="Query — auto-populated from proposal_spec.request when absent",
+    )
     attachments: Optional[list[str]] = Field(
         default=None, min_length=0, description="Attachments"
     )
@@ -130,6 +134,22 @@ def is_metric_invalid(self, metric: str) -> bool:
         """Returns True if the metric didn't pass the validation."""
         return metric in self._invalid_metrics
 
+    @model_validator(mode="after")
+    def populate_query_from_proposal_spec(self) -> "TurnData":
+        """Auto-populate query from proposal_spec.request when absent."""
+        if self.query is not None:
+            return self
+        if self.proposal_spec is not None:
+            request = self.proposal_spec.get("request")
+            if isinstance(request, str) and request.strip():
+                self.query = request
+                return self
+            raise ValueError(
+                "proposal_spec must contain a non-empty 'request' "
+                "when query is not provided"
+            )
+        raise ValueError("query is required when proposal_spec is not provided")
+
     @field_validator("turn_metrics")
     @classmethod
     def validate_turn_metrics(cls, v: Optional[list[str]]) -> Optional[list[str]]:
diff --git a/src/lightspeed_evaluation/pipeline/evaluation/amender.py b/src/lightspeed_evaluation/pipeline/evaluation/amender.py
index 21d2b22e..f904a742 100644
--- a/src/lightspeed_evaluation/pipeline/evaluation/amender.py
+++ b/src/lightspeed_evaluation/pipeline/evaluation/amender.py
@@ -41,7 +41,7 @@ def amend_single_turn(
         api_start_time = time.perf_counter()
         try:
             api_response = self.api_client.query(
-                query=turn_data.query,
+                query=turn_data.query or "",
                 conversation_id=conversation_id,
                 attachments=turn_data.attachments,
                 extra_request_params=turn_data.extra_request_params,
diff --git a/src/lightspeed_evaluation/pipeline/evaluation/errors.py b/src/lightspeed_evaluation/pipeline/evaluation/errors.py
index 99536961..5a352906 100644
--- a/src/lightspeed_evaluation/pipeline/evaluation/errors.py
+++ b/src/lightspeed_evaluation/pipeline/evaluation/errors.py
@@ -130,7 +130,7 @@ def mark_all_metrics_as_error(
                         error_reason,
                         tag=conv_data.tag,
                         turn_id=turn_data.turn_id,
-                        query=turn_data.query,
+                        query=turn_data.query or "",
                     )
                 )
 
@@ -181,7 +181,7 @@ def mark_turn_metrics_as_error(  # pylint: disable=too-many-arguments,too-many-p
                 error_reason,
                 tag=conv_data.tag,
                 turn_id=turn_data.turn_id,
-                query=turn_data.query,
+                query=turn_data.query or "",
             )
             for metric_id in turn_metrics
         ]
@@ -224,7 +224,7 @@ def _mark_cascade(  # pylint: disable=too-many-arguments,too-many-positional-arg
                         result_status,
                         tag=conv_data.tag,
                         turn_id=turn_data.turn_id,
-                        query=turn_data.query,
+                        query=turn_data.query or "",
                     )
                 )
 
diff --git a/src/lightspeed_evaluation/pipeline/evaluation/evaluator.py b/src/lightspeed_evaluation/pipeline/evaluation/evaluator.py
index f091266b..091e6516 100644
--- a/src/lightspeed_evaluation/pipeline/evaluation/evaluator.py
+++ b/src/lightspeed_evaluation/pipeline/evaluation/evaluator.py
@@ -5,9 +5,13 @@
 import time
 from typing import Any, Optional
 
+from lightspeed_evaluation.core.constants import (
+    DEFAULT_METRIC_THRESHOLD,
+    NON_LLM_FRAMEWORKS,
+)
 from lightspeed_evaluation.core.embedding.manager import EmbeddingManager
-from lightspeed_evaluation.core.llm.token_tracker import TokenTracker
 from lightspeed_evaluation.core.llm.manager import LLMManager
+from lightspeed_evaluation.core.llm.token_tracker import TokenTracker
 from lightspeed_evaluation.core.metrics.custom import CustomMetrics
 from lightspeed_evaluation.core.metrics.deepeval import DeepEvalMetrics
 from lightspeed_evaluation.core.metrics.manager import MetricLevel, MetricManager
@@ -30,10 +34,6 @@
     METRIC_REQUIREMENTS,
     check_metric_required_data,
 )
-from lightspeed_evaluation.core.constants import (
-    DEFAULT_METRIC_THRESHOLD,
-    NON_LLM_FRAMEWORKS,
-)
 from lightspeed_evaluation.pipeline.evaluation.judges import JudgeOrchestrator
 
 logger = logging.getLogger(__name__)
@@ -241,8 +241,8 @@ def evaluate_metric(  # pylint: disable=too-many-locals
                 turn_id=request.turn_id,
                 metric_identifier=request.metric_identifier,
                 metric_metadata=self._extract_metadata_for_csv(request),
-                query=turn_data.query if turn_data else "",
-                response=turn_data.response or "" if turn_data else "",
+                query=(turn_data.query or "") if turn_data else "",
+                response=(turn_data.response or "") if turn_data else "",
                 evaluation_latency=evaluation_latency,
                 agent_latency=agent_latency,
                 execution_time=evaluation_latency + agent_latency,
@@ -742,9 +742,9 @@ def _create_error_result(
             score=None,
             threshold=None,
             reason=reason,
-            query=turn_data.query if turn_data else "",
-            response=turn_data.response or "" if turn_data else "",
             evaluation_latency=evaluation_latency,
+            query=(turn_data.query or "") if turn_data else "",
+            response=(turn_data.response or "") if turn_data else "",
             agent_latency=agent_latency,
             execution_time=evaluation_latency + agent_latency,
             api_input_tokens=api_input_tokens,
diff --git a/src/lightspeed_evaluation/pipeline/evaluation/processor.py b/src/lightspeed_evaluation/pipeline/evaluation/processor.py
index b2399d9a..4f4adaab 100644
--- a/src/lightspeed_evaluation/pipeline/evaluation/processor.py
+++ b/src/lightspeed_evaluation/pipeline/evaluation/processor.py
@@ -284,7 +284,7 @@ def _evaluate_turn(
                         metric_identifier,
                         error_reason,
                         turn_id=turn_data.turn_id,
-                        query=turn_data.query,
+                        query=turn_data.query or "",
                     )
                 )
                 continue
diff --git a/tests/integration/test_evaluation_data_subprocess.yaml b/tests/integration/test_evaluation_data_subprocess.yaml
index 66d571e3..845a677d 100644
--- a/tests/integration/test_evaluation_data_subprocess.yaml
+++ b/tests/integration/test_evaluation_data_subprocess.yaml
@@ -7,12 +7,12 @@
   conversation_metrics_metadata: {}
   turns:
     - turn_id: turn_1
-      query: >-
-        A pod named oomkill-demo in namespace lightspeed-evaluation-test
-        is in CrashLoopBackOff due to OOMKill. Analyze the root cause,
-        fix the memory configuration, and verify the fix.
       response: null
       proposal_spec:
+        request: >-
+          A pod named oomkill-demo in namespace lightspeed-evaluation-test
+          is in CrashLoopBackOff due to OOMKill. Analyze the root cause,
+          fix the memory configuration, and verify the fix.
         targetNamespaces:
           - lightspeed-evaluation-test
         tools:
@@ -34,7 +34,7 @@
       turn_metrics_metadata: {}
 
 - conversation_group_id: subprocess_analysis_only
-  description: Analysis-only — no execution or verification
+  description: Analysis-only — no execution or verification (query from proposal_spec.request)
   tag: subprocess
   setup_script: agentic/scripts/setup_subprocess_fixtures-openai.sh
   cleanup_script: agentic/scripts/cleanup_subprocess_fixtures-openai.sh
@@ -42,11 +42,11 @@
   conversation_metrics_metadata: {}
   turns:
     - turn_id: turn_1
-      query: >-
-        A pod named oomkill-demo in namespace lightspeed-evaluation-test
-        is in CrashLoopBackOff. Analyze the root cause.
       response: null
       proposal_spec:
+        request: >-
+          A pod named oomkill-demo in namespace lightspeed-evaluation-test
+          is in CrashLoopBackOff. Analyze the root cause.
         targetNamespaces:
           - lightspeed-evaluation-test
         tools:
diff --git a/tests/unit/core/models/test_data.py b/tests/unit/core/models/test_data.py
index 10c882fa..c438b923 100644
--- a/tests/unit/core/models/test_data.py
+++ b/tests/unit/core/models/test_data.py
@@ -34,6 +34,38 @@ def test_empty_query_fails(self) -> None:
         with pytest.raises(ValidationError):
             TurnData(turn_id="turn1", query="")
 
+    def test_query_populated_from_proposal_spec_request(self) -> None:
+        """Test query is auto-populated from proposal_spec.request."""
+        turn = TurnData(
+            turn_id="turn1",
+            proposal_spec={"request": "Fix the OOM issue"},
+        )
+        assert turn.query == "Fix the OOM issue"
+
+    def test_query_preserved_when_both_provided(self) -> None:
+        """Test explicit query is not overridden by proposal_spec.request."""
+        turn = TurnData(
+            turn_id="turn1",
+            query="Explicit query",
+            proposal_spec={"request": "Spec request"},
+        )
+        assert turn.query == "Explicit query"
+
+    def test_no_query_no_proposal_spec_fails(self) -> None:
+        """Test that missing query without proposal_spec raises."""
+        with pytest.raises(ValidationError, match="query is required"):
+            TurnData(turn_id="turn1")
+
+    def test_no_query_proposal_spec_missing_request_fails(self) -> None:
+        """Test that proposal_spec without request key raises."""
+        with pytest.raises(ValidationError, match="proposal_spec must contain"):
+            TurnData(turn_id="turn1", proposal_spec={"analysis": {}})
+
+    def test_no_query_proposal_spec_empty_request_fails(self) -> None:
+        """Test that proposal_spec with empty request raises."""
+        with pytest.raises(ValidationError, match="proposal_spec must contain"):
+            TurnData(turn_id="turn1", proposal_spec={"request": ""})
+
     def test_turn_metrics_metadata_accepted_at_load(self) -> None:
         """Override dict is accepted at load; GEval is validated at eval time when resolved."""
         turn = TurnData(

From eed9a7d67b0fa824e03102c3bebaefc5349e1753 Mon Sep 17 00:00:00 2001
From: rioloc <criolo@redhat.com>
Date: Wed, 27 May 2026 21:33:00 +0200
Subject: [PATCH 5/6] fix: remove redundant validation and make CLI timeout
 configurable

Address review feedback from Asutosh on PR #232:
- Remove duplicate ProposalAgentConfig.model_validate() in __init__
  (base class already calls validate_config which does the same)
- Fix validate_config return type to properly populate self._config
- Add cli_timeout field to ProposalAgentConfig (default 30s) and
  replace hardcoded CLI_COMMAND_TIMEOUT constant

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/lightspeed_evaluation/core/models/agents.py        |  1 +
 .../pipeline/evaluation/driver.py                      | 10 ++++------
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/src/lightspeed_evaluation/core/models/agents.py b/src/lightspeed_evaluation/core/models/agents.py
index 4f4831bb..5321178d 100644
--- a/src/lightspeed_evaluation/core/models/agents.py
+++ b/src/lightspeed_evaluation/core/models/agents.py
@@ -149,6 +149,7 @@ class ProposalAgentConfig(BaseModel):
     auto_approve: bool = True
     cleanup_proposals: bool = True
     timeout: int = Field(default=900, gt=0)
+    cli_timeout: int = Field(default=30, gt=0)
     poll_interval: int = Field(default=2, gt=0)
 
 
diff --git a/src/lightspeed_evaluation/pipeline/evaluation/driver.py b/src/lightspeed_evaluation/pipeline/evaluation/driver.py
index 6f95d3eb..6ab562b0 100644
--- a/src/lightspeed_evaluation/pipeline/evaluation/driver.py
+++ b/src/lightspeed_evaluation/pipeline/evaluation/driver.py
@@ -132,8 +132,6 @@ class TerminalOutcome(StrEnum):
 CRD_PLURAL = "proposals"
 CRD_API_VERSION = f"{CRD_GROUP}/{CRD_VERSION}"
 
-CLI_COMMAND_TIMEOUT = 30
-
 
 class ProposalDriver(AgentDriver):
     """Driver that manages Proposal CR lifecycle via oc/kubectl CLI."""
@@ -141,14 +139,14 @@ class ProposalDriver(AgentDriver):
     def __init__(self, config: dict[str, Any], *, enabled: bool = True) -> None:
         """Initialize the proposal driver."""
         super().__init__(config, enabled=enabled)
-        self._config = ProposalAgentConfig.model_validate(config)
         self._cli = self._resolve_cli()
 
-    def validate_config(self, config: dict[str, Any]) -> None:
+    def validate_config(self, config: dict[str, Any]) -> ProposalAgentConfig:
         """Validate proposal driver configuration."""
-        ProposalAgentConfig.model_validate(config)
+        parsed = ProposalAgentConfig.model_validate(config)
         if self._enabled and not shutil.which("oc") and not shutil.which("kubectl"):
             raise ConfigurationError("Neither 'oc' nor 'kubectl' found on PATH")
+        return parsed
 
     def execute_turn(
         self, turn_data: TurnData, conversation_id: Optional[str] = None
@@ -235,7 +233,7 @@ def _run_cli(
             text=True,
             capture_output=True,
             env=os.environ.copy(),
-            timeout=CLI_COMMAND_TIMEOUT,
+            timeout=self._config.cli_timeout,
             check=False,
         )
 

From 15b40209f0d2bd3edf0c791abd22450e26b18d49 Mon Sep 17 00:00:00 2001
From: rioloc <criolo@redhat.com>
Date: Wed, 27 May 2026 21:40:40 +0200
Subject: [PATCH 6/6] =?UTF-8?q?refactor:=20rename=20subprocess=20=E2=86=92?=
 =?UTF-8?q?=20proposal=20in=20integration=20test=20files?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Align test file names and internal references with the ProposalDriver
terminology after the SubprocessDriver rename.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 src/lightspeed_evaluation/core/models/data.py |  2 +-
 .../agentic/fixtures/oomkill-demo.yaml        |  8 ++++-
 ...leanup_proposal_fixtures-claude-vertex.sh} |  0
 ...sh => cleanup_proposal_fixtures-openai.sh} |  9 ++---
 ... setup_proposal_fixtures-claude-vertex.sh} |  6 ++--
 ...i.sh => setup_proposal_fixtures-openai.sh} |  2 +-
 ...aml => system-config-agents-proposal.yaml} |  0
 ...aml => test_evaluation_data_proposal.yaml} | 16 ++++-----
 ...luation.py => test_proposal_evaluation.py} | 36 +++++++++----------
 9 files changed, 39 insertions(+), 40 deletions(-)
 rename tests/integration/agentic/scripts/{cleanup_subprocess_fixtures-claude-vertex.sh => cleanup_proposal_fixtures-claude-vertex.sh} (100%)
 rename tests/integration/agentic/scripts/{cleanup_subprocess_fixtures-openai.sh => cleanup_proposal_fixtures-openai.sh} (63%)
 rename tests/integration/agentic/scripts/{setup_subprocess_fixtures-claude-vertex.sh => setup_proposal_fixtures-claude-vertex.sh} (95%)
 rename tests/integration/agentic/scripts/{setup_subprocess_fixtures-openai.sh => setup_proposal_fixtures-openai.sh} (97%)
 rename tests/integration/{system-config-agents-subprocess.yaml => system-config-agents-proposal.yaml} (100%)
 rename tests/integration/{test_evaluation_data_subprocess.yaml => test_evaluation_data_proposal.yaml} (81%)
 rename tests/integration/{test_subprocess_evaluation.py => test_proposal_evaluation.py} (85%)

diff --git a/src/lightspeed_evaluation/core/models/data.py b/src/lightspeed_evaluation/core/models/data.py
index c45c1317..8c16c9ad 100644
--- a/src/lightspeed_evaluation/core/models/data.py
+++ b/src/lightspeed_evaluation/core/models/data.py
@@ -108,7 +108,7 @@ class TurnData(StreamingMetricsMixin):
         default=None, description="Path to verify script for script-based evaluation"
     )
 
-    # Subprocess driver fields
+    # Proposal driver fields
     description: Optional[str] = Field(
         default=None, description="Human-readable label for reports"
     )
diff --git a/tests/integration/agentic/fixtures/oomkill-demo.yaml b/tests/integration/agentic/fixtures/oomkill-demo.yaml
index 28420036..4003be9b 100644
--- a/tests/integration/agentic/fixtures/oomkill-demo.yaml
+++ b/tests/integration/agentic/fixtures/oomkill-demo.yaml
@@ -16,7 +16,13 @@ spec:
       containers:
         - name: stress
           image: polinux/stress-ng
-          command: ["stress-ng", "--vm", "1", "--vm-bytes", "256M", "--vm-hang", "0"]
+          command:
+            ["stress-ng", "--vm", "1", "--vm-bytes", "256M", "--vm-hang", "0"]
+          securityContext:
+            allowPrivilegeEscalation: false
+            runAsNonRoot: true
+            capabilities:
+              drop: ["ALL"]
           resources:
             requests:
               memory: "64Mi"
diff --git a/tests/integration/agentic/scripts/cleanup_subprocess_fixtures-claude-vertex.sh b/tests/integration/agentic/scripts/cleanup_proposal_fixtures-claude-vertex.sh
similarity index 100%
rename from tests/integration/agentic/scripts/cleanup_subprocess_fixtures-claude-vertex.sh
rename to tests/integration/agentic/scripts/cleanup_proposal_fixtures-claude-vertex.sh
diff --git a/tests/integration/agentic/scripts/cleanup_subprocess_fixtures-openai.sh b/tests/integration/agentic/scripts/cleanup_proposal_fixtures-openai.sh
similarity index 63%
rename from tests/integration/agentic/scripts/cleanup_subprocess_fixtures-openai.sh
rename to tests/integration/agentic/scripts/cleanup_proposal_fixtures-openai.sh
index 2cb308b9..550b95d5 100755
--- a/tests/integration/agentic/scripts/cleanup_subprocess_fixtures-openai.sh
+++ b/tests/integration/agentic/scripts/cleanup_proposal_fixtures-openai.sh
@@ -1,7 +1,7 @@
 #!/usr/bin/env bash
 set -euo pipefail
 
-# Tear down integration test resources deployed by setup_subprocess_fixtures.sh
+# Tear down integration test resources deployed by setup_proposal_fixtures.sh
 # (OpenAI provider). Only deletes "eval-" prefixed operator resources to avoid
 # touching anything that isn't ours.
 
@@ -10,10 +10,8 @@ TEST_NS="lightspeed-evaluation-test"
 
 echo "Cleaning up integration test resources..."
 
-# Delete test workload + any leftover Proposals in test namespace
+# Delete test workload (Proposal cleanup is handled by the driver via cleanup_proposals)
 oc delete deployment oomkill-demo -n "$TEST_NS" --ignore-not-found
-oc delete proposals --all -n "$TEST_NS" --ignore-not-found || true
-oc delete proposalapprovals --all -n "$TEST_NS" --ignore-not-found || true
 
 # Delete prefixed operator resources (reverse order of creation)
 oc delete sandboxtemplate eval-lightspeed-agent -n "$OPERATOR_NS" --ignore-not-found
@@ -21,7 +19,4 @@ oc delete agent eval-default --ignore-not-found
 oc delete llmprovider eval-openai --ignore-not-found
 oc delete secret eval-llm-credentials -n "$OPERATOR_NS" --ignore-not-found
 
-# Delete test namespace (cascades remaining namespaced resources)
-oc delete namespace "$TEST_NS" --ignore-not-found
-
 echo "Cleanup complete."
diff --git a/tests/integration/agentic/scripts/setup_subprocess_fixtures-claude-vertex.sh b/tests/integration/agentic/scripts/setup_proposal_fixtures-claude-vertex.sh
similarity index 95%
rename from tests/integration/agentic/scripts/setup_subprocess_fixtures-claude-vertex.sh
rename to tests/integration/agentic/scripts/setup_proposal_fixtures-claude-vertex.sh
index f7e46a1e..70689a06 100755
--- a/tests/integration/agentic/scripts/setup_subprocess_fixtures-claude-vertex.sh
+++ b/tests/integration/agentic/scripts/setup_proposal_fixtures-claude-vertex.sh
@@ -17,6 +17,7 @@ set -euo pipefail
 
 OPERATOR_NS="openshift-lightspeed"
 TEST_NS="lightspeed-evaluation-test"
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
 GCP_CREDENTIALS_FILE="${GCP_CREDENTIALS_FILE:-$HOME/.config/gcloud/application_default_credentials.json}"
 CLOUD_ML_REGION="${CLOUD_ML_REGION:-global}"
 AGENT_MODEL="${AGENT_MODEL:-claude-opus-4-6}"
@@ -34,8 +35,9 @@ if [ ! -f "$GCP_CREDENTIALS_FILE" ]; then
 fi
 
 # 1. Test namespace + OOMKill workload (static fixtures, already test-scoped)
-oc apply -f ../fixtures/namespace.yaml
-oc apply -f ../fixtures/oomkill-demo.yaml
+oc apply -f "$SCRIPT_DIR/../fixtures/namespace.yaml"
+oc apply -f "$SCRIPT_DIR/../fixtures/oomkill-demo.yaml"
+
 
 # 2. Secret (GCP credentials) — prefixed name in operator namespace
 oc create secret generic eval-llm-credentials \
diff --git a/tests/integration/agentic/scripts/setup_subprocess_fixtures-openai.sh b/tests/integration/agentic/scripts/setup_proposal_fixtures-openai.sh
similarity index 97%
rename from tests/integration/agentic/scripts/setup_subprocess_fixtures-openai.sh
rename to tests/integration/agentic/scripts/setup_proposal_fixtures-openai.sh
index dea157bd..34110ca1 100755
--- a/tests/integration/agentic/scripts/setup_subprocess_fixtures-openai.sh
+++ b/tests/integration/agentic/scripts/setup_proposal_fixtures-openai.sh
@@ -14,7 +14,7 @@ set -euo pipefail
 #   AGENT_MODEL      — default: gpt-5.2
 #
 # To test with Claude via Vertex AI instead, use
-# setup_subprocess_fixtures-claude-vertex.sh and set the corresponding env vars
+# setup_proposal_fixtures-claude-vertex.sh and set the corresponding env vars
 # (GCP_CREDENTIALS_FILE, ANTHROPIC_VERTEX_PROJECT_ID, SANDBOX_IMAGE).
 
 OPERATOR_NS="openshift-lightspeed"
diff --git a/tests/integration/system-config-agents-subprocess.yaml b/tests/integration/system-config-agents-proposal.yaml
similarity index 100%
rename from tests/integration/system-config-agents-subprocess.yaml
rename to tests/integration/system-config-agents-proposal.yaml
diff --git a/tests/integration/test_evaluation_data_subprocess.yaml b/tests/integration/test_evaluation_data_proposal.yaml
similarity index 81%
rename from tests/integration/test_evaluation_data_subprocess.yaml
rename to tests/integration/test_evaluation_data_proposal.yaml
index 845a677d..27456b68 100644
--- a/tests/integration/test_evaluation_data_subprocess.yaml
+++ b/tests/integration/test_evaluation_data_proposal.yaml
@@ -1,8 +1,8 @@
-- conversation_group_id: subprocess_full_lifecycle
+- conversation_group_id: proposal_full_lifecycle
   description: Full lifecycle — analysis, execution, verification
-  tag: subprocess
-  setup_script: agentic/scripts/setup_subprocess_fixtures-openai.sh
-  cleanup_script: agentic/scripts/cleanup_subprocess_fixtures-openai.sh
+  tag: proposal
+  setup_script: agentic/scripts/setup_proposal_fixtures-openai.sh
+  cleanup_script: agentic/scripts/cleanup_proposal_fixtures-openai.sh
   conversation_metrics: []
   conversation_metrics_metadata: {}
   turns:
@@ -33,11 +33,11 @@
         - custom:proposal_status
       turn_metrics_metadata: {}
 
-- conversation_group_id: subprocess_analysis_only
+- conversation_group_id: proposal_analysis_only
   description: Analysis-only — no execution or verification (query from proposal_spec.request)
-  tag: subprocess
-  setup_script: agentic/scripts/setup_subprocess_fixtures-openai.sh
-  cleanup_script: agentic/scripts/cleanup_subprocess_fixtures-openai.sh
+  tag: proposal
+  setup_script: agentic/scripts/setup_proposal_fixtures-openai.sh
+  cleanup_script: agentic/scripts/cleanup_proposal_fixtures-openai.sh
   conversation_metrics: []
   conversation_metrics_metadata: {}
   turns:
diff --git a/tests/integration/test_subprocess_evaluation.py b/tests/integration/test_proposal_evaluation.py
similarity index 85%
rename from tests/integration/test_subprocess_evaluation.py
rename to tests/integration/test_proposal_evaluation.py
index c3415338..06238126 100644
--- a/tests/integration/test_subprocess_evaluation.py
+++ b/tests/integration/test_proposal_evaluation.py
@@ -11,9 +11,9 @@
 The default setup/cleanup scripts use OpenAI as the LLM provider.
 To test with a different provider (e.g. Claude via Vertex AI), set the
 corresponding env vars and point the eval data at the provider-specific
-scripts (e.g. setup_subprocess_fixtures-claude-vertex.sh).
+scripts (e.g. setup_proposal_fixtures-claude-vertex.sh).
 
-Run with: pytest tests/integration/test_subprocess_evaluation.py -v -m agentic
+Run with: pytest tests/integration/test_proposal_evaluation.py -v -m agentic
 """
 
 import os
@@ -71,14 +71,12 @@ def check_env_vars_set() -> bool:
 pytestmark = pytest.mark.agentic
 
 INTEGRATION_TEST_DIR = Path(__file__).parent
-SUBPROCESS_CONFIG_PATH = INTEGRATION_TEST_DIR / "system-config-agents-subprocess.yaml"
-SUBPROCESS_EVAL_DATA_PATH = (
-    INTEGRATION_TEST_DIR / "test_evaluation_data_subprocess.yaml"
-)
+PROPOSAL_CONFIG_PATH = INTEGRATION_TEST_DIR / "system-config-agents-proposal.yaml"
+PROPOSAL_EVAL_DATA_PATH = INTEGRATION_TEST_DIR / "test_evaluation_data_proposal.yaml"
 
 
 class TestProposalPrerequisites:
-    """Verify prerequisites for subprocess integration tests."""
+    """Verify prerequisites for proposal integration tests."""
 
     def test_cli_available(self) -> None:
         """Verify that oc CLI is available."""
@@ -117,7 +115,7 @@ def test_full_lifecycle(self, tmp_path: Path) -> None:
         - Cleanup script removes test resources
         """
         loader = ConfigLoader()
-        system_config = loader.load_system_config(str(SUBPROCESS_CONFIG_PATH))
+        system_config = loader.load_system_config(str(PROPOSAL_CONFIG_PATH))
         system_config.storage = [
             FileBackendConfig(output_dir=str(tmp_path / "eval_output"))
         ]
@@ -126,13 +124,11 @@ def test_full_lifecycle(self, tmp_path: Path) -> None:
             api_enabled=True,
             fail_on_invalid_data=system_config.core.fail_on_invalid_data,
         )
-        all_data = validator.load_evaluation_data(str(SUBPROCESS_EVAL_DATA_PATH))
+        all_data = validator.load_evaluation_data(str(PROPOSAL_EVAL_DATA_PATH))
         eval_data = [
-            d
-            for d in all_data
-            if d.conversation_group_id == "subprocess_full_lifecycle"
+            d for d in all_data if d.conversation_group_id == "proposal_full_lifecycle"
         ]
-        assert len(eval_data) == 1, "Should find subprocess_full_lifecycle data"
+        assert len(eval_data) == 1, "Should find proposal_full_lifecycle data"
 
         evaluate(system_config, eval_data)
 
@@ -163,7 +159,7 @@ def test_analysis_only(self, tmp_path: Path) -> None:
         - No Executed or Verified conditions present
         """
         loader = ConfigLoader()
-        system_config = loader.load_system_config(str(SUBPROCESS_CONFIG_PATH))
+        system_config = loader.load_system_config(str(PROPOSAL_CONFIG_PATH))
         system_config.storage = [
             FileBackendConfig(output_dir=str(tmp_path / "eval_output"))
         ]
@@ -172,11 +168,11 @@ def test_analysis_only(self, tmp_path: Path) -> None:
             api_enabled=True,
             fail_on_invalid_data=system_config.core.fail_on_invalid_data,
         )
-        all_data = validator.load_evaluation_data(str(SUBPROCESS_EVAL_DATA_PATH))
+        all_data = validator.load_evaluation_data(str(PROPOSAL_EVAL_DATA_PATH))
         eval_data = [
-            d for d in all_data if d.conversation_group_id == "subprocess_analysis_only"
+            d for d in all_data if d.conversation_group_id == "proposal_analysis_only"
         ]
-        assert len(eval_data) == 1, "Should find subprocess_analysis_only data"
+        assert len(eval_data) == 1, "Should find proposal_analysis_only data"
 
         evaluate(system_config, eval_data)
 
@@ -214,7 +210,7 @@ def test_timeout_handling(self, tmp_path: Path) -> None:
         - Proposal CRs are cleaned up after timeout
         """
         loader = ConfigLoader()
-        system_config = loader.load_system_config(str(SUBPROCESS_CONFIG_PATH))
+        system_config = loader.load_system_config(str(PROPOSAL_CONFIG_PATH))
         system_config.storage = [
             FileBackendConfig(output_dir=str(tmp_path / "eval_output"))
         ]
@@ -228,9 +224,9 @@ def test_timeout_handling(self, tmp_path: Path) -> None:
             api_enabled=True,
             fail_on_invalid_data=system_config.core.fail_on_invalid_data,
         )
-        all_data = validator.load_evaluation_data(str(SUBPROCESS_EVAL_DATA_PATH))
+        all_data = validator.load_evaluation_data(str(PROPOSAL_EVAL_DATA_PATH))
         eval_data = [
-            d for d in all_data if d.conversation_group_id == "subprocess_analysis_only"
+            d for d in all_data if d.conversation_group_id == "proposal_analysis_only"
         ]
 
         evaluate(system_config, eval_data)