GitHubSecurityLab
diff --git a/‎README.md‎
Lines changed: 35 additions & 0 deletions b/‎README.md‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎examples/model_configs/responses_api.yaml‎
Lines changed: 1 addition & 1 deletion b/‎examples/model_configs/responses_api.yaml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pyproject.toml‎
Lines changed: 9 additions & 0 deletions b/‎pyproject.toml‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎src/seclab_taskflow_agent/__init__.py‎
Lines changed: 2 additions & 0 deletions b/‎src/seclab_taskflow_agent/__init__.py‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/seclab_taskflow_agent/_stream.py‎
Lines changed: 142 additions & 0 deletions b/‎src/seclab_taskflow_agent/_stream.py‎
Lines changed: 142 additions & 0 deletions
diff --git a/‎src/seclab_taskflow_agent/_watchdog.py‎
Lines changed: 78 additions & 0 deletions b/‎src/seclab_taskflow_agent/_watchdog.py‎
Lines changed: 78 additions & 0 deletions
diff --git a/‎src/seclab_taskflow_agent/agent.py‎
Lines changed: 17 additions & 0 deletions b/‎src/seclab_taskflow_agent/agent.py‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎src/seclab_taskflow_agent/cli.py‎
Lines changed: 14 additions & 2 deletions b/‎src/seclab_taskflow_agent/cli.py‎
Lines changed: 14 additions & 2 deletions
@@ -81,6 +81,41 @@ Per-model `model_settings` can include:
 - **`endpoint`** — API base URL override for this model
 - **`token`** — name of an environment variable containing the API key
 
+### Backends
+
+The runner can drive two SDKs behind a common interface:
+
+- **`openai_agents`** (default) — the OpenAI Agents Python SDK. Supports
+  multi-personality handoffs, both `chat_completions` and `responses`
+  `api_type`, `temperature`, `parallel_tool_calls`,
+  `exclude_from_context`, and MCP over stdio, SSE, and streamable HTTP.
+- **`copilot_sdk`** (optional, `pip install seclab-taskflow-agent[copilot]`)
+  — the GitHub Copilot Python SDK. Supports streaming, `reasoning_effort`,
+  MCP over stdio/SSE/HTTP, and per-tool permission gating. The SDK
+  selects its own wire protocol per model, so the YAML `api_type` field
+  is not honoured; multi-personality handoffs, `temperature`, and
+  `parallel_tool_calls` are likewise not available. Taskflows that use
+  unsupported fields fail at load time with a `BackendCapabilityError`
+  naming the offending field.
+
+Selection precedence:
+
+1. `backend:` field in the model config document.
+2. `SECLAB_TASKFLOW_BACKEND` environment variable.
+3. Endpoint auto-default (`api.githubcopilot.com` prefers `copilot_sdk`
+   when the optional dependency is installed).
+4. `openai_agents`.
+
+```yaml
+seclab-taskflow-agent:
+  version: "1.0"
+  filetype: model_config
+backend: copilot_sdk
+models:
+  fast: gpt-5-mini
+  slow: claude-opus-4.6
+```
+
 ### Session Recovery
 
 Taskflow runs are automatically checkpointed at the task level. If a task
 
@@ -9,7 +9,7 @@ seclab-taskflow-agent:
   version: "1.0"
   filetype: model_config
 models:
-  gpt_responses: gpt-5.1
+  gpt_responses: gpt-5-mini
 model_settings:
   gpt_responses:
     api_type: responses
 
@@ -123,6 +123,15 @@ dependencies = [
 [project.scripts]
 seclab-taskflow-agent = "seclab_taskflow_agent.cli:app"
 
+[project.optional-dependencies]
+# Pulls in the GitHub Copilot SDK (public preview) so the copilot_sdk
+# backend can be selected. Requires Python >= 3.11. Pinned to the
+# 0.2.x line because the SDK may ship breaking changes between minor
+# versions while still in preview.
+copilot = [
+  "github-copilot-sdk>=0.2.2,<0.3",
+]
+
 [project.urls]
 Source = "https://github.com/GitHubSecurityLab/seclab-taskflow-agent"
 Issues = "https://github.com/GitHubSecurityLab/seclab-taskflow-agent/issues"
 
@@ -26,6 +26,7 @@
 __all__ = [
     "ApiType",
     "AvailableTools",
+    "BackendSdk",
     "TaskAgent",
     "TaskRunHooks",
     "TaskAgentHooks",
@@ -41,6 +42,7 @@
 from .available_tools import AvailableTools
 from .models import (
     ApiType,
+    BackendSdk,
     ModelConfigDocument,
     PersonalityDocument,
     PromptDocument,
 
@@ -0,0 +1,142 @@
+# SPDX-FileCopyrightText: GitHub, Inc.
+# SPDX-License-Identifier: MIT
+
+"""Stream-driving helpers for the runner.
+
+This module owns the inner loop that consumes events from a backend
+adapter (`TextDelta` / `ToolEnd`), renders text deltas to the user, and
+bridges Copilot-side tool events into the run-hook callbacks that the
+runner uses to capture MCP results for ``repeat_prompt`` and session
+checkpointing.
+
+Extracted from ``runner.py`` so the rate-limit/retry loop and the
+backend-event translation are independently readable and testable.
+"""
+
+from __future__ import annotations
+
+__all__ = ["STREAM_IDLE_TIMEOUT", "bridge_copilot_tool_event", "drive_backend_stream"]
+
+import asyncio
+import json
+import logging
+from types import SimpleNamespace
+from typing import Any
+
+from ._watchdog import watchdog_ping
+from .render_utils import render_model_output
+from .sdk import TextDelta, ToolEnd
+from .sdk.errors import BackendRateLimitError, BackendTimeoutError
+
+# Application-level backstop: if the backend's event stream goes silent
+# for this long, surface a BackendTimeoutError so the retry loop can
+# recover. This complements the TCP-level httpx timeouts in the
+# openai-agents adapter — those catch dead sockets, this catches the
+# subtler case where the connection stays open but nothing is flowing.
+STREAM_IDLE_TIMEOUT = 1800
+
+
+async def bridge_copilot_tool_event(event: ToolEnd, run_hooks: Any) -> None:
+    """Replay a Copilot ``ToolEnd`` event through the run-hook callbacks.
+
+    On the openai-agents path the SDK itself drives
+    ``run_hooks.on_tool_end``, which is how the runner captures MCP tool
+    output. The Copilot adapter only reports tool completions as
+    ``ToolEnd`` events, so this helper calls the same hooks by hand.
+
+    Two stand-ins make the call look like the openai-agents one:
+
+    * The tool argument is a ``SimpleNamespace`` carrying only ``name``;
+      the hooks read nothing else from the openai-agents ``Tool`` object.
+    * The result is wrapped as ``json.dumps({"text": ...})``, the wire
+      format openai-agents uses when serialising MCP ``TextContent``
+      lists. ``_build_prompts_to_run`` in the runner relies on that exact
+      envelope, so both backends leave identical entries in
+      ``last_mcp_tool_results``.
+
+    Does nothing when *run_hooks* is ``None``.
+    """
+    if run_hooks is not None:
+        tool_stub = SimpleNamespace(name=event.tool_name)
+        # Serialise before invoking any hook so a serialisation failure
+        # surfaces before on_tool_start has fired.
+        serialized = json.dumps({"text": event.text})
+        # Only the completion is reported, so the start callback is
+        # synthesised immediately before the end callback.
+        await run_hooks.on_tool_start(None, None, tool_stub)
+        await run_hooks.on_tool_end(None, None, tool_stub, serialized)
+
+
+async def drive_backend_stream(
+    *,
+    backend_impl: Any,
+    agent_handle: Any,
+    prompt: str,
+    max_turns: int,
+    run_hooks: Any,
+    async_task: bool,
+    task_id: str,
+    max_api_retry: int,
+    initial_rate_limit_backoff: int,
+    max_rate_limit_backoff: int,
+) -> None:
+    """Run the backend's event stream to completion with retry/backoff.
+
+    Renders ``TextDelta`` events via ``render_model_output``, forwards
+    ``ToolEnd`` events to :func:`bridge_copilot_tool_event`, and pings the
+    watchdog on every event received.
+
+    Args:
+        backend_impl: Adapter exposing ``run_streamed(agent_handle, prompt,
+            max_turns=...)``, which must return an async iterable of events.
+        agent_handle: Opaque handle passed through to ``run_streamed``.
+        prompt: Prompt text passed through to ``run_streamed``.
+        max_turns: Turn limit passed through to ``run_streamed``.
+        run_hooks: Hook object handed to the tool-event bridge; may be
+            ``None``, in which case tool events are dropped.
+        async_task: Forwarded to ``render_model_output``.
+        task_id: Forwarded to ``render_model_output``.
+        max_api_retry: Number of times the whole stream is restarted after
+            a :class:`BackendTimeoutError` before it is re-raised.
+        initial_rate_limit_backoff: Starting backoff in seconds. It is
+            doubled before each sleep, so the first sleep lasts twice this
+            value (at least one second, at most the cap).
+        max_rate_limit_backoff: Upper bound in seconds for a single sleep.
+
+    Raises:
+        BackendTimeoutError: When the stream stays idle for
+            ``STREAM_IDLE_TIMEOUT`` seconds more than *max_api_retry*
+            times, or when a rate limit is hit while the backoff already
+            sits at *max_rate_limit_backoff*.
+    """
+    retries_left = max_api_retry
+    rate_limit_backoff = initial_rate_limit_backoff
+
+    # Loop unconditionally. Gating the loop on the backoff value would skip
+    # the run altogether (and return silently) when the initial backoff is
+    # configured as 0; every exit is an explicit return or raise below.
+    while True:
+        try:
+            stream = backend_impl.run_streamed(
+                agent_handle, prompt, max_turns=max_turns
+            )
+            stream_iter = stream.__aiter__()
+            try:
+                while True:
+                    try:
+                        # Bound the wait for each individual event so a
+                        # connection that stays open but silent is noticed.
+                        event = await asyncio.wait_for(
+                            stream_iter.__anext__(), timeout=STREAM_IDLE_TIMEOUT
+                        )
+                    except StopAsyncIteration:
+                        break
+                    except asyncio.TimeoutError as exc:
+                        raise BackendTimeoutError(
+                            f"Backend stream idle for {STREAM_IDLE_TIMEOUT}s"
+                        ) from exc
+                    watchdog_ping()
+                    if isinstance(event, TextDelta):
+                        await render_model_output(
+                            event.text, async_task=async_task, task_id=task_id
+                        )
+                    elif isinstance(event, ToolEnd):
+                        await bridge_copilot_tool_event(event, run_hooks)
+            finally:
+                # Close the async generator so its finally block runs even
+                # if we abort early (timeout / consumer break) — the
+                # adapters use that to release backend-native resources.
+                aclose = getattr(stream_iter, "aclose", None)
+                if aclose is not None:
+                    try:
+                        await aclose()
+                    except Exception:  # noqa: BLE001 - best-effort cleanup
+                        logging.exception("Failed to aclose backend stream iterator")
+            await render_model_output("\n\n", async_task=async_task, task_id=task_id)
+            return
+        except BackendTimeoutError:
+            if not retries_left:
+                logging.exception("Max retries for BackendTimeoutError reached")
+                raise
+            retries_left -= 1
+        except BackendRateLimitError as exc:
+            if rate_limit_backoff == max_rate_limit_backoff:
+                raise BackendTimeoutError("Max rate limit backoff reached") from exc
+            # Double the delay but never sleep longer than the cap. The
+            # floor of 1 lets a zero initial backoff still grow instead of
+            # spinning on 0 forever.
+            rate_limit_backoff = min(
+                max(rate_limit_backoff * 2, 1), max_rate_limit_backoff
+            )
+            logging.exception(
+                "Hit rate limit ... holding for %s", rate_limit_backoff
+            )
+            await asyncio.sleep(rate_limit_backoff)
@@ -0,0 +1,78 @@
+# SPDX-FileCopyrightText: GitHub, Inc.
+# SPDX-License-Identifier: MIT
+
+"""Process-level watchdog that force-exits if the event loop stops progressing.
+
+The asyncio retry loop, the httpx client timeouts, and the per-stream
+idle timeout already cover the cases we know how to recover from. This
+module is the last-resort backstop for everything else (a stuck MCP
+cleanup, an asyncio loop spinning on a leaked task, a kernel-level
+socket pathology) — a daemon thread polls a monotonic timestamp that
+the runtime updates from every interesting event and force-exits the
+process if the timestamp ever goes stale for too long.
+
+Sources of pings:
+
+* :func:`drive_backend_stream` — every backend event.
+* The runner's ``on_tool_start`` / ``on_tool_end`` hooks.
+* The runner's MCP cleanup / backend ``aclose`` paths.
+
+The default timeout is intentionally larger than every recoverable
+timeout below it so the watchdog never fires before the asyncio layer
+has had a chance to recover.
+"""
+
+from __future__ import annotations
+
+__all__ = ["WATCHDOG_IDLE_TIMEOUT", "start_watchdog", "watchdog_ping"]
+
+import logging
+import os
+import sys
+import threading
+import time
+
+# 35 minutes by default — comfortably above the per-stream idle timeout
+# (30 min) and the rate-limit backoff cap (2 min) so the watchdog only
+# trips on hangs the asyncio path could not recover from.
+WATCHDOG_IDLE_TIMEOUT = int(os.environ.get("WATCHDOG_IDLE_TIMEOUT", "2100"))
+
+_last_activity = time.monotonic()
+_lock = threading.Lock()
+_started = False
+
+
+def watchdog_ping() -> None:
+    """Record activity. Safe to call from any coroutine or callback.
+
+    Overwrites the module-level ``_last_activity`` timestamp with the
+    current ``time.monotonic()`` reading while holding ``_lock``, the
+    same lock the watchdog thread takes before it reads the timestamp.
+    """
+    global _last_activity
+    with _lock:
+        _last_activity = time.monotonic()
+
+
+def _watchdog_loop(timeout: int) -> None:
+    """Poll the activity timestamp forever and hard-exit once it goes stale.
+
+    Thread target used by :func:`start_watchdog`. Each pass sleeps, reads
+    how long ago ``_last_activity`` was refreshed, and — if that exceeds
+    *timeout* seconds — logs an error, flushes stderr and stdout, and
+    terminates the process with ``os._exit(2)``. The function never
+    returns normally.
+    """
+    # Poll at one fifth of the timeout, clamped to between 1 and 60 seconds.
+    poll_every = max(1, timeout // 5)
+    if poll_every > 60:
+        poll_every = 60
+
+    while True:
+        time.sleep(poll_every)
+        with _lock:
+            stale_for = time.monotonic() - _last_activity
+        if stale_for <= timeout:
+            continue
+        logging.error(
+            "Watchdog: no activity for %.0fs (limit %ds) — force-exiting to prevent hang",
+            stale_for,
+            timeout,
+        )
+        # os._exit skips normal interpreter shutdown, so push out anything
+        # still buffered on the standard streams first.
+        for std_stream in (sys.stderr, sys.stdout):
+            std_stream.flush()
+        os._exit(2)
+
+
+def start_watchdog(timeout: int = WATCHDOG_IDLE_TIMEOUT) -> None:
+    """Launch the watchdog thread; later calls in the same process are no-ops.
+
+    *timeout* is the number of idle seconds the watchdog tolerates before
+    force-exiting; it defaults to ``WATCHDOG_IDLE_TIMEOUT``.
+    """
+    global _started
+    if not _started:
+        _started = True
+        # Refresh the timestamp before the thread exists so a call made
+        # long after import does not trip the watchdog straight away.
+        watchdog_ping()
+        monitor = threading.Thread(
+            name="seclab-watchdog",
+            target=_watchdog_loop,
+            args=(timeout,),
+            daemon=True,
+        )
+        monitor.start()
@@ -24,6 +24,7 @@
 from agents.run import DEFAULT_MAX_TURNS
 from dotenv import find_dotenv, load_dotenv
 from openai import AsyncOpenAI
+import httpx
 
 from .capi import get_AI_endpoint, get_AI_token, get_provider
 
@@ -178,11 +179,17 @@ def __init__(
 
         # Only send provider-specific headers to matching endpoints
         provider = get_provider(resolved_endpoint)
+        # The OpenAI client's default timeout is 10 minutes, so a streaming
+        # run stuck on a half-open TCP connection (CLOSE_WAIT) can stall for
+        # a long time before it fails. Pin explicit per-phase timeouts so
+        # dead sockets surface as APITimeoutError promptly and our retry
+        # loop can recover.
         client = AsyncOpenAI(
             base_url=resolved_endpoint,
             api_key=resolved_token,
             default_headers=provider.extra_headers or None,
+            timeout=httpx.Timeout(connect=10.0, read=300.0, write=300.0, pool=60.0),
         )
+        self._openai_client = client
         set_tracing_disabled(True)
         self.run_hooks = run_hooks or TaskRunHooks()
 
@@ -209,6 +216,16 @@ def _ToolsToFinalOutputFunction(
             hooks=agent_hooks or TaskAgentHooks(),
         )
 
+    async def close(self) -> None:
+        """Close the OpenAI client and, with it, its httpx connection pool.
+
+        Dead CLOSE_WAIT sockets left in the pool can keep kqueue/epoll
+        spinning on the event loop after the agent is otherwise done, so
+        the runner calls this from its ``finally`` to free them. Does
+        nothing when no client is attached.
+        """
+        client = self._openai_client
+        if client is None:
+            return
+        await client.close()
+
     async def run(self, prompt: str, max_turns: int = DEFAULT_MAX_TURNS) -> result.RunResult:
         """Execute *prompt* on the wrapped agent and return the finished result.

         Delegates to ``Runner.run`` with this instance's agent and run
         hooks, passing *max_turns* through unchanged.
         """
         run_result = await Runner.run(
             starting_agent=self.agent,
             input=prompt,
             max_turns=max_turns,
             hooks=self.run_hooks,
         )
         return run_result
 
@@ -15,6 +15,7 @@
 import asyncio
 import logging
 import os
+import sys
 import traceback
 from typing import Annotated
 
@@ -152,6 +153,7 @@ def main(
     # When resuming, the session carries taskflow_path/globals/prompt
     effective_taskflow = taskflow if not resume else None
 
+    exit_code = 0
     try:
         asyncio.run(
             run_main(
@@ -162,13 +164,23 @@ def main(
         )
     except KeyboardInterrupt:
         typer.echo("\nInterrupted.", err=True)
-        raise typer.Exit(code=130)
+        exit_code = 130
     except Exception as exc:
         if debug:
             traceback.print_exc()
         else:
             _print_concise_error(exc)
-        raise typer.Exit(code=1)
+        exit_code = 1
+
+    # Force-exit at the CLI boundary. Python's shutdown path can spin on
+    # dangling asyncio tasks or half-open sockets (notably through the
+    # Responses API + MCP combination), which blocks the interpreter
+    # from returning even after asyncio.run() completes. Tests that
+    # invoke run_main() directly never hit this path.
+    logging.shutdown()
+    sys.stdout.flush()
+    sys.stderr.flush()
+    os._exit(exit_code)
 
 
 # ---------------------------------------------------------------------------