Skip to content

Commit 0fbb33b

Browse files
authored
Merge pull request #225 from GitHubSecurityLab/anticomputer/copilot-sdk-support
Cross-SDK backend abstraction with copilot_sdk adapter
2 parents 4894ba0 + 3f8a732 commit 0fbb33b

29 files changed

Lines changed: 2450 additions & 106 deletions

README.md

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,41 @@ Per-model `model_settings` can include:
8181
- **`endpoint`** — API base URL override for this model
8282
- **`token`** — name of an environment variable containing the API key
8383

84+
### Backends
85+
86+
The runner can drive two SDKs behind a common interface:
87+
88+
- **`openai_agents`** (default) — the OpenAI Agents Python SDK. Supports
89+
multi-personality handoffs, both `chat_completions` and `responses`
90+
`api_type` values, `temperature`, `parallel_tool_calls`,
91+
`exclude_from_context`, and MCP over stdio, SSE, and streamable HTTP.
92+
- **`copilot_sdk`** (optional, `pip install seclab-taskflow-agent[copilot]`)
93+
— the GitHub Copilot Python SDK. Supports streaming, `reasoning_effort`,
94+
MCP over stdio/SSE/HTTP, and per-tool permission gating. The SDK
95+
selects its own wire protocol per model, so the YAML `api_type` field
96+
is not honoured; multi-personality handoffs, `temperature`, and
97+
`parallel_tool_calls` are likewise not available. Taskflows that use
98+
unsupported fields fail at load time with a `BackendCapabilityError`
99+
naming the offending field.
100+
101+
Selection precedence:
102+
103+
1. `backend:` field in the model config document.
104+
2. `SECLAB_TASKFLOW_BACKEND` environment variable.
105+
3. Endpoint auto-default (`api.githubcopilot.com` prefers `copilot_sdk`
106+
when the optional dependency is installed).
107+
4. `openai_agents`.
108+
109+
```yaml
110+
seclab-taskflow-agent:
111+
version: "1.0"
112+
filetype: model_config
113+
backend: copilot_sdk
114+
models:
115+
fast: gpt-5-mini
116+
slow: claude-opus-4.6
117+
```
118+
84119
### Session Recovery
85120

86121
Taskflow runs are automatically checkpointed at the task level. If a task

examples/model_configs/responses_api.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ seclab-taskflow-agent:
99
version: "1.0"
1010
filetype: model_config
1111
models:
12-
gpt_responses: gpt-5.1
12+
gpt_responses: gpt-5-mini
1313
model_settings:
1414
gpt_responses:
1515
api_type: responses

pyproject.toml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -123,6 +123,15 @@ dependencies = [
123123
[project.scripts]
124124
seclab-taskflow-agent = "seclab_taskflow_agent.cli:app"
125125

126+
[project.optional-dependencies]
127+
# Pulls in the GitHub Copilot SDK (public preview) so the copilot_sdk
128+
# backend can be selected. Requires Python >= 3.11. Pinned to the
129+
# 0.2.x line because the SDK may ship breaking changes between minor
130+
# versions while still in preview.
131+
copilot = [
132+
"github-copilot-sdk>=0.2.2,<0.3",
133+
]
134+
126135
[project.urls]
127136
Source = "https://github.com/GitHubSecurityLab/seclab-taskflow-agent"
128137
Issues = "https://github.com/GitHubSecurityLab/seclab-taskflow-agent/issues"

src/seclab_taskflow_agent/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
__all__ = [
2727
"ApiType",
2828
"AvailableTools",
29+
"BackendSdk",
2930
"TaskAgent",
3031
"TaskRunHooks",
3132
"TaskAgentHooks",
@@ -41,6 +42,7 @@
4142
from .available_tools import AvailableTools
4243
from .models import (
4344
ApiType,
45+
BackendSdk,
4446
ModelConfigDocument,
4547
PersonalityDocument,
4648
PromptDocument,
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
# SPDX-FileCopyrightText: GitHub, Inc.
2+
# SPDX-License-Identifier: MIT
3+
4+
"""Stream-driving helpers for the runner.
5+
6+
This module owns the inner loop that consumes events from a backend
7+
adapter (`TextDelta` / `ToolEnd`), renders text deltas to the user, and
8+
bridges Copilot-side tool events into the run-hook callbacks that the
9+
runner uses to capture MCP results for ``repeat_prompt`` and session
10+
checkpointing.
11+
12+
Extracted from ``runner.py`` so the rate-limit/retry loop and the
13+
backend-event translation are independently readable and testable.
14+
"""
15+
16+
from __future__ import annotations
17+
18+
__all__ = ["STREAM_IDLE_TIMEOUT", "bridge_copilot_tool_event", "drive_backend_stream"]
19+
20+
import asyncio
21+
import json
22+
import logging
23+
from types import SimpleNamespace
24+
from typing import Any
25+
26+
from ._watchdog import watchdog_ping
27+
from .render_utils import render_model_output
28+
from .sdk import TextDelta, ToolEnd
29+
from .sdk.errors import BackendRateLimitError, BackendTimeoutError
30+
31+
# Application-level backstop: if the backend's event stream goes silent
32+
# for this long, surface a BackendTimeoutError so the retry loop can
33+
# recover. This complements the TCP-level httpx timeouts in the
34+
# openai-agents adapter — those catch dead sockets, this catches the
35+
# subtler case where the connection stays open but nothing is flowing.
36+
STREAM_IDLE_TIMEOUT = 1800
37+
38+
39+
async def bridge_copilot_tool_event(event: ToolEnd, run_hooks: Any) -> None:
    """Replay a Copilot ``ToolEnd`` event through the run-hook callbacks.

    The openai-agents path drives ``run_hooks.on_tool_end`` natively; the
    Copilot adapter reports tool completions as ``ToolEnd`` events instead,
    so this bridge awaits ``on_tool_start`` and then ``on_tool_end`` on its
    behalf. Both callbacks receive ``None`` for their first two positional
    arguments.

    Args:
        event: The completed tool call. Only ``tool_name`` and ``text`` are
            read.
        run_hooks: Object exposing awaitable ``on_tool_start`` and
            ``on_tool_end`` callbacks. When ``None`` the call is a no-op.
    """
    if run_hooks is not None:
        # Placeholder for the openai-agents ``Tool`` object: the hooks only
        # read ``.name``, so a bare namespace is enough.
        tool_stub = SimpleNamespace(name=event.tool_name)
        # Wrap the result text in the ``{"text": ...}`` JSON envelope that
        # openai-agents uses for MCP ``TextContent``; the runner's
        # ``_build_prompts_to_run`` depends on that exact shape, so both
        # backends yield identical ``last_mcp_tool_results`` entries.
        envelope = json.dumps({"text": event.text})
        await run_hooks.on_tool_start(None, None, tool_stub)
        await run_hooks.on_tool_end(None, None, tool_stub, envelope)
61+
62+
63+
async def _consume_backend_stream(
    *,
    backend_impl: Any,
    agent_handle: Any,
    prompt: str,
    max_turns: int,
    run_hooks: Any,
    async_task: bool,
    task_id: str,
) -> None:
    """Consume a single backend event stream from start to exhaustion.

    Renders ``TextDelta`` events, bridges ``ToolEnd`` events into
    *run_hooks*, and pings the watchdog on every event.

    Raises:
        BackendTimeoutError: No event arrived within ``STREAM_IDLE_TIMEOUT``
            seconds.
    """
    stream = backend_impl.run_streamed(agent_handle, prompt, max_turns=max_turns)
    stream_iter = stream.__aiter__()
    try:
        while True:
            try:
                event = await asyncio.wait_for(
                    stream_iter.__anext__(), timeout=STREAM_IDLE_TIMEOUT
                )
            except StopAsyncIteration:
                break
            except asyncio.TimeoutError as exc:
                raise BackendTimeoutError(
                    f"Backend stream idle for {STREAM_IDLE_TIMEOUT}s"
                ) from exc
            watchdog_ping()
            if isinstance(event, TextDelta):
                await render_model_output(
                    event.text, async_task=async_task, task_id=task_id
                )
            elif isinstance(event, ToolEnd):
                await bridge_copilot_tool_event(event, run_hooks)
    finally:
        # Close the async generator so its finally block runs even if we
        # abort early (timeout / consumer break) — the adapters use that to
        # release backend-native resources.
        aclose = getattr(stream_iter, "aclose", None)
        if aclose is not None:
            try:
                await aclose()
            except Exception:  # noqa: BLE001 - best-effort cleanup
                logging.exception("Failed to aclose backend stream iterator")


async def drive_backend_stream(
    *,
    backend_impl: Any,
    agent_handle: Any,
    prompt: str,
    max_turns: int,
    run_hooks: Any,
    async_task: bool,
    task_id: str,
    max_api_retry: int,
    initial_rate_limit_backoff: int,
    max_rate_limit_backoff: int,
) -> None:
    """Run the backend's event stream to completion with retry/backoff.

    Renders ``TextDelta`` events via ``render_model_output``, forwards
    ``ToolEnd`` events to the run-hook bridge, and emits a trailing blank
    line once the stream is exhausted.

    Args:
        backend_impl: Adapter exposing ``run_streamed(agent_handle, prompt,
            max_turns=...)`` that returns an async-iterable of events.
        agent_handle: Opaque handle passed through to ``run_streamed``.
        prompt: Prompt text passed through to ``run_streamed``.
        max_turns: Turn limit passed through to ``run_streamed``.
        run_hooks: Hook object for tool events, or ``None``.
        async_task: Forwarded to ``render_model_output``.
        task_id: Forwarded to ``render_model_output``.
        max_api_retry: Number of times a :class:`BackendTimeoutError` from
            the stream is retried before it is re-raised.
        initial_rate_limit_backoff: Starting backoff in seconds. It is
            doubled before the first sleep. A non-positive value is treated
            as 1 so the stream is always attempted and the backoff can grow.
        max_rate_limit_backoff: Cap, in seconds, on any single backoff
            sleep. A rate-limit hit while already at the cap gives up.

    Raises:
        BackendTimeoutError: The stream kept timing out after
            *max_api_retry* retries, or the rate-limit backoff reached
            *max_rate_limit_backoff* and the backend was still rate limited
            (chained from the last :class:`BackendRateLimitError`).
    """
    retries_left = max_api_retry
    # A zero backoff used to skip the loop (and therefore the whole run)
    # silently, and could never grow by doubling; floor it at one second.
    rate_limit_backoff = (
        initial_rate_limit_backoff if initial_rate_limit_backoff > 0 else 1
    )

    while True:
        try:
            await _consume_backend_stream(
                backend_impl=backend_impl,
                agent_handle=agent_handle,
                prompt=prompt,
                max_turns=max_turns,
                run_hooks=run_hooks,
                async_task=async_task,
                task_id=task_id,
            )
            await render_model_output("\n\n", async_task=async_task, task_id=task_id)
            return
        except BackendTimeoutError:
            if not retries_left:
                logging.exception("Max retries for BackendTimeoutError reached")
                raise
            retries_left -= 1
        except BackendRateLimitError as exc:
            if rate_limit_backoff == max_rate_limit_backoff:
                # Raised from inside this handler, so the sibling
                # BackendTimeoutError clause does not retry it.
                raise BackendTimeoutError("Max rate limit backoff reached") from exc
            # Double, but clamp *before* sleeping so no single wait exceeds
            # the cap (this also clamps an initial value above the cap).
            rate_limit_backoff = min(rate_limit_backoff * 2, max_rate_limit_backoff)
            logging.exception(
                "Hit rate limit ... holding for %s", rate_limit_backoff
            )
            await asyncio.sleep(rate_limit_backoff)
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# SPDX-FileCopyrightText: GitHub, Inc.
2+
# SPDX-License-Identifier: MIT
3+
4+
"""Process-level watchdog that force-exits if the event loop stops progressing.
5+
6+
The asyncio retry loop, the httpx client timeouts, and the per-stream
7+
idle timeout already cover the cases we know how to recover from. This
8+
module is the last-resort backstop for everything else (a stuck MCP
9+
cleanup, an asyncio loop spinning on a leaked task, a kernel-level
10+
socket pathology) — a daemon thread polls a monotonic timestamp that
11+
the runtime updates from every interesting event and force-exits the
12+
process if the timestamp ever goes stale for too long.
13+
14+
Sources of pings:
15+
16+
* :func:`drive_backend_stream` — every backend event.
17+
* The runner's ``on_tool_start`` / ``on_tool_end`` hooks.
18+
* The runner's MCP cleanup / backend ``aclose`` paths.
19+
20+
The default timeout is intentionally larger than every recoverable
21+
timeout below it so the watchdog never fires before the asyncio layer
22+
has had a chance to recover.
23+
"""
24+
25+
from __future__ import annotations
26+
27+
__all__ = ["WATCHDOG_IDLE_TIMEOUT", "start_watchdog", "watchdog_ping"]
28+
29+
import logging
30+
import os
31+
import sys
32+
import threading
33+
import time
34+
35+
# Idle limit in integer seconds, overridable through the
# WATCHDOG_IDLE_TIMEOUT environment variable. The 2100 s (35 minute)
# default sits comfortably above the per-stream idle timeout (30 min) and
# the rate-limit backoff cap (2 min), so the watchdog only trips on hangs
# the asyncio path could not recover from.
WATCHDOG_IDLE_TIMEOUT = int(os.getenv("WATCHDOG_IDLE_TIMEOUT", "2100"))

# Shared watchdog state. ``_last_activity`` is a ``time.monotonic()`` stamp
# that is read and written while holding ``_lock``; ``_started`` records
# whether the watchdog thread has already been launched.
_lock = threading.Lock()
_last_activity = time.monotonic()
_started = False
43+
44+
45+
def watchdog_ping() -> None:
    """Stamp the current monotonic time as the most recent activity.

    Safe to call from any coroutine or callback; the module-level
    timestamp is updated while holding the module lock.
    """
    global _last_activity
    _lock.acquire()
    try:
        _last_activity = time.monotonic()
    finally:
        _lock.release()
50+
51+
52+
def _watchdog_loop(timeout: int) -> None:
    """Poll the activity timestamp forever; kill the process once it is stale.

    The poll interval is a fifth of *timeout*, clamped to the range 1-60
    seconds. When more than *timeout* seconds have passed since the last
    recorded activity, an error is logged, the standard streams are
    flushed, and the process is terminated with ``os._exit(2)``. This
    function never returns normally.

    Args:
        timeout: Maximum tolerated idle time in seconds.
    """
    poll_seconds = max(1, min(60, timeout // 5))
    while True:
        time.sleep(poll_seconds)
        with _lock:
            stale_for = time.monotonic() - _last_activity
        if stale_for <= timeout:
            continue
        logging.error(
            "Watchdog: no activity for %.0fs (limit %ds) — force-exiting to prevent hang",
            stale_for,
            timeout,
        )
        # os._exit skips normal interpreter shutdown, so flush explicitly.
        sys.stderr.flush()
        sys.stdout.flush()
        os._exit(2)
67+
68+
69+
def start_watchdog(timeout: int = WATCHDOG_IDLE_TIMEOUT) -> None:
    """Launch the watchdog daemon thread; later calls are no-ops.

    Args:
        timeout: Idle limit in seconds handed to the watchdog loop.
            Defaults to ``WATCHDOG_IDLE_TIMEOUT``.
    """
    global _started
    if not _started:
        _started = True
        # Refresh the timestamp first so a late start does not trip the
        # watchdog immediately.
        watchdog_ping()
        worker = threading.Thread(
            target=_watchdog_loop,
            name="seclab-watchdog",
            args=(timeout,),
            daemon=True,
        )
        worker.start()

src/seclab_taskflow_agent/agent.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
from agents.run import DEFAULT_MAX_TURNS
2525
from dotenv import find_dotenv, load_dotenv
2626
from openai import AsyncOpenAI
27+
import httpx
2728

2829
from .capi import get_AI_endpoint, get_AI_token, get_provider
2930

@@ -178,11 +179,17 @@ def __init__(
178179

179180
# Only send provider-specific headers to matching endpoints
180181
provider = get_provider(resolved_endpoint)
182+
# httpx defaults to no read timeout, which lets a streaming run
183+
# block forever on a half-open TCP connection (CLOSE_WAIT). Pin
184+
# explicit per-phase timeouts so dead sockets surface as
185+
# APITimeoutError and our retry loop can recover.
181186
client = AsyncOpenAI(
182187
base_url=resolved_endpoint,
183188
api_key=resolved_token,
184189
default_headers=provider.extra_headers or None,
190+
timeout=httpx.Timeout(connect=10.0, read=300.0, write=300.0, pool=60.0),
185191
)
192+
self._openai_client = client
186193
set_tracing_disabled(True)
187194
self.run_hooks = run_hooks or TaskRunHooks()
188195

@@ -209,6 +216,16 @@ def _ToolsToFinalOutputFunction(
209216
hooks=agent_hooks or TaskAgentHooks(),
210217
)
211218

219+
async def close(self) -> None:
    """Close the OpenAI client and, with it, its httpx connection pool.

    Dead CLOSE_WAIT sockets left in the pool can keep kqueue/epoll
    spinning on the event loop after the agent is otherwise done, so
    the runner calls this in its ``finally`` to free them. Does nothing
    when no client is set.
    """
    client = self._openai_client
    if client is None:
        return
    await client.close()
228+
212229
async def run(self, prompt: str, max_turns: int = DEFAULT_MAX_TURNS) -> result.RunResult:
213230
"""Run the agent to completion and return the result."""
214231
return await Runner.run(starting_agent=self.agent, input=prompt, max_turns=max_turns, hooks=self.run_hooks)

src/seclab_taskflow_agent/cli.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import asyncio
1616
import logging
1717
import os
18+
import sys
1819
import traceback
1920
from typing import Annotated
2021

@@ -152,6 +153,7 @@ def main(
152153
# When resuming, the session carries taskflow_path/globals/prompt
153154
effective_taskflow = taskflow if not resume else None
154155

156+
exit_code = 0
155157
try:
156158
asyncio.run(
157159
run_main(
@@ -162,13 +164,23 @@ def main(
162164
)
163165
except KeyboardInterrupt:
164166
typer.echo("\nInterrupted.", err=True)
165-
raise typer.Exit(code=130)
167+
exit_code = 130
166168
except Exception as exc:
167169
if debug:
168170
traceback.print_exc()
169171
else:
170172
_print_concise_error(exc)
171-
raise typer.Exit(code=1)
173+
exit_code = 1
174+
175+
# Force-exit at the CLI boundary. Python's shutdown path can spin on
176+
# dangling asyncio tasks or half-open sockets (notably through the
177+
# Responses API + MCP combination), which blocks the interpreter
178+
# from returning even after asyncio.run() completes. Tests that
179+
# invoke run_main() directly never hit this path.
180+
logging.shutdown()
181+
sys.stdout.flush()
182+
sys.stderr.flush()
183+
os._exit(exit_code)
172184

173185

174186
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)