Skip to content

Commit 9adaeb9

Browse files
committed
ci: add optional E2E agent proof workflow
- Manual-trigger workflow for end-to-end scenario validation - Supports unattended Copilot auth via COPILOT_GITHUB_TOKEN - Also supports OpenAI BYOK mode - Uploads transcript artifacts as proof - scripts/run_agent_scenarios.py: deterministic scenario runner with timeout
1 parent 34c7c82 commit 9adaeb9

2 files changed

Lines changed: 210 additions & 0 deletions

File tree

Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
name: Agent Scenarios (E2E Proof)

# Manual trigger only: the run needs real credentials and makes paid model calls.
on:
  workflow_dispatch:
    inputs:
      provider:
        description: "Provider to use for E2E runs"
        required: true
        default: "copilot"
        type: choice
        options:
          - copilot
          - openai
      model:
        description: "Model ID (must be available for the chosen provider)"
        required: true
        default: "gpt-5-mini"
        type: string

permissions:
  contents: read

jobs:
  e2e:
    name: E2E scenarios (${{ inputs.provider }})
    runs-on: ubuntu-latest
    timeout-minutes: 15

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: "3.13"
          cache: pip

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          python -m pip install -r requirements.txt

      # Fail early with a readable message instead of an auth error deep in the run.
      - name: Validate required secrets
        env:
          PROVIDER: ${{ inputs.provider }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
        run: |
          if [ "$PROVIDER" = "copilot" ] && [ -z "$COPILOT_GITHUB_TOKEN" ]; then
            echo "COPILOT_GITHUB_TOKEN secret is required for provider=copilot (unattended CI)" >&2
            exit 1
          fi
          if [ "$PROVIDER" = "openai" ] && [ -z "$OPENAI_API_KEY" ]; then
            echo "OPENAI_API_KEY secret is required for provider=openai" >&2
            exit 1
          fi

      - name: Run E2E agent scenarios
        env:
          COPILOT_E2E_PROVIDER: ${{ inputs.provider }}
          COPILOT_E2E_MODEL: ${{ inputs.model }}
          COPILOT_GITHUB_TOKEN: ${{ secrets.COPILOT_GITHUB_TOKEN }}
          # For provider=openai, add this secret in repo settings:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          # Optional; expands to an empty string when the secret is not defined.
          OPENAI_BASE_URL: ${{ secrets.OPENAI_BASE_URL }}
        run: |
          # Without pipefail the step's status is tee's (always 0), so a failing
          # scenario run would still be reported as a green job.
          set -o pipefail
          mkdir -p artifacts
          python scripts/run_agent_scenarios.py | tee artifacts/agent-scenarios.txt

      # always(): keep the transcript as evidence even when the scenarios fail.
      - name: Upload transcript
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: agent-scenarios-transcript
          path: |
            artifacts/agent-scenarios.txt
          if-no-files-found: ignore

scripts/run_agent_scenarios.py

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
#!/usr/bin/env python3
2+
"""Run end-to-end agent scenarios for CI proof.
3+
4+
This script is intended for CI environments where you *want proof* that the
5+
Copilot SDK can run real scenarios (network + auth required).
6+
7+
It runs a small, deterministic set of scenarios and prints a short transcript.
8+
9+
Modes
10+
-----
11+
- Copilot mode (default): uses your Copilot CLI auth, or COPILOT_GITHUB_TOKEN when it is set (unattended CI).
12+
- OpenAI mode: uses BYOK provider config with OPENAI_API_KEY.
13+
14+
Notes
15+
-----
16+
- These are E2E checks, not unit tests.
17+
- Keep prompts short to reduce cost/latency.
18+
"""
19+
20+
from __future__ import annotations
21+
22+
import argparse
23+
import asyncio
24+
import os
25+
import sys
26+
from dataclasses import dataclass
27+
28+
from copilot import CopilotClient
29+
30+
31+
@dataclass
class ScenarioResult:
    """Outcome of a single E2E scenario, as reported in the transcript."""

    # Short scenario identifier shown in the transcript line.
    name: str
    # True when the scenario passed; rendered as PASS/FAIL by the runner.
    ok: bool
    # Free-form detail text (response excerpt or error message); may be empty.
    details: str = ""
36+
37+
38+
def _provider_config(provider: str) -> dict | None:
    """Build the session-level provider config for *provider*.

    Returns ``None`` for ``"copilot"`` (no provider override) and a BYOK
    config dict with ``type``, ``base_url`` and ``api_key`` for ``"openai"``.

    Raises:
        RuntimeError: ``provider`` is ``"openai"`` but OPENAI_API_KEY is
            missing or empty.
        ValueError: ``provider`` is not a known provider name.
    """
    if provider == "copilot":
        return None
    if provider == "openai":
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise RuntimeError("OPENAI_API_KEY is required for provider=openai")
        # CI exports OPENAI_BASE_URL from an optional secret, which yields an
        # empty string when the secret is undefined. Treat empty like missing
        # so the default endpoint is used instead of a blank URL.
        base_url = os.getenv("OPENAI_BASE_URL") or "https://api.openai.com/v1"
        return {
            "type": "openai",
            "base_url": base_url,
            "api_key": api_key,
        }
    raise ValueError(f"Unknown provider: {provider}")
51+
52+
53+
async def scenario_ping(client: CopilotClient) -> ScenarioResult:
    """Ping the started client and report the protocol version it returns.

    Any exception is converted into a failed ``ScenarioResult`` carrying the
    exception text, so this coroutine itself does not raise.
    """
    label = "ping"
    try:
        reply = await client.ping("ci")
        # Read the attribute inside the try so a malformed reply is reported
        # as a scenario failure rather than escaping.
        summary = f"protocol={reply.protocolVersion}"
    except Exception as exc:
        return ScenarioResult(label, False, str(exc))
    return ScenarioResult(label, True, summary)
59+
60+
61+
async def scenario_single_prompt(client: CopilotClient, *, model: str, provider_cfg: dict | None) -> ScenarioResult:
    """Send one short prompt through a fresh session and check the reply.

    Creates a session for *model* (adding *provider_cfg* as ``provider`` when
    it is not ``None``), asks the model to reply with ``OK`` and passes when
    the stripped response text starts with ``OK``. The wait is bounded by the
    COPILOT_E2E_TIMEOUT environment variable (seconds, default 60).

    The session is always destroyed. Every failure, including a timeout, is
    returned as a failed ``ScenarioResult`` with a non-empty reason, so this
    coroutine itself does not raise.
    """
    name = "single_prompt"
    try:
        # Parse before opening a session; an empty variable means "use default".
        timeout_s = float(os.getenv("COPILOT_E2E_TIMEOUT") or "60")

        cfg: dict = {"model": model}
        if provider_cfg is not None:
            cfg["provider"] = provider_cfg
        session = await client.create_session(cfg)
        try:
            try:
                resp = await asyncio.wait_for(
                    session.send_and_wait({"prompt": "Reply with exactly: OK"}),
                    timeout=timeout_s,
                )
            except asyncio.TimeoutError:
                # TimeoutError stringifies to "", which used to print a FAIL
                # line with no explanation at all.
                return ScenarioResult(name, False, f"timed out after {timeout_s:g}s waiting for a response")
            # NOTE(review): guards against the SDK handing back no event;
            # confirm against send_and_wait's documented return contract.
            if resp is None:
                return ScenarioResult(name, False, "no response event received")
            text = (resp.data.content or "").strip()
            # Prefix match on purpose: tolerates trailing punctuation/newlines.
            ok = text.startswith("OK")
            return ScenarioResult(name, ok, f"response={text[:80]!r}")
        finally:
            await session.destroy()
    except Exception as e:
        # Fall back to the exception type when the message is empty.
        return ScenarioResult(name, False, str(e) or type(e).__name__)
82+
83+
84+
async def run(provider: str, model: str) -> int:
    """Run every scenario against one client and print the transcript.

    Returns 0 when all scenarios pass and 1 when at least one fails. Errors
    from provider configuration or client start/stop propagate to the caller.
    """
    provider_cfg = _provider_config(provider)

    # Unattended CI: hand the client a token instead of relying on an
    # interactive login. Only applies to the Copilot provider.
    token = os.getenv("COPILOT_GITHUB_TOKEN")
    use_token = provider == "copilot" and bool(token)

    client = CopilotClient({"github_token": token} if use_token else None)
    await client.start()
    try:
        # Scenarios run sequentially, in this order.
        outcomes: list[ScenarioResult] = [
            await scenario_ping(client),
            await scenario_single_prompt(client, model=model, provider_cfg=provider_cfg),
        ]

        print("Agent scenarios")
        print(f" provider: {provider}")
        print(f" model : {model}")
        print()

        for outcome in outcomes:
            status = "FAIL" if not outcome.ok else "PASS"
            print(f"- {outcome.name:14} {status} {outcome.details}")

        failures = sum(1 for outcome in outcomes if not outcome.ok)
        return 1 if failures else 0
    finally:
        # Stop the client even when a scenario or print raised.
        await client.stop()
116+
117+
118+
def main() -> int:
    """Parse CLI/environment options, run the scenarios, return an exit code.

    ``--provider`` and ``--model`` default to COPILOT_E2E_PROVIDER and
    COPILOT_E2E_MODEL. Returns the value from ``run`` (0 or 1), or 2 when the
    runner itself raised.
    """
    default_provider = os.getenv("COPILOT_E2E_PROVIDER", "copilot")
    default_model = os.getenv("COPILOT_E2E_MODEL", "gpt-5-mini")

    cli = argparse.ArgumentParser()
    cli.add_argument("--provider", choices=["copilot", "openai"], default=default_provider)
    cli.add_argument("--model", default=default_model)
    options = cli.parse_args()

    try:
        exit_code = asyncio.run(run(options.provider, options.model))
    except Exception as exc:
        # Top-level boundary: report on stderr and use a distinct exit code so
        # runner errors are distinguishable from scenario failures.
        print(f"E2E runner error: {exc}", file=sys.stderr)
        return 2
    return exit_code


if __name__ == "__main__":
    raise SystemExit(main())

0 commit comments

Comments
 (0)