|
| 1 | +#!/usr/bin/env python3 |
| 2 | +"""Run end-to-end agent scenarios for CI proof. |
| 3 | +
|
| 4 | +This script is intended for CI environments where you *want proof* that the |
| 5 | +Copilot SDK can run real scenarios (network + auth required). |
| 6 | +
|
| 7 | +It runs a small, deterministic set of scenarios and prints a short transcript. |
| 8 | +
|
| 9 | +Modes |
| 10 | +----- |
| 11 | +- Copilot mode (default): uses your Copilot CLI auth. |
| 12 | +- OpenAI mode: uses BYOK provider config with OPENAI_API_KEY. |
| 13 | +
|
| 14 | +Notes |
| 15 | +----- |
| 16 | +- These are E2E checks, not unit tests. |
| 17 | +- Keep prompts short to reduce cost/latency. |
| 18 | +""" |
| 19 | + |
| 20 | +from __future__ import annotations |
| 21 | + |
| 22 | +import argparse |
| 23 | +import asyncio |
| 24 | +import os |
| 25 | +import sys |
| 26 | +from dataclasses import dataclass |
| 27 | + |
| 28 | +from copilot import CopilotClient |
| 29 | + |
| 30 | + |
@dataclass
class ScenarioResult:
    """Outcome of a single end-to-end scenario, as shown in the transcript."""

    # Short scenario identifier printed in the first transcript column.
    name: str
    # True when the scenario passed, False when it failed.
    ok: bool
    # Optional free-form context (e.g. a response excerpt or an error message).
    details: str = ""
| 36 | + |
| 37 | + |
def _provider_config(provider: str) -> dict | None:
    """Build the session-level provider config for the selected mode.

    Args:
        provider: Either "copilot" or "openai".

    Returns:
        None for "copilot" (no provider override is sent), or a dict with
        the keys "type", "base_url" and "api_key" for "openai".

    Raises:
        RuntimeError: provider is "openai" and OPENAI_API_KEY is unset or empty.
        ValueError: provider is not one of the known names.
    """
    if provider == "copilot":
        return None
    if provider == "openai":
        api_key = os.getenv("OPENAI_API_KEY")
        if not api_key:
            raise RuntimeError("OPENAI_API_KEY is required for provider=openai")
        # CI systems commonly export unset variables as empty strings; treat an
        # empty OPENAI_BASE_URL like a missing one instead of sending "".
        base_url = os.getenv("OPENAI_BASE_URL") or "https://api.openai.com/v1"
        return {
            "type": "openai",
            "base_url": base_url,
            "api_key": api_key,
        }
    raise ValueError(f"Unknown provider: {provider}")
| 51 | + |
| 52 | + |
async def scenario_ping(client: CopilotClient) -> ScenarioResult:
    """Ping the client and report the protocol version it answers with.

    Args:
        client: Client to ping; the caller is expected to have started it.

    Returns:
        A ``ScenarioResult`` named "ping". On success the details carry
        ``protocol=<protocolVersion>``; any exception is converted into a
        failed result instead of being raised.
    """
    try:
        pong = await client.ping("ci")
        return ScenarioResult("ping", True, f"protocol={pong.protocolVersion}")
    except Exception as e:
        # str(e) alone is empty for message-less exceptions (e.g. timeouts),
        # which would print a FAIL line with no explanation; always include
        # the exception type.
        message = str(e)
        details = f"{type(e).__name__}: {message}" if message else type(e).__name__
        return ScenarioResult("ping", False, details)
| 59 | + |
| 60 | + |
async def scenario_single_prompt(
    client: CopilotClient,
    *,
    model: str,
    provider_cfg: dict | None,
) -> ScenarioResult:
    """Send one short prompt through a fresh session and check the reply.

    The wait is bounded by COPILOT_E2E_TIMEOUT (seconds, default 60).

    Args:
        client: Client used to create the session.
        model: Model identifier placed in the session config.
        provider_cfg: Provider settings added under the "provider" key, or
            None to leave that key out.

    Returns:
        A ``ScenarioResult`` named "single_prompt". It passes when the
        stripped reply starts with "OK". Every exception, including a
        timeout or a failure while destroying the session, is reported as a
        failed result instead of being raised.
    """
    try:
        cfg: dict = {"model": model}
        if provider_cfg is not None:
            cfg["provider"] = provider_cfg
        # An empty COPILOT_E2E_TIMEOUT (typical for unset CI variables) falls
        # back to the default instead of failing in float("").
        timeout_s = float(os.getenv("COPILOT_E2E_TIMEOUT") or "60")
        session = await client.create_session(cfg)
        try:
            try:
                resp = await asyncio.wait_for(
                    session.send_and_wait({
                        "prompt": "Reply with exactly: OK",
                    }),
                    timeout=timeout_s,
                )
            except asyncio.TimeoutError as e:
                # wait_for raises a TimeoutError with an empty message; spell
                # out the limit so the transcript explains the failure.
                reason = str(e) or f"no reply within {timeout_s:g}s"
                return ScenarioResult("single_prompt", False, f"timeout: {reason}")
            if resp is None:
                # NOTE(review): guards against send_and_wait yielding no
                # event - confirm against the SDK's return contract.
                return ScenarioResult("single_prompt", False, "no response event received")
            text = (resp.data.content or "").strip()
            ok = text.startswith("OK")
            return ScenarioResult("single_prompt", ok, f"response={text[:80]!r}")
        finally:
            # Runs on every path above, including the early returns.
            await session.destroy()
    except Exception as e:
        # Include the exception type so message-less errors stay diagnosable.
        message = str(e)
        details = f"{type(e).__name__}: {message}" if message else type(e).__name__
        return ScenarioResult("single_prompt", False, details)
| 82 | + |
| 83 | + |
async def run(provider: str, model: str) -> int:
    """Run every scenario against one client and print a short transcript.

    The provider config is resolved before the client is created, so an
    invalid provider raises without starting anything. The client is
    stopped once it has been started, whether or not the scenarios succeed.

    Args:
        provider: Provider name, forwarded to ``_provider_config``.
        model: Model identifier used by the prompt scenario.

    Returns:
        0 when every scenario passed, 1 when at least one failed.
    """
    provider_cfg = _provider_config(provider)

    # Unattended Copilot runs can authenticate with a token instead of an
    # interactive login; it is passed as the `github_token` client option.
    token = os.getenv("COPILOT_GITHUB_TOKEN")
    options: dict = {"github_token": token} if provider == "copilot" and token else {}

    client = CopilotClient(options or None)
    await client.start()
    try:
        # Scenarios run one after another, in this order.
        outcomes: list[ScenarioResult] = [
            await scenario_ping(client),
            await scenario_single_prompt(client, model=model, provider_cfg=provider_cfg),
        ]

        print("Agent scenarios")
        print(f" provider: {provider}")
        print(f" model : {model}")
        print()

        for outcome in outcomes:
            verdict = "PASS" if outcome.ok else "FAIL"
            print(f"- {outcome.name:14} {verdict} {outcome.details}")

        return 1 if any(not outcome.ok for outcome in outcomes) else 0
    finally:
        await client.stop()
| 116 | + |
| 117 | + |
def main() -> int:
    """Parse CLI arguments and run the scenarios.

    Defaults come from COPILOT_E2E_PROVIDER and COPILOT_E2E_MODEL when those
    are set to a non-empty value.

    Returns:
        The exit code from ``run`` (0 when all scenarios passed, 1 when any
        failed), or 2 when the runner itself raised.
    """
    parser = argparse.ArgumentParser()
    # `or` rather than a getenv default: CI systems often export unset
    # variables as empty strings, which must not replace the built-in default.
    # An env-provided default is not checked against `choices`; an unknown
    # value is rejected later inside run().
    parser.add_argument(
        "--provider",
        default=os.getenv("COPILOT_E2E_PROVIDER") or "copilot",
        choices=["copilot", "openai"],
    )
    parser.add_argument("--model", default=os.getenv("COPILOT_E2E_MODEL") or "gpt-5-mini")
    args = parser.parse_args()

    try:
        return asyncio.run(run(args.provider, args.model))
    except Exception as e:
        # Top-level boundary: report on stderr and use a distinct exit code
        # so CI can tell runner errors apart from scenario failures.
        print(f"E2E runner error: {e}", file=sys.stderr)
        return 2
| 129 | + |
| 130 | + |
if __name__ == "__main__":
    # Propagate main()'s return value as the process exit status.
    sys.exit(main())
0 commit comments