import os
import subprocess
from collections import namedtuple
from pathlib import Path

import pytest
from dotenv import load_dotenv

from libkernelbot.consts import GitHubGPU, SubmissionMode
from libkernelbot.launchers import GitHubLauncher
from libkernelbot.report import RunProgressReporter
from libkernelbot.task import build_task_config, make_task_definition
from libkernelbot.utils import get_github_branch_name

# Named tuple for better readability of the GitHub test configuration.
GitHubConfig = namedtuple("GitHubConfig", ["token", "repo", "branch"])
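
# These tests talk to real GitHub Actions runners, so they are meant to be run
# explicitly rather than as part of the default unit-test suite. Assuming the
# "integration" marker is registered in the project's pytest configuration, an
# invocation might look like (token and file path are placeholders):
#
#   GITHUB_TOKEN=<token> pytest -m integration tests/test_github_integration.py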


class MockProgressReporter(RunProgressReporter):
    """Test progress reporter that captures messages."""

    def __init__(self, title: str = "Test GitHub Run"):
        super().__init__(title)
        self.messages = []
        self.updates = []

    async def push(self, message: str):
        self.messages.append(message)

    async def update(self, message: str):
        self.updates.append(message)


def get_github_repo():
    """Get GitHub repository from git remote."""
    try:
        result = subprocess.run(
            ["git", "remote", "get-url", "origin"],
            capture_output=True,
            text=True,
            check=True,
        )
        remote_url = result.stdout.strip()

        # Parse GitHub repo from remote URL
        # Handle both SSH and HTTPS formats
        if remote_url.startswith("git@github.com:"):
            repo = remote_url.replace("git@github.com:", "").replace(".git", "")
        elif remote_url.startswith("https://github.com/"):
            repo = remote_url.replace("https://github.com/", "").replace(".git", "")
        else:
            return None

        return repo
    except subprocess.CalledProcessError:
        return None
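
# For reference, both remote formats normalize to the same "owner/repo" string,
# e.g. "git@github.com:owner/repo.git" and "https://github.com/owner/repo.git"
# (placeholder names) both yield "owner/repo".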


@pytest.fixture(scope="session")
def github_config():
    """
    Get GitHub test configuration from environment or git.
    Skips tests if required configuration is missing.
    """
    # Load .env file if it exists
    load_dotenv()

    token = os.getenv("GITHUB_TOKEN")
    repo = os.getenv("GITHUB_REPO") or get_github_repo()
    branch = os.getenv("GITHUB_BRANCH") or get_github_branch_name()

    if not token:
        pytest.skip("GitHub integration tests require GITHUB_TOKEN environment variable")

    if not repo:
        pytest.skip(
            "GitHub integration tests require GITHUB_REPO environment variable "
            "or a valid git remote origin"
        )

    return GitHubConfig(token=token, repo=repo, branch=branch)
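
# A minimal .env for local runs might look like the following (all values are
# placeholders; GITHUB_REPO and GITHUB_BRANCH fall back to git metadata if unset):
#
#   GITHUB_TOKEN=ghp_xxx
#   GITHUB_REPO=owner/repo
#   GITHUB_BRANCH=my-feature-branch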
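
# The project_root fixture used by the tests below is assumed to be provided by
# the suite's conftest.py; a minimal sketch (with a hypothetical directory
# layout) could be:
#
#   @pytest.fixture(scope="session")
#   def project_root() -> Path:
#       return Path(__file__).resolve().parents[1]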


@pytest.mark.integration
@pytest.mark.asyncio
@pytest.mark.parametrize("gpu_type", [GitHubGPU.NVIDIA, GitHubGPU.MI300])
async def test_github_launcher_python_script(
    project_root: Path, github_config: GitHubConfig, gpu_type: GitHubGPU
):
    """
    Test GitHubLauncher with a real Python script using real GitHub Actions.
    Runs against both GPU types to verify the runners are working.
    """
    launcher = GitHubLauncher(
        repo=github_config.repo, token=github_config.token, branch=github_config.branch
    )
    reporter = MockProgressReporter("GitHub Integration Test")

    # Load the real identity_py task
    task_path = project_root / "examples" / "identity_py"
    if not task_path.exists():
        pytest.skip("examples/identity_py not found - skipping GitHub integration test")

    task_definition = make_task_definition(task_path)
    submission_content = (task_path / "submission.py").read_text()

    config = build_task_config(
        task=task_definition.task,
        submission_content=submission_content,
        arch=0,  # Not used for GitHub launcher
        mode=SubmissionMode.TEST,
    )

    result = await launcher.run_submission(config, gpu_type, reporter)

    # Basic structure and success
    assert result.success, f"Expected successful run, got: {result.error}"
    assert result.error == ""
    assert isinstance(result.runs, dict)

    # System info - check the expected values for the requested GPU type
    if gpu_type == GitHubGPU.NVIDIA:
        assert (
            "NVIDIA" in result.system.gpu
            or "GeForce" in result.system.gpu
            or "RTX" in result.system.gpu
        )
    else:  # AMD GPUs
        assert "MI" in result.system.gpu or "AMD" in result.system.gpu

    assert "Linux" in result.system.platform

    # Test run structure
    assert "test" in result.runs
    test_run = result.runs["test"]

    # For Python runs, compilation is None
    assert test_run.compilation is None

    # Run needs to succeed
    assert test_run.run.success is True
    assert test_run.run.passed is True
    assert test_run.run.exit_code == 0
    assert test_run.run.duration > 0

    # Test results need to succeed
    assert test_run.run.result["check"] == "pass"
    test_count = int(test_run.run.result["test-count"])
    assert test_count == 5
    for i in range(test_count):
        assert test_run.run.result[f"test.{i}.status"] == "pass"
        assert "size:" in test_run.run.result[f"test.{i}.spec"]
        assert "seed:" in test_run.run.result[f"test.{i}.spec"]
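
    # For reference, the keys asserted above come from a flat result mapping
    # shaped roughly like this (values are illustrative, not an exact contract):
    #   {"check": "pass", "test-count": "5",
    #    "test.0.status": "pass", "test.0.spec": "size: 256; seed: 4242", ...}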

    # Sanity check for timings
    assert test_run.start < test_run.end

    # Check reporter messages
    assert any("Waiting for workflow" in msg for msg in reporter.messages)
    assert any("artifacts" in msg.lower() for msg in reporter.messages)
    assert any("completed" in update for update in reporter.updates)


@pytest.mark.integration
@pytest.mark.asyncio
async def test_github_launcher_failing_script(project_root: Path, github_config: GitHubConfig):
    """
    Test GitHubLauncher with a script designed to fail.
    Simple test to ensure we don't pass wrong submissions.
    """
    launcher = GitHubLauncher(
        repo=github_config.repo, token=github_config.token, branch=github_config.branch
    )
    reporter = MockProgressReporter("GitHub Failing Test")
    gpu_type = GitHubGPU.NVIDIA  # Use NVIDIA for simplicity

    # Load the real identity_py task
    task_path = project_root / "examples" / "identity_py"
    if not task_path.exists():
        pytest.skip("examples/identity_py not found - skipping GitHub integration test")

    task_definition = make_task_definition(task_path)
    # Use one of the cheating scripts
    submission_content = (task_path / "cheat-rng.py").read_text()

    # Set a specific seed for reproducible results
    task_definition.task.seed = 653212
    config = build_task_config(
        task=task_definition.task,
        submission_content=submission_content,
        arch=0,
        mode=SubmissionMode.LEADERBOARD,
    )

    result = await launcher.run_submission(config, gpu_type, reporter)

    # The basic structure should still be successful (the workflow itself ran)
    assert result.success, f"Expected successful workflow run, got: {result.error}"
    assert result.error == ""

    # But the actual test or benchmark run should fail
    test_passed = result.runs["test"].run.passed if "test" in result.runs else True
    benchmark_passed = (
        result.runs["benchmark"].run.passed if "benchmark" in result.runs else True
    )

    assert not (test_passed and benchmark_passed), (
        "Expected at least one run to fail for the cheating script"
    )


@pytest.mark.integration
@pytest.mark.asyncio
@pytest.mark.parametrize("gpu_type", [GitHubGPU.MI300x8])
async def test_github_launcher_multi_gpu(
    project_root: Path, github_config: GitHubConfig, gpu_type: GitHubGPU
):
    """
    Test GitHubLauncher with a multi-GPU task using real GitHub Actions.
    Verifies that the multi-GPU runner exposes all eight devices.
    """
    launcher = GitHubLauncher(
        repo=github_config.repo, token=github_config.token, branch=github_config.branch
    )
    reporter = MockProgressReporter("GitHub Integration Test")

    # Load the real gather task
    task_path = project_root / "examples" / "gather"
    if not task_path.exists():
        pytest.skip("examples/gather not found - skipping GitHub integration test")

    task_definition = make_task_definition(task_path)
    submission_content = (task_path / "submission.py").read_text()

    config = build_task_config(
        task=task_definition.task,
        submission_content=submission_content,
        arch=0,  # Not used for GitHub launcher
        mode=SubmissionMode.TEST,
    )

    result = await launcher.run_submission(config, gpu_type, reporter)

    # Basic structure and success
    assert result.success, f"Expected successful run, got: {result.error}"
    assert result.error == ""
    assert isinstance(result.runs, dict)

    assert result.system.device_count == 8

    # Test run structure
    assert "test" in result.runs
    test_run = result.runs["test"]

    # For Python runs, compilation is None
    assert test_run.compilation is None

    # Run needs to succeed
    assert test_run.run.success is True
    assert test_run.run.passed is True
    assert test_run.run.exit_code == 0
    assert test_run.run.duration > 0

    # Test results need to succeed
    assert test_run.run.result["check"] == "pass"
    test_count = int(test_run.run.result["test-count"])
    assert test_count == 1
    for i in range(test_count):
        assert test_run.run.result[f"test.{i}.status"] == "pass"
        assert "size:" in test_run.run.result[f"test.{i}.spec"]
        assert "seed:" in test_run.run.result[f"test.{i}.spec"]

    # Sanity check for timings
    assert test_run.start < test_run.end

    # Check reporter messages
    assert any("Waiting for workflow" in msg for msg in reporter.messages)
    assert any("artifacts" in msg.lower() for msg in reporter.messages)
    assert any("completed" in update for update in reporter.updates)