Skip to content

Commit 1b5318b

Browse files
authored
github tests (#340)
1 parent 203cebe commit 1b5318b

3 files changed

Lines changed: 281 additions & 4 deletions

File tree

.github/workflows/testing.yml

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ jobs:
3636
name: python-coverage-comment-action
3737
path: python-coverage-comment-action.txt
3838

39-
integration-tests:
39+
integration-tests-modal:
4040
runs-on: ubuntu-latest
4141
timeout-minutes: 30
4242
env:
@@ -47,4 +47,15 @@ jobs:
4747
- uses: astral-sh/setup-uv@v4
4848
- run: uv sync --extra dev
4949
- run: uv run modal token set --token-id ${MODAL_TOKEN_ID} --token-secret ${MODAL_TOKEN_SECRET}
50-
- run: uv run pytest -m integration tests -v
50+
- run: uv run pytest -m integration tests/test_modal.py -v
51+
52+
# Runs the GitHub-launcher integration tests against real GitHub Actions
# runners. Requires the GH_TOKEN repository secret to be configured.
integration-tests-github:
  runs-on: ubuntu-latest
  timeout-minutes: 45
  env:
    GITHUB_TOKEN: ${{ secrets.GH_TOKEN }}
  steps:
    - uses: actions/checkout@v4
    - uses: astral-sh/setup-uv@v4
    - run: uv sync --extra dev
    - run: uv run pytest -m integration tests/test_github.py -v

src/libkernelbot/launchers/github.py

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
RunResult,
3333
SystemInfo,
3434
)
35-
from libkernelbot.utils import setup_logging
35+
from libkernelbot.utils import KernelBotError, setup_logging
3636

3737
from .launcher import Launcher
3838

@@ -174,7 +174,10 @@ class GitHubArtifact:
174174
class GitHubRun:
175175
def __init__(self, repo: str, token: str, branch: str, workflow_file: str):
    """Bind this run to a GitHub repository and workflow.

    Resolves *repo* eagerly so a bad repository name fails fast with a
    KernelBotError instead of surfacing later as a raw API error.
    """
    client = Github(token)
    try:
        self.repo = client.get_repo(repo)
    except UnknownObjectException as e:
        raise KernelBotError(f"Could not find GitHub repository {repo}: 404") from e
    self.token = token
    self.branch = branch
    self.workflow_file = workflow_file

tests/test_github.py

Lines changed: 263 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,263 @@
1+
import os
2+
import subprocess
3+
from collections import namedtuple
4+
from pathlib import Path
5+
6+
import pytest
7+
from dotenv import load_dotenv
8+
9+
from libkernelbot.consts import GitHubGPU, SubmissionMode
10+
from libkernelbot.launchers import GitHubLauncher
11+
from libkernelbot.report import RunProgressReporter
12+
from libkernelbot.task import build_task_config, make_task_definition
13+
from libkernelbot.utils import get_github_branch_name
14+
15+
# Lightweight record bundling the GitHub connection settings used by the tests.
GitHubConfig = namedtuple("GitHubConfig", "token repo branch")
17+
18+
19+
class MockProgressReporter(RunProgressReporter):
    """In-memory progress reporter that records everything it receives.

    Used by the integration tests to assert on the progress messages a
    launcher emits, instead of sending them anywhere.
    """

    def __init__(self, title: str = "Test GitHub Run"):
        super().__init__(title)
        # Captured output, in arrival order.
        self.messages: list[str] = []
        self.updates: list[str] = []

    async def push(self, message: str):
        self.messages.append(message)

    async def update(self, message: str):
        self.updates.append(message)
32+
33+
34+
def get_github_repo():
    """Return the ``owner/name`` GitHub repository parsed from the git remote.

    Runs ``git remote get-url origin`` in the current working directory and
    parses both SSH (``git@github.com:owner/name.git``) and HTTPS
    (``https://github.com/owner/name.git``) remote formats.

    Returns:
        The ``owner/name`` string, or ``None`` when the remote cannot be
        read (no git repo / no origin / git not installed) or is not a
        GitHub remote.
    """
    try:
        result = subprocess.run(
            ["git", "remote", "get-url", "origin"],
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        # CalledProcessError: not a git repo or no "origin" remote.
        # FileNotFoundError: the git binary itself is unavailable.
        return None

    remote_url = result.stdout.strip()

    # Accept both SSH and HTTPS remote formats.
    for prefix in ("git@github.com:", "https://github.com/"):
        if remote_url.startswith(prefix):
            # removesuffix strips only a *trailing* ".git", so repository
            # names that merely contain ".git" are left intact (unlike
            # str.replace, which would mangle them).
            return remote_url.removeprefix(prefix).removesuffix(".git")

    # Origin exists but is not hosted on GitHub.
    return None
57+
58+
59+
@pytest.fixture(scope="session")
def github_config():
    """Session-scoped GitHub test configuration.

    Reads GITHUB_TOKEN / GITHUB_REPO / GITHUB_BRANCH from the environment
    (a local .env file is honored), falling back to the local git checkout
    for the repository and branch. Skips the tests when no usable
    configuration can be assembled.
    """
    load_dotenv()  # allow a local .env file to supply the variables below

    gh_token = os.getenv("GITHUB_TOKEN")
    gh_repo = os.getenv("GITHUB_REPO") or get_github_repo()
    gh_branch = os.getenv("GITHUB_BRANCH") or get_github_branch_name()

    if not gh_token:
        pytest.skip("GitHub integration tests require GITHUB_TOKEN environment variable")
    if not gh_repo:
        pytest.skip(
            "GitHub integration tests require GITHUB_REPO environment variable "
            "or a valid git remote origin"
        )

    return GitHubConfig(token=gh_token, repo=gh_repo, branch=gh_branch)
82+
83+
84+
@pytest.mark.integration
@pytest.mark.asyncio
@pytest.mark.parametrize("gpu_type", [GitHubGPU.NVIDIA, GitHubGPU.MI300])
async def test_github_launcher_python_script(project_root: Path, github_config: GitHubConfig, gpu_type: GitHubGPU):
    """
    Test GitHubLauncher with a real Python script using real GitHub Actions.

    Runs the examples/identity_py reference submission in TEST mode on each
    parametrized GPU type and checks the full result structure: run success,
    reported system info, per-test pass status (5 tests expected), timings,
    and the progress messages emitted through the reporter.
    """
    launcher = GitHubLauncher(repo=github_config.repo, token=github_config.token, branch=github_config.branch)
    reporter = MockProgressReporter("GitHub Integration Test")

    # Load the real identity_py task
    task_path = project_root / "examples" / "identity_py"
    if not task_path.exists():
        pytest.skip("examples/identity_py not found - skipping GitHub integration test")

    task_definition = make_task_definition(task_path)
    submission_content = (task_path / "submission.py").read_text()

    config = build_task_config(
        task=task_definition.task,
        submission_content=submission_content,
        arch=0,  # Not used for GitHub launcher
        mode=SubmissionMode.TEST,
    )

    # Dispatches a real workflow run and waits for its artifacts.
    result = await launcher.run_submission(config, gpu_type, reporter)

    # Basic structure and success
    assert result.success, f"Expected successful run, got: {result.error}"
    assert result.error == ""
    assert isinstance(result.runs, dict)

    # System info - test actual expected values based on GPU type
    if gpu_type == GitHubGPU.NVIDIA:
        assert "NVIDIA" in result.system.gpu or "GeForce" in result.system.gpu or "RTX" in result.system.gpu
    else:  # AMD GPUs
        assert "MI" in result.system.gpu or "AMD" in result.system.gpu

    assert "Linux" in result.system.platform

    # Test run structure
    assert "test" in result.runs
    test_run = result.runs["test"]

    # For Python runs, compilation is None
    assert test_run.compilation is None

    # Run needs to succeed
    assert test_run.run.success is True
    assert test_run.run.passed is True
    assert test_run.run.exit_code == 0
    assert test_run.run.duration > 0

    # Test results need to succeed: identity_py defines exactly 5 test cases.
    assert test_run.run.result["check"] == "pass"
    test_count = int(test_run.run.result["test-count"])
    assert test_count == 5
    for i in range(test_count):
        assert test_run.run.result[f"test.{i}.status"] == "pass"
        assert "size:" in test_run.run.result[f"test.{i}.spec"]
        assert "seed:" in test_run.run.result[f"test.{i}.spec"]

    # Sanity check for timings
    assert test_run.start < test_run.end

    # Check reporter messages captured by MockProgressReporter
    assert any("Waiting for workflow" in msg for msg in reporter.messages)
    assert any("artifacts" in msg.lower() for msg in reporter.messages)
    assert any("completed" in update for update in reporter.updates)
154+
155+
156+
@pytest.mark.integration
@pytest.mark.asyncio
async def test_github_launcher_failing_script(project_root: Path, github_config: GitHubConfig):
    """
    Submit a deliberately incorrect ("cheating") solution and verify that the
    workflow itself completes while the submission is rejected by at least
    one of the test/benchmark stages.
    """
    launcher = GitHubLauncher(repo=github_config.repo, token=github_config.token, branch=github_config.branch)
    reporter = MockProgressReporter("GitHub Failing Test")
    gpu_type = GitHubGPU.NVIDIA  # Use NVIDIA for simplicity

    # The identity_py example supplies the task; the submission is swapped below.
    task_path = project_root / "examples" / "identity_py"
    if not task_path.exists():
        pytest.skip("examples/identity_py not found - skipping GitHub integration test")

    task_definition = make_task_definition(task_path)
    # Use one of the bundled cheating scripts instead of the reference solution.
    submission_content = (task_path / "cheat-rng.py").read_text()

    # Pin the RNG seed so the cheat is caught deterministically.
    task_definition.task.seed = 653212
    task_config = build_task_config(
        task=task_definition.task,
        submission_content=submission_content,
        arch=0,
        mode=SubmissionMode.LEADERBOARD,
    )

    run_result = await launcher.run_submission(task_config, gpu_type, reporter)

    # The workflow run itself should still succeed...
    assert run_result.success, f"Expected successful workflow run, got: {run_result.error}"
    assert run_result.error == ""

    # ...but the actual test or benchmark stage must reject the submission.
    test_passed = run_result.runs["test"].run.passed if "test" in run_result.runs else True
    benchmark_passed = run_result.runs["benchmark"].run.passed if "benchmark" in run_result.runs else True
    assert not (test_passed and benchmark_passed), "Expected at least one run to fail for cheating script"
196+
197+
198+
199+
200+
@pytest.mark.integration
@pytest.mark.asyncio
@pytest.mark.parametrize("gpu_type", [GitHubGPU.MI300x8])
async def test_github_launcher_multi_gpu(project_root: Path, github_config: GitHubConfig, gpu_type: GitHubGPU):
    """
    Test GitHubLauncher on a multi-GPU runner using the examples/gather task.

    Verifies that the runner exposes all 8 devices (system.device_count) and
    that the task's single test case passes end to end.
    """
    launcher = GitHubLauncher(repo=github_config.repo, token=github_config.token, branch=github_config.branch)
    reporter = MockProgressReporter("GitHub Integration Test")

    # Load the real gather task (a multi-GPU example)
    task_path = project_root / "examples" / "gather"
    if not task_path.exists():
        pytest.skip("examples/gather not found - skipping GitHub integration test")

    task_definition = make_task_definition(task_path)
    submission_content = (task_path / "submission.py").read_text()

    config = build_task_config(
        task=task_definition.task,
        submission_content=submission_content,
        arch=0,  # Not used for GitHub launcher
        mode=SubmissionMode.TEST,
    )

    # Dispatches a real workflow run and waits for its artifacts.
    result = await launcher.run_submission(config, gpu_type, reporter)

    # Basic structure and success
    assert result.success, f"Expected successful run, got: {result.error}"
    assert result.error == ""
    assert isinstance(result.runs, dict)

    # The MI300x8 runner must expose all eight GPUs.
    assert result.system.device_count == 8

    # Test run structure
    assert "test" in result.runs
    test_run = result.runs["test"]

    # For Python runs, compilation is None
    assert test_run.compilation is None

    # Run needs to succeed
    assert test_run.run.success is True
    assert test_run.run.passed is True
    assert test_run.run.exit_code == 0
    assert test_run.run.duration > 0

    # Test results need to succeed: the gather example has a single test case.
    assert test_run.run.result["check"] == "pass"
    test_count = int(test_run.run.result["test-count"])
    assert test_count == 1
    for i in range(test_count):
        assert test_run.run.result[f"test.{i}.status"] == "pass"
        assert "size:" in test_run.run.result[f"test.{i}.spec"]
        assert "seed:" in test_run.run.result[f"test.{i}.spec"]

    # Sanity check for timings
    assert test_run.start < test_run.end

    # Check reporter messages captured by MockProgressReporter
    assert any("Waiting for workflow" in msg for msg in reporter.messages)
    assert any("artifacts" in msg.lower() for msg in reporter.messages)
    assert any("completed" in update for update in reporter.updates)

0 commit comments

Comments
 (0)