Skip to content

Commit 241b868

Browse files
committed
gpuci v0.1.0 - CUDA kernel testing across GPUs
0 parents  commit 241b868

32 files changed

Lines changed: 5641 additions & 0 deletions

.github/workflows/ci.yml

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
# Continuous integration for the gpuci package: unit tests on several Python
# versions, lint/type checks, a packaging check, and a sanity check of the
# repository's action.yml.
name: CI

on:
  push:
    branches: [main, master]
  pull_request:
    branches: [main, master]

# Least privilege: every job below only needs to read the repository.
permissions:
  contents: read

jobs:
  test:
    name: Test (Python ${{ matrix.python-version }})
    runs-on: ubuntu-latest
    strategy:
      # Keep testing the remaining interpreters when one of them fails.
      fail-fast: false
      matrix:
        # Quoted so that 3.10 is not read as the float 3.1.
        python-version: ['3.9', '3.10', '3.11', '3.12']

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[dev]"

      - name: Run tests with coverage
        run: |
          pytest tests/ -v --cov=gpuci --cov-report=xml --cov-report=term-missing

      # Upload from a single matrix entry so the report is sent only once.
      - name: Upload coverage to Codecov
        if: matrix.python-version == '3.11'
        uses: codecov/codecov-action@v4
        with:
          files: ./coverage.xml
          # codecov-action v4 expects an upload token outside of fork PRs.
          # An unset secret expands to an empty string, i.e. the old behavior.
          token: ${{ secrets.CODECOV_TOKEN }}
          # Coverage upload problems must not turn the test job red.
          fail_ci_if_error: false

  lint:
    name: Lint
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -e ".[dev]"

      - name: Run ruff
        run: ruff check gpuci/ tests/

      - name: Run mypy
        run: mypy gpuci/ --ignore-missing-imports

  build:
    name: Build
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install build tools
        run: pip install build

      - name: Build package
        run: python -m build

      # twine check validates the metadata of the built distributions.
      - name: Check package
        run: |
          pip install twine
          twine check dist/*

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          name: dist
          path: dist/

  # Validates the GitHub Action itself
  action-test:
    name: Test Action
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      # PyYAML is a third-party package; install it explicitly instead of
      # relying on whatever happens to be preinstalled on the runner image.
      - name: Install PyYAML
        run: pip install pyyaml

      - name: Validate action.yml
        run: |
          python -c "import yaml; yaml.safe_load(open('action.yml'))"
          echo "action.yml is valid YAML"
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
# Advanced example: run the same kernel tests against several GPU targets via
# a matrix, then fold the outcome into one summary job.

name: GPU Kernel Tests (Advanced)

on:
  pull_request:
    paths:
      - '**/*.cu'
      - '**/*.cuh'

jobs:
  # One job per GPU type; the matrix entries run in parallel.
  gpu-matrix:
    name: Test on ${{ matrix.target }}
    runs-on: ubuntu-latest
    strategy:
      # A failure on one target does not cancel the other targets.
      fail-fast: false
      matrix:
        target:
          - h100
          - a100
          - rtx4090

    steps:
      - uses: actions/checkout@v4

      - name: Test kernel on ${{ matrix.target }}
        uses: rightnow-ai/gpuci@v1
        with:
          kernel: 'src/kernels/*.cu'
          target: ${{ matrix.target }}
          ssh-key: ${{ secrets.GPU_SSH_KEY }}
          runs: '20'
          warmup: '5'

  # Aggregate results: a single job whose status reflects the whole matrix.
  report:
    # always() makes this job run even when a matrix entry failed.
    if: always()
    needs: [gpu-matrix]
    runs-on: ubuntu-latest

    steps:
      - name: Check results
        run: |
          if [ "${{ needs.gpu-matrix.result }}" = "success" ]; then
            echo "All GPU tests passed!"
          else
            echo "Some GPU tests failed"
            exit 1
          fi

.github/workflows/gpu-test.yml

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
# Example workflow using gpuci GitHub Action
# Copy this to your repo's .github/workflows/ directory
#
# NOTE(review): the path filters and the `config` input below expect a
# gpuci.yml tracked at the repository root - make sure it is not gitignored.

name: GPU Kernel Tests

on:
  push:
    branches: [main]
    paths:
      - '**/*.cu'
      # Kernel headers can change kernel behavior as well.
      - '**/*.cuh'
      - 'gpuci.yml'
  pull_request:
    paths:
      - '**/*.cu'
      - '**/*.cuh'
      - 'gpuci.yml'
  workflow_dispatch:  # Manual trigger

jobs:
  gpu-test:
    runs-on: ubuntu-latest
    name: Test CUDA Kernels

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Run GPU tests
        uses: rightnow-ai/gpuci@v1
        with:
          kernel: 'kernels/*.cu'  # or specific file: 'src/matmul.cu'
          config: 'gpuci.yml'
          ssh-key: ${{ secrets.GPU_SSH_KEY }}
          # NOTE(review): presumably posts results as a PR comment; if so the
          # job token needs `pull-requests: write` - confirm against the action.
          post-comment: 'true'
          fail-on-error: 'true'

.github/workflows/publish.yml

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
# Publish to PyPI when a new release is created
name: Publish to PyPI

on:
  release:
    types: [published]

  # Allow manual trigger
  workflow_dispatch:

jobs:
  publish:
    name: Build and Publish
    runs-on: ubuntu-latest

    # Recommended: use environment for additional protection
    environment: pypi

    # The job only reads the repository; uploading uses the PyPI token below.
    permissions:
      contents: read

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install build tools
        run: |
          python -m pip install --upgrade pip
          pip install build twine

      - name: Build package
        run: python -m build

      # Validate the distribution metadata before anything is uploaded.
      - name: Check package
        run: twine check dist/*

      - name: Publish to PyPI
        env:
          # "__token__" is the fixed user name for PyPI API-token auth.
          TWINE_USERNAME: __token__
          TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }}
        # --non-interactive: fail with a clear error instead of prompting for
        # credentials when the secret is missing or empty.
        run: twine upload --non-interactive dist/*
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
# Example using self-hosted GPU runner
# For teams with their own GPU machines running GitHub Actions runner
#
# NOTE(review): `pull_request` on a self-hosted runner executes pull-request
# code on your own hardware; restrict this trigger if the repository accepts
# pull requests from forks.

name: GPU Tests (Self-Hosted)

on:
  push:
    branches: [main]
  pull_request:

jobs:
  gpu-test:
    # Use self-hosted runner with GPU (the runner must carry all three labels)
    runs-on: [self-hosted, gpu, cuda]
    name: Test on Local GPU

    steps:
      - uses: actions/checkout@v4

      - name: Setup Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Install gpuci
        run: pip install gpuci

      # Fails the job early if the driver or GPU is not usable.
      - name: Check GPU
        run: nvidia-smi

      - name: Run local GPU test
        run: |
          # For self-hosted runner, can test directly without SSH
          # Compile and run kernel locally
          nvcc -O3 -o test_kernel kernels/matmul.cu
          ./test_kernel

      # Or use gpuci, driven by gpuci.yml, to test on other machines over SSH.
      # No `env:` override for SSH_AUTH_SOCK here: the `env` context only holds
      # variables declared in the workflow, so `${{ env.SSH_AUTH_SOCK }}` would
      # expand to an empty string and hide the socket that the step inherits
      # from the runner's own environment. SSH keys/agent can be pre-installed
      # on the self-hosted runner.
      - name: Run gpuci (SSH to other machines)
        run: gpuci test kernels/*.cu --config gpuci.yml

.gitignore

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
# --- Python bytecode and compiled artifacts ---
__pycache__/
*.py[cod]
*$py.class

# --- Compiled C extensions ---
*.so

# --- Build, distribution and packaging output ---
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg

# --- PyInstaller ---
*.manifest
*.spec

# --- pip installer logs ---
pip-log.txt
pip-delete-this-directory.txt

# --- Test runs and coverage reports ---
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/

# --- Translation catalogs ---
*.mo
*.pot

# --- Virtual environments and local env files ---
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# --- Editors and IDEs ---
.idea/
.vscode/
*.swp
*.swo
*~

# --- mypy ---
.mypy_cache/
.dmypy.json
dmypy.json

# --- ruff ---
.ruff_cache/

# --- Operating system files ---
.DS_Store
Thumbs.db

# --- Project specific ---
# NOTE(review): gpuci.yml is ignored here although the example workflows
# reference a gpuci.yml at the repository root - confirm this is intended.
gpuci.yml
*.cu.out
tmpclaude-*

0 commit comments

Comments
 (0)