-
Notifications
You must be signed in to change notification settings - Fork 204
112 lines (99 loc) · 4.05 KB
/
ci.yml
File metadata and controls
112 lines (99 loc) · 4.05 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# CI workflow: validates the uv workspace and builds/tests the CUDA targets.
name: CI

# Runs for pull requests targeting main, and on manual dispatch.
on:
  pull_request:
    branches: [main]
  workflow_dispatch:

# Least privilege: every job below only checks out and builds the repository,
# so the GITHUB_TOKEN needs nothing beyond read access to contents.
permissions:
  contents: read

jobs:
  # Fast job: runs the repository's uv workspace check script.
  uv-workspace:
    name: uv workspace (lock + sync + import smoke)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: astral-sh/setup-uv@v3
        with:
          version: "0.11.x"
      - name: Verify uv lockfile and workspace sync
        # Skips the torch wheel in this fast job; the CUDA build below runs a
        # full sync and builds megakernel against torch.
        run: bash scripts/check_uv_workspace.sh

  # Full job: builds the dflash CMake targets, runs the C++ and Python server
  # unit tests, then builds and import-checks the megakernel extension for
  # sm_75 and again for sm_86.
  build:
    name: Build (cmake + uv sync --extra megakernel)
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          submodules: recursive
      - uses: Jimver/cuda-toolkit@v0.2.35
        with:
          # Keep the existing upstream CUDA toolkit version; the uv workspace
          # only replaces Python environment setup.
          # Building megakernel's CUDAExtension links against torch's libs,
          # so CI uses the matching cu128 PyTorch wheel below.
          cuda: '12.8.0'
          method: network
          sub-packages: '["nvcc", "cudart-dev", "thrust", "driver-dev"]'
          non-cuda-sub-packages: '["libcublas-dev"]'
      # uv reads .python-version (3.12, matching the previous CI) and
      # downloads the matching interpreter; no separate setup-python step
      # is needed.
      - uses: astral-sh/setup-uv@v3
        with:
          version: "0.11.x"
      - name: Build dflash (smoke + server)
        run: |
          cd dflash
          cmake -B build \
            -DCMAKE_CUDA_ARCHITECTURES="86" \
            -DDFLASH27B_ENABLE_BSA=OFF \
            -DDFLASH27B_FA_ALL_QUANTS=OFF \
            -DCMAKE_BUILD_TYPE=Release
          cmake --build build --target \
            test_dflash test_generate test_flash_attn_sparse \
            dflash_server test_server_unit \
            -j$(nproc)
      - name: Run C++ server unit tests
        # --no-tests=error makes the step fail if the -R filter matches
        # nothing, instead of passing vacuously.
        run: |
          cd dflash/build
          ctest --output-on-failure -R server_unit --no-tests=error
      - name: Run Python server unit tests
        # Install through `python3 -m pip` so the packages land in the same
        # interpreter that runs pytest on the last line; a bare `pip` may
        # belong to a different Python on the runner.
        run: |
          python3 -m pip install pytest fastapi httpx transformers
          cd dflash/scripts
          python3 -m pytest test_server.py -v
      - name: Populate venv with cu128 torch + setuptools
        # First pass: install the workspace's default deps. dflash declares
        # torch (which uv pulls from the pytorch-cu128 index per
        # [tool.uv.sources]) and pflash drags in setuptools. Megakernel is
        # in the optional `megakernel` extra so its build does NOT run yet.
        run: uv sync --frozen
      - name: Build megakernel via uv sync (sm_75)
        env:
          CUDA_HOME: ${{ env.CUDA_PATH }}
          MEGAKERNEL_CUDA_ARCH: sm_75
        # Second pass: builds megakernel against the venv populated above.
        # `no-build-isolation-package = ["qwen35-megakernel-bf16"]` (set in
        # the root pyproject.toml) forces setup.py to import the main env's
        # cu128 torch — same wheel that runtime will use, so the resulting
        # .so links against an ABI we can actually load.
        run: uv sync --frozen --extra megakernel
      - name: Verify megakernel extension imports
        # The Python source inside the quotes must stay at the block's base
        # indentation: any extra leading whitespace would reach `python -c`
        # and raise an IndentationError.
        run: |
          uv run --frozen --no-sync python -c "
          import torch
          import qwen35_megakernel_bf16_C as M
          print('megakernel sm_75 .so loads:', M.__file__)
          "
      - name: Rebuild megakernel via uv sync (sm_86)
        env:
          CUDA_HOME: ${{ env.CUDA_PATH }}
          MEGAKERNEL_CUDA_ARCH: sm_86
        # --reinstall-package forces uv to recompile the CUDAExtension
        # with the new MEGAKERNEL_CUDA_ARCH env var; without it, uv
        # would skip the already-installed package at the same version.
        run: uv sync --frozen --extra megakernel --reinstall-package qwen35-megakernel-bf16
      - name: Verify megakernel extension imports (sm_86)
        # Same base-indentation constraint as the sm_75 import check.
        run: |
          uv run --frozen --no-sync python -c "
          import torch
          import qwen35_megakernel_bf16_C as M
          print('megakernel sm_86 .so loads:', M.__file__)
          "