-
Notifications
You must be signed in to change notification settings - Fork 102
139 lines (122 loc) · 6.45 KB
/
build_kernel_windows.yaml
File metadata and controls
139 lines (122 loc) · 6.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
name: 'Build and test kernel - Windows'

# Triggers: pushes to main, pull requests targeting main, and manual runs.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
    # Pull-request activity types that start a run.
    types:
      - opened
      - synchronize
      - reopened
    # Path patterns excluded from triggering pull-request runs.
    paths-ignore:
      - 'docs/**'
      - '*.md'
  workflow_dispatch:
jobs:
  # Single job: installs (or restores from cache) the CUDA toolkit, builds the
  # Rust kernel-builder, installs PyTorch, then builds each example kernel with
  # the Windows builder script.
  build:
    strategy:
      matrix:
        os: [windows-2022]
        # Quoted on purpose: an unquoted version is parsed as a YAML float, so
        # a future bump to e.g. 3.10 would silently become 3.1.
        python: ['3.12']
        # One entry per torch/CUDA combination; only the CUDA 12.8.1 entry is
        # currently enabled.
        torch:
          # - { version: '2.9.1', cuda: '12.6.3', wheel: '126' }
          - { version: '2.9.1', cuda: '12.8.1', wheel: '128' }
          # - { version: '2.9.1', cuda: '13.0.1', wheel: '130' }
    name: Build kernel
    runs-on: ${{ matrix.os }}
    steps:
      - uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2

      # ---- CUDA toolkit (cache + skip installer on hit) ----
      # On a cache hit we restore C:\Program Files\NVIDIA GPU Computing Toolkit
      # and skip the cuda-toolkit action entirely (which otherwise spends ~7
      # min running the MSI even when the files are already on disk). We then
      # replicate the small bit of env setup the action would have done — see
      # the "Restore CUDA env vars (cache hit only)" step below.
      - name: Cache CUDA toolkit
        id: cuda-cache
        uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae  # v5.0.5
        with:
          path: 'C:\Program Files\NVIDIA GPU Computing Toolkit'
          # Key components:
          # - matrix.torch.cuda — different CUDA versions get separate caches
          # - 714c97b3 — pinned SHA of huggingface/cuda-toolkit; bump when the
          #   action changes so we re-download instead of reusing a stale tree
          # - matrix.os — separate caches per runner image
          key: cuda-toolkit-${{ matrix.torch.cuda }}-714c97b3-${{ matrix.os }}

      # Runs the real installer only when the cache step above missed.
      - name: Install CUDA toolkit
        if: steps.cuda-cache.outputs.cache-hit != 'true'
        uses: huggingface/cuda-toolkit@714c97b32958862237b96401fb253a4261453c3b  # v0.1.0
        with:
          cuda: ${{ matrix.torch.cuda }}

      - name: Restore CUDA env vars (cache hit only)
        # huggingface/cuda-toolkit's updatePath sets CUDA_PATH, CUDA_PATH_VX_Y,
        # and prepends <CUDA_PATH>\bin to PATH. When we skip the action above,
        # those env mutations don't happen — replicate them here so nvcc and
        # the downstream builds find the toolkit.
        #
        # Also re-install the MSBuild integration: the CUDA installer normally
        # copies CUDA <ver>.{props,targets,xml} from the toolkit's
        # extras\visual_studio_integration\MSBuildExtensions\ into the VS
        # BuildCustomizations dir. Without that, CMake's CUDA language detection
        # fails with "No CUDA toolset found". Cache only restores the toolkit
        # tree, so we copy the props in by hand on cache hits.
        if: steps.cuda-cache.outputs.cache-hit == 'true'
        shell: pwsh
        run: |
          $parts = "${{ matrix.torch.cuda }}".Split('.')
          $major = $parts[0]
          $minor = $parts[1]
          $cudaPath = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$major.$minor"
          "CUDA_PATH=$cudaPath" | Out-File $env:GITHUB_ENV -Append -Encoding utf8
          "CUDA_PATH_V${major}_${minor}=$cudaPath" | Out-File $env:GITHUB_ENV -Append -Encoding utf8
          "$cudaPath\bin" | Out-File $env:GITHUB_PATH -Append -Encoding utf8
          $msBuildExt = Join-Path $cudaPath 'extras\visual_studio_integration\MSBuildExtensions'
          if (-not (Test-Path $msBuildExt)) {
            throw "MSBuild integration not found in cached toolkit at $msBuildExt — cache may be incomplete."
          }
          # GitHub-hosted windows-2022 ships VS 2022 Enterprise; glob anyway so
          # we don't silently break if the image switches edition.
          $vsRoots = Get-ChildItem 'C:\Program Files\Microsoft Visual Studio\2022' -Directory -ErrorAction SilentlyContinue
          if (-not $vsRoots) { throw "Visual Studio 2022 not found on runner." }
          foreach ($vs in $vsRoots) {
            $dest = Join-Path $vs.FullName 'MSBuild\Microsoft\VC\v170\BuildCustomizations'
            New-Item -ItemType Directory -Force -Path $dest | Out-Null
            Copy-Item -Path (Join-Path $msBuildExt '*') -Destination $dest -Force -Recurse
            Write-Host "Installed CUDA MSBuild integration into $dest"
          }

      # Sanity check that nvcc is on PATH after either the install or the
      # cache-restore path.
      - name: NVCC checks
        run: nvcc -V

      # ---- Rust toolchain + cached kernel-builder build ----
      # NOTE(review): actions-rs/toolchain appears to be archived upstream —
      # confirm, and consider migrating to a maintained toolchain action.
      - uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af  # v1.0.7
        with:
          toolchain: stable
          profile: minimal
          override: true

      # Caches the workspace target/ plus ~/.cargo/{registry,git}. Keys on
      # Cargo.lock so a clean dep-graph change invalidates the artifact cache
      # but unrelated edits reuse it incrementally. Cuts the kernel-builder
      # build from ~8 min cold to ~30s warm.
      #
      # workspaces must point at the actual workspace root (root Cargo.toml
      # has `[workspace] members = [..., "kernel-builder", ...]`). Cargo
      # always writes target/ at the workspace root, so caching
      # ./kernel-builder/target would restore to a path cargo never reads.
      - name: Cache cargo + kernel-builder target
        uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4  # v2.9.1
        with:
          workspaces: .
          shared-key: kernel-builder-${{ matrix.os }}

      # working-directory replaces an in-script `cd`, so the step's command is
      # just the cargo invocation.
      - name: Build kernel-builder
        working-directory: kernel-builder
        run: cargo build --release

      # ---- Python environment ----
      - uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405  # v6.2.0
        with:
          python-version: ${{ matrix.python }}
          cache: 'pip'

      # NOTE(review): matrix.torch.version is not referenced by this command,
      # so torch is installed unpinned from the cu<wheel> index — confirm
      # whether `torch==<version>` was intended.
      - name: Install PyTorch
        run: pip install torch --index-url https://download.pytorch.org/whl/cu${{ matrix.torch.wheel }}

      # ---- Example kernel builds ----
      # Each command is a folded scalar (`>-`), i.e. one logical command line.
      # The script is invoked directly rather than inside PowerShell's `( )`
      # grouping operator, which would collect the command's output until it
      # finishes instead of streaming it to the job log.
      - name: Build cutlass GEMM kernel
        run: >-
          nix-builder\scripts\windows\builder.ps1
          -SourceFolder examples/kernels/cutlass-gemm
          -BuildConfig Release -Backend cuda -Build -Force

      - name: Build relu kernel
        run: >-
          nix-builder\scripts\windows\builder.ps1
          -SourceFolder examples/kernels/relu
          -BuildConfig Release -Backend cuda -Build -Force

      - name: Build relu-backprop-compile kernel
        run: >-
          nix-builder\scripts\windows\builder.ps1
          -SourceFolder examples/kernels/relu-backprop-compile
          -BuildConfig Release -Backend cuda -Build -Force

      - name: Build silu-and-mul kernel
        run: >-
          nix-builder\scripts\windows\builder.ps1
          -SourceFolder examples/kernels/silu-and-mul
          -BuildConfig Release -Backend cuda -Build -Force