Skip to content

Commit 5c74ce4

Browse files
authored
[ci] try to optimize windows builder ci (#589)
* try to optimize windows builder ci
* trigger cache-hit validation run
* fix
* verbose to debug why kernel-builder isn't picking up cache
* fix expression
* fix caching in the builder
* trigger warm cache run
1 parent 0c582a5 commit 5c74ce4

1 file changed

Lines changed: 76 additions & 13 deletions

File tree

.github/workflows/build_kernel_windows.yaml

Lines changed: 76 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,31 +26,94 @@ jobs:
2626
runs-on: ${{ matrix.os }}
2727

2828
steps:
29-
- uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
30-
with:
31-
key: cuda-toolkit-v${{ matrix.cuda }}-${{ matrix.os }}
32-
path: |
33-
C:\Program Files\NVIDIA GPU Computing Toolkit
34-
~/.cargo/registry
35-
~/.cargo/git
36-
3729
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
3830

39-
# CUDA environment setup
40-
- uses: huggingface/cuda-toolkit@714c97b32958862237b96401fb253a4261453c3b # v0.1.0
41-
id: setup-cuda-toolkit
31+
# ---- CUDA toolkit (cache + skip installer on hit) ----
32+
# On a cache hit we restore C:\Program Files\NVIDIA GPU Computing Toolkit
33+
# and skip the cuda-toolkit action entirely (which otherwise spends ~7
34+
# min running the MSI even when the files are already on disk). We then
35+
# replicate the small bit of env setup the action would have done — see
36+
# the "Restore CUDA env vars (cache hit only)" step below.
37+
- name: Cache CUDA toolkit
38+
id: cuda-cache
39+
uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v5.0.5
4240
with:
43-
cuda: ${{ matrix.torch.cuda }} # TODO(mfuntowicz): How can we test multiple CUDA versions that align with torch?
41+
path: C:\Program Files\NVIDIA GPU Computing Toolkit
42+
# Key bumps:
43+
# - matrix.torch.cuda — different CUDA versions get separate caches
44+
# - 714c97b3 — pinned SHA of huggingface/cuda-toolkit; bump when the
45+
# action changes so we re-download instead of reusing a stale tree
46+
key: cuda-toolkit-${{ matrix.torch.cuda }}-714c97b3-${{ matrix.os }}
47+
48+
- name: Install CUDA toolkit
49+
if: steps.cuda-cache.outputs.cache-hit != 'true'
50+
uses: huggingface/cuda-toolkit@714c97b32958862237b96401fb253a4261453c3b # v0.1.0
51+
with:
52+
cuda: ${{ matrix.torch.cuda }}
53+
54+
- name: Restore CUDA env vars (cache hit only)
55+
# huggingface/cuda-toolkit's updatePath sets CUDA_PATH, CUDA_PATH_VX_Y,
56+
# and prepends <CUDA_PATH>\bin to PATH. When we skip the action above,
57+
# those env mutations don't happen — replicate them here so nvcc and
58+
# the downstream builds find the toolkit.
59+
#
60+
# Also re-install the MSBuild integration: the CUDA installer normally
61+
# copies CUDA <ver>.{props,targets,xml} from the toolkit's
62+
# extras\visual_studio_integration\MSBuildExtensions\ into the VS
63+
# BuildCustomizations dir. Without that, CMake's CUDA language detection
64+
# fails with "No CUDA toolset found". Cache only restores the toolkit
65+
# tree, so we copy the props in by hand on cache hits.
66+
if: steps.cuda-cache.outputs.cache-hit == 'true'
67+
shell: pwsh
68+
run: |
69+
$parts = "${{ matrix.torch.cuda }}".Split('.')
70+
$major = $parts[0]
71+
$minor = $parts[1]
72+
$cudaPath = "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v$major.$minor"
73+
"CUDA_PATH=$cudaPath" | Out-File $env:GITHUB_ENV -Append -Encoding utf8
74+
"CUDA_PATH_V${major}_${minor}=$cudaPath" | Out-File $env:GITHUB_ENV -Append -Encoding utf8
75+
"$cudaPath\bin" | Out-File $env:GITHUB_PATH -Append -Encoding utf8
76+
77+
$msBuildExt = Join-Path $cudaPath 'extras\visual_studio_integration\MSBuildExtensions'
78+
if (-not (Test-Path $msBuildExt)) {
79+
throw "MSBuild integration not found in cached toolkit at $msBuildExt — cache may be incomplete."
80+
}
81+
# GitHub-hosted windows-2022 ships VS 2022 Enterprise; glob anyway so
82+
# we don't silently break if the image switches edition.
83+
$vsRoots = Get-ChildItem 'C:\Program Files\Microsoft Visual Studio\2022' -Directory -ErrorAction SilentlyContinue
84+
if (-not $vsRoots) { throw "Visual Studio 2022 not found on runner." }
85+
foreach ($vs in $vsRoots) {
86+
$dest = Join-Path $vs.FullName 'MSBuild\Microsoft\VC\v170\BuildCustomizations'
87+
New-Item -ItemType Directory -Force -Path $dest | Out-Null
88+
Copy-Item -Path (Join-Path $msBuildExt '*') -Destination $dest -Force -Recurse
89+
Write-Host "Installed CUDA MSBuild integration into $dest"
90+
}
91+
4492
- name: "NVCC checks"
4593
run: nvcc -V
4694

47-
# Rust build environment setup
95+
# ---- Rust toolchain + cached kernel-builder build ----
4896
- uses: actions-rs/toolchain@16499b5e05bf2e26879000db0c1d13f7e13fa3af # v1.0.7
4997
with:
5098
toolchain: stable
5199
profile: minimal
52100
override: true
53101

102+
# Caches the workspace target/ plus ~/.cargo/{registry,git}. Keys on
103+
# Cargo.lock so a clean dep-graph change invalidates the artifact cache
104+
# but unrelated edits reuse it incrementally. Cuts the kernel-builder
105+
# build from ~8 min cold to ~30s warm.
106+
#
107+
# workspaces must point at the actual workspace root (root Cargo.toml
108+
# has `[workspace] members = [..., "kernel-builder", ...]`). Cargo
109+
# always writes target/ at the workspace root, so caching
110+
# ./kernel-builder/target would restore to a path cargo never reads.
111+
- name: Cache cargo + kernel-builder target
112+
uses: Swatinem/rust-cache@c19371144df3bb44fab255c43d04cbc2ab54d1c4 # v2.9.1
113+
with:
114+
workspaces: .
115+
shared-key: kernel-builder-${{ matrix.os }}
116+
54117
- name: Build kernel-builder
55118
run: ( cd kernel-builder && cargo build --release )
56119

0 commit comments

Comments
 (0)