Skip to content

Commit fa857bd

Browse files
authored
ci: fix macOS PyTorch wheel cache key for branch-ref pins (#19350)
### Summary

`install_pytorch_and_domains` constructs the cached-wheel URL using `${TORCH_VERSION:0:7}`, which yields `release` when the pin is a branch ref like `release/2.11`. The upload code uses the basename of `dist/*.whl`, which is whatever PyTorch's setup.py wrote — always the resolved commit hash (e.g. `+git70d99e9`). The two never match, so every macOS run misses the cache and does a ~30-minute source build even though the wheel for the current pin's HEAD is already in S3.

Resolve the hash via `git rev-parse --short=7 HEAD` after `git checkout`, so download and upload agree. Commit-hash pins are unchanged (the first 7 chars already equaled the resolved hash).

Authored with Claude Code.

### Test plan

CI
1 parent 180edd3 commit fa857bd

1 file changed

Lines changed: 29 additions & 1 deletion

File tree

.ci/scripts/utils.sh

Lines changed: 29 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -105,7 +105,11 @@ install_pytorch_and_domains() {
105105
fi
106106
local python_version=$(python -c 'import platform; v=platform.python_version_tuple(); print(f"{v[0]}{v[1]}")')
107107
local torch_release=$(cat version.txt)
108-
local torch_short_hash=${TORCH_VERSION:0:7}
108+
# Download key must match the upload key below (basename of dist/*.whl,
109+
# which always carries setup.py's resolved +gitHASH). Branch-ref pins
110+
# like `release/2.11` would otherwise produce `+gitrelease` here and
111+
# never hit the cache.
112+
local torch_short_hash=$(git rev-parse --short=7 HEAD)
109113
local torch_wheel_path="cached_artifacts/pytorch/executorch/pytorch_wheels/${system_name}/${python_version}"
110114
local torch_wheel_name="torch-${torch_release}%2Bgit${torch_short_hash}-cp${python_version}-cp${python_version}-${platform:-}.whl"
111115

@@ -127,6 +131,30 @@ install_pytorch_and_domains() {
127131
USE_DISTRIBUTED=1 python setup.py bdist_wheel
128132
pip install "$(echo dist/*.whl)"
129133

134+
# Invariant: the basename setup.py just produced must match the cache
135+
# URL we'd reconstruct on the next run. If they diverge (someone edits
136+
# torch_wheel_name above, or PyTorch renames its wheels), the cache
137+
# will silently miss and every macOS run will fall back to a ~30-min
138+
# source build. Fail loudly so the regression is caught immediately.
139+
shopt -s nullglob
140+
local built_wheels=(dist/*.whl)
141+
shopt -u nullglob
142+
if [[ ${#built_wheels[@]} -ne 1 ]]; then
143+
echo "ERROR: expected exactly 1 wheel in dist/, found ${#built_wheels[@]}" >&2
144+
exit 1
145+
fi
146+
local built_wheel_name
147+
built_wheel_name=$(basename "${built_wheels[0]}")
148+
local expected_wheel_name="${torch_wheel_name//\%2B/+}"
149+
if [[ "${built_wheel_name}" != "${expected_wheel_name}" ]]; then
150+
echo "ERROR: built torch wheel name does not match cache URL key:" >&2
151+
echo " built: ${built_wheel_name}" >&2
152+
echo " expected: ${expected_wheel_name}" >&2
153+
echo "Fix torch_wheel_name construction in install_pytorch_and_domains" >&2
154+
echo "in .ci/scripts/utils.sh" >&2
155+
exit 1
156+
fi
157+
130158
# Only AWS runners have access to S3
131159
if command -v aws && [[ -z "${GITHUB_RUNNER:-}" ]]; then
132160
for wheel_path in dist/*.whl; do

0 commit comments

Comments
 (0)