Skip to content

Commit 3a62fac

Browse files
authored
Revert "Move torch pin from the 2.11 to the 2026-04-09 nightly, and drop deprecated CUDA versions from CI" (#19160)
Reverts #19072. Too many failures on https://hud.pytorch.org/hud/pytorch/executorch/main/1?per_page=50 — lots of AOTI/CUDA/Metal failures.
1 parent ad2f500 commit 3a62fac

15 files changed

Lines changed: 63 additions & 135 deletions

File tree

.ci/docker/build.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ case "${IMAGE_NAME}" in
8181
LINTRUNNER=""
8282
GCC_VERSION=11
8383
CUDA_WINDOWS_CROSS_COMPILE=yes
84-
CUDA_VERSION=12.6
84+
CUDA_VERSION=12.8
8585
SKIP_PYTORCH=yes
8686
;;
8787
*)
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
358117c166b75167a09bca81ac9925940feda339
1+
release/2.11

.ci/docker/common/install_cuda_windows_cross_compile.sh

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,13 +11,12 @@ set -ex
1111

1212
INSTALL_DIR="${WINDOWS_CUDA_INSTALL_DIR:-/opt/cuda-windows}"
1313

14-
# Mapping of CUDA versions to their corresponding driver versions for Windows installers.
14+
# Mapping of CUDA versions to their corresponding driver versions for Windows installers
1515
# Source: https://developer.nvidia.com/cuda-toolkit-archive
16-
# Format: "PATCH_VERSION:DRIVER_VERSION". Starting with CUDA 13.0, NVIDIA dropped the
17-
# driver suffix from the Windows installer filename, so the driver field is empty.
1816
declare -A CUDA_DRIVER_MAP=(
1917
["12.6"]="12.6.3:561.17"
20-
["13.0"]="13.0.3:"
18+
["12.8"]="12.8.1:572.61"
19+
["12.9"]="12.9.1:576.57"
2120
)
2221

2322
install_mingw() {
@@ -84,8 +83,7 @@ install_windows_cuda() {
8483
mkdir -p "${INSTALL_DIR}"
8584
cd "${INSTALL_DIR}"
8685

87-
# CUDA 13.0+ installers no longer include the driver version in the filename.
88-
CUDA_INSTALLER="cuda_${CUDA_VERSION}${CUDA_DRIVER_VERSION:+_${CUDA_DRIVER_VERSION}}_windows.exe"
86+
CUDA_INSTALLER="cuda_${CUDA_VERSION}_${CUDA_DRIVER_VERSION}_windows.exe"
8987
CUDA_URL="https://developer.download.nvidia.com/compute/cuda/${CUDA_VERSION}/local_installers/${CUDA_INSTALLER}"
9088

9189
# Check if already downloaded and extracted

.ci/docker/common/install_pytorch.sh

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -27,19 +27,14 @@ install_pytorch_and_domains() {
2727
chown -R ci-user .
2828

2929
export _GLIBCXX_USE_CXX11_ABI=1
30-
# PyTorch's FindARM.cmake hard-fails when the SVE+BF16 compile probe
31-
# doesn't pass — gcc-11 in this image is too old to accept the combined
32-
# NEON/SVE/bfloat16 intrinsics the probe exercises. Executorch's aarch64
33-
# runtime targets (phones, embedded) don't use SVE, so bypass the check.
34-
export BUILD_IGNORE_SVE_UNAVAILABLE=1
3530
# Then build and install PyTorch
3631
conda_run python setup.py bdist_wheel
3732
pip_install "$(echo dist/*.whl)"
3833

3934
# Grab the pinned audio and vision commits from PyTorch
40-
TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt)
35+
TORCHAUDIO_VERSION=release/2.11
4136
export TORCHAUDIO_VERSION
42-
TORCHVISION_VERSION=$(cat .github/ci_commit_pins/vision.txt)
37+
TORCHVISION_VERSION=release/0.26
4338
export TORCHVISION_VERSION
4439

4540
install_domains

.ci/scripts/test_model_e2e.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ if [ "$AUDIO_URL" != "" ]; then
260260
elif [[ "$MODEL_NAME" == *whisper* ]] || [ "$MODEL_NAME" = "voxtral_realtime" ]; then
261261
conda install -y -c conda-forge "ffmpeg<8"
262262
pip install datasets soundfile
263-
pip install torchcodec==0.12.0.dev20260409 --extra-index-url https://download.pytorch.org/whl/nightly/cpu
263+
pip install torchcodec==0.11.0 --extra-index-url https://download.pytorch.org/whl/test/cpu
264264
python -c "from datasets import load_dataset;import soundfile as sf;sample = load_dataset('distil-whisper/librispeech_long', 'clean', split='validation')[0]['audio'];sf.write('${MODEL_DIR}/$AUDIO_FILE', sample['array'][:sample['sampling_rate']*30], sample['sampling_rate'])"
265265
fi
266266

.ci/scripts/test_wheel_package_qnn.sh

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -158,17 +158,17 @@ print(module_vars["TORCH_VERSION"])
158158
PY
159159
)
160160

161-
NIGHTLY_VERSION=$(
162-
"$PYBIN" - <<'PY'
163-
import runpy
164-
module_vars = runpy.run_path("torch_pin.py")
165-
print(module_vars["NIGHTLY_VERSION"])
166-
PY
167-
)
168-
echo "=== [$LABEL] Install torch==${TORCH_VERSION}.${NIGHTLY_VERSION} ==="
169-
170-
# Install torchao based on the pinned PyTorch version
171-
"$PIPBIN" install torch=="${TORCH_VERSION}.${NIGHTLY_VERSION}" --index-url "https://download.pytorch.org/whl/nightly/cpu"
161+
# NIGHTLY_VERSION=$(
162+
# "$PYBIN" - <<'PY'
163+
# import runpy
164+
# module_vars = runpy.run_path("torch_pin.py")
165+
# print(module_vars["NIGHTLY_VERSION"])
166+
# PY
167+
# )
168+
echo "=== [$LABEL] Install torch==${TORCH_VERSION} ==="
169+
170+
# Install torch based on the pinned PyTorch version, preferring the PyTorch test index
171+
"$PIPBIN" install torch=="${TORCH_VERSION}" --extra-index-url "https://download.pytorch.org/whl/test"
172172
"$PIPBIN" install wheel
173173

174174
# Install torchao based on the pinned commit from third-party/ao submodule

.ci/scripts/utils.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@ dedupe_macos_loader_path_rpaths() {
5353
pushd ..
5454
torch_lib_dir=$(python -c "import importlib.util; print(importlib.util.find_spec('torch').submodule_search_locations[0])")/lib
5555
popd
56-
56+
5757
if [[ -z "${torch_lib_dir}" || ! -d "${torch_lib_dir}" ]]; then
5858
return
5959
fi
@@ -141,9 +141,9 @@ install_pytorch_and_domains() {
141141

142142
dedupe_macos_loader_path_rpaths
143143
# Grab the pinned audio and vision commits from PyTorch
144-
TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt)
144+
TORCHAUDIO_VERSION=release/2.11
145145
export TORCHAUDIO_VERSION
146-
TORCHVISION_VERSION=$(cat .github/ci_commit_pins/vision.txt)
146+
TORCHVISION_VERSION=release/0.26
147147
export TORCHVISION_VERSION
148148

149149
install_domains

.github/workflows/cuda-windows.yml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ jobs:
6464
secrets-env: EXECUTORCH_HF_TOKEN
6565
runner: linux.g5.4xlarge.nvidia.gpu
6666
gpu-arch-type: cuda
67-
gpu-arch-version: 12.6
67+
gpu-arch-version: 12.8
6868
docker-image: ci-image:executorch-ubuntu-22.04-cuda-windows
6969
submodules: recursive
7070
upload-artifact: ${{ matrix.model_repo }}-${{ matrix.model_name }}-cuda-windows-${{ matrix.quant }}
@@ -146,7 +146,7 @@ jobs:
146146
timeout: 240
147147
runner: windows.g5.4xlarge.nvidia.gpu
148148
gpu-arch-type: cuda
149-
gpu-arch-version: 12.6
149+
gpu-arch-version: 12.8
150150
download-artifact: ${{ matrix.model_repo }}-${{ matrix.model_name }}-cuda-windows-${{ matrix.quant }}
151151
ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
152152
script: |
@@ -158,7 +158,7 @@ jobs:
158158
\$ErrorActionPreference = 'Stop'
159159
\$PSNativeCommandUseErrorActionPreference = \$true
160160
161-
\$env:CUDA_HOME = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.6'
161+
\$env:CUDA_HOME = 'C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.8'
162162
\$env:CUDA_PATH = \$env:CUDA_HOME
163163
\$env:PATH = \"\$env:CUDA_HOME\bin;\$env:PATH\"
164164
nvcc --version
@@ -169,5 +169,5 @@ jobs:
169169
throw 'RUNNER_ARTIFACT_DIR is empty. Ensure download-artifact is configured for windows_job.yml.'
170170
}
171171
172-
.ci/scripts/test_model_e2e_windows.ps1 -Device cuda-windows -HfModel '${{ matrix.model_repo }}/${{ matrix.model_name }}' -QuantName '${{ matrix.quant }}' -ModelDir \$artifactDir -ExpectedCudaVersion '12.6'
172+
.ci/scripts/test_model_e2e_windows.ps1 -Device cuda-windows -HfModel '${{ matrix.model_repo }}/${{ matrix.model_name }}' -QuantName '${{ matrix.quant }}' -ModelDir \$artifactDir -ExpectedCudaVersion '12.8'
173173
}"

.github/workflows/cuda.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
# Test ExecuTorch CUDA Build Compatibility
22
# This workflow tests whether ExecuTorch can be successfully built with CUDA support
3-
# across different CUDA versions (12.6, 13.0) using the command:
3+
# across different CUDA versions (12.6, 12.8, 12.9, 13.0) using the command:
44
# ./install_executorch.sh
55
#
66
# Note: ExecuTorch automatically detects the system CUDA version using nvcc and
@@ -31,7 +31,7 @@ jobs:
3131
strategy:
3232
fail-fast: false
3333
matrix:
34-
cuda-version: ["12.6", "13.0"]
34+
cuda-version: ["12.6", "12.8", "12.9", "13.0"]
3535

3636
name: test-executorch-cuda-build-${{ matrix.cuda-version }}
3737
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
@@ -66,7 +66,7 @@ jobs:
6666
echo "CUDA build results: ${{ needs.test-cuda-builds.result }}"
6767
exit 1
6868
else
69-
echo "SUCCESS: All ExecuTorch CUDA builds (12.6, 13.0) completed successfully!"
69+
echo "SUCCESS: All ExecuTorch CUDA builds (12.6, 12.8, 12.9, 13.0) completed successfully!"
7070
fi
7171
7272
test-models-cuda:

.github/workflows/docker-builds.yml

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,20 +33,17 @@ jobs:
3333
matrix:
3434
runner: [linux.4xlarge]
3535
docker-image-name: [
36+
executorch-ubuntu-22.04-gcc11,
3637
executorch-ubuntu-22.04-gcc9-nopytorch,
3738
executorch-ubuntu-22.04-clang12,
3839
executorch-ubuntu-22.04-linter,
3940
executorch-ubuntu-22.04-arm-sdk,
41+
executorch-ubuntu-22.04-zephyr-sdk,
4042
executorch-ubuntu-22.04-qnn-sdk,
4143
executorch-ubuntu-22.04-mediatek-sdk,
4244
executorch-ubuntu-22.04-clang12-android
4345
]
4446
include:
45-
# PyTorch is built from source in these images; 4xlarge OOMs mid-build.
46-
- docker-image-name: executorch-ubuntu-22.04-gcc11
47-
runner: linux.12xlarge
48-
- docker-image-name: executorch-ubuntu-22.04-zephyr-sdk
49-
runner: linux.12xlarge
5047
- docker-image-name: executorch-ubuntu-22.04-gcc11-aarch64
5148
runner: linux.arm64.2xlarge
5249
- docker-image-name: executorch-ubuntu-22.04-gcc11-aarch64-android

0 commit comments

Comments
 (0)