Skip to content

Commit 096d218

Browse files
authored
Merge branch 'main' into docathon/fix-broken-include-paths
2 parents 63a3c80 + 12c1195 commit 096d218

609 files changed

Lines changed: 28584 additions & 6394 deletions

File tree

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.ci/docker/build.sh

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -92,18 +92,6 @@ esac
9292
TORCH_VERSION=$(cat ci_commit_pins/pytorch.txt)
9393
BUILD_DOCS=1
9494

95-
# Pull channel + spec/url helpers out of torch_pin.py so install_pytorch.sh
96-
# (which runs inside the docker build, where torch_pin.py isn't available)
97-
# can decide between wheel install (test/release) and source build (nightly).
98-
# Self-hosted runners often have python3 but not the unversioned python alias.
99-
PYTHON_BIN=$(command -v python3 || command -v python)
100-
TORCH_PIN_HELPERS=$(cd ../.. && "$PYTHON_BIN" -c "from torch_pin import CHANNEL, torch_spec, torchaudio_spec, torchvision_spec, torch_index_url_base; print(CHANNEL); print(torch_spec()); print(torchaudio_spec()); print(torchvision_spec()); print(torch_index_url_base())")
101-
TORCH_CHANNEL=$(echo "${TORCH_PIN_HELPERS}" | sed -n '1p')
102-
TORCH_SPEC=$(echo "${TORCH_PIN_HELPERS}" | sed -n '2p')
103-
TORCHAUDIO_SPEC=$(echo "${TORCH_PIN_HELPERS}" | sed -n '3p')
104-
TORCHVISION_SPEC=$(echo "${TORCH_PIN_HELPERS}" | sed -n '4p')
105-
TORCH_INDEX_URL=$(echo "${TORCH_PIN_HELPERS}" | sed -n '5p')
106-
10795
# Copy requirements-lintrunner.txt from root to here
10896
cp ../../requirements-lintrunner.txt ./
10997

@@ -116,11 +104,6 @@ docker build \
116104
--build-arg "PYTHON_VERSION=${PYTHON_VERSION}" \
117105
--build-arg "MINICONDA_VERSION=${MINICONDA_VERSION}" \
118106
--build-arg "TORCH_VERSION=${TORCH_VERSION}" \
119-
--build-arg "TORCH_CHANNEL=${TORCH_CHANNEL}" \
120-
--build-arg "TORCH_SPEC=${TORCH_SPEC}" \
121-
--build-arg "TORCHAUDIO_SPEC=${TORCHAUDIO_SPEC}" \
122-
--build-arg "TORCHVISION_SPEC=${TORCHVISION_SPEC}" \
123-
--build-arg "TORCH_INDEX_URL=${TORCH_INDEX_URL}" \
124107
--build-arg "BUCK2_VERSION=${BUCK2_VERSION}" \
125108
--build-arg "LINTRUNNER=${LINTRUNNER:-}" \
126109
--build-arg "BUILD_DOCS=${BUILD_DOCS}" \
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
release/2.11
1+
release/2.11

.ci/docker/common/install_pytorch.sh

Lines changed: 3 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -17,24 +17,6 @@ install_domains() {
1717
}
1818

1919
install_pytorch_and_domains() {
20-
if [ "${TORCH_CHANNEL}" != "nightly" ]; then
21-
# Test/release: install the published wheels directly. The specs and URL
22-
# are passed in as docker build args (computed from torch_pin.py by
23-
# .ci/docker/build.sh). RC wheels at /whl/test/ get re-uploaded under the
24-
# same version, so use --no-cache-dir there to avoid stale cache hits.
25-
local cache_flag=""
26-
if [ "${TORCH_CHANNEL}" = "test" ]; then
27-
cache_flag="--no-cache-dir"
28-
fi
29-
pip_install --force-reinstall ${cache_flag} \
30-
"${TORCH_SPEC}" "${TORCHVISION_SPEC}" "${TORCHAUDIO_SPEC}" \
31-
--index-url "${TORCH_INDEX_URL}/cpu"
32-
return
33-
fi
34-
35-
# Nightly: build pytorch from source against the pinned SHA in pytorch.txt
36-
# so we catch upstream regressions, then install audio/vision from the
37-
# commits that pytorch itself pins.
3820
git clone https://github.com/pytorch/pytorch.git
3921

4022
# Fetch the target commit
@@ -45,19 +27,14 @@ install_pytorch_and_domains() {
4527
chown -R ci-user .
4628

4729
export _GLIBCXX_USE_CXX11_ABI=1
48-
# PyTorch's FindARM.cmake hard-fails when the SVE+BF16 compile probe
49-
# doesn't pass — gcc-11 in this image is too old to accept the combined
50-
# NEON/SVE/bfloat16 intrinsics the probe exercises. Executorch's aarch64
51-
# runtime targets (phones, embedded) don't use SVE, so bypass the check.
52-
export BUILD_IGNORE_SVE_UNAVAILABLE=1
5330
# Then build and install PyTorch
5431
conda_run python setup.py bdist_wheel
5532
pip_install "$(echo dist/*.whl)"
5633

57-
# Defer to PyTorch's own pinned audio/vision commits.
58-
TORCHAUDIO_VERSION=$(cat .github/ci_commit_pins/audio.txt)
34+
# Grab the pinned audio and vision commits from PyTorch
35+
TORCHAUDIO_VERSION=release/2.11
5936
export TORCHAUDIO_VERSION
60-
TORCHVISION_VERSION=$(cat .github/ci_commit_pins/vision.txt)
37+
TORCHVISION_VERSION=release/0.26
6138
export TORCHVISION_VERSION
6239

6340
install_domains

.ci/docker/ubuntu/Dockerfile

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -64,11 +64,6 @@ ENV SCCACHE_S3_KEY_PREFIX executorch
6464
ENV SCCACHE_REGION us-east-1
6565

6666
ARG TORCH_VERSION
67-
ARG TORCH_CHANNEL
68-
ARG TORCH_SPEC
69-
ARG TORCHAUDIO_SPEC
70-
ARG TORCHVISION_SPEC
71-
ARG TORCH_INDEX_URL
7267
ARG SKIP_PYTORCH
7368
COPY ./common/install_pytorch.sh install_pytorch.sh
7469
COPY ./common/utils.sh utils.sh

.ci/scripts/download_hf_hub.sh

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
#!/bin/bash
22

3+
# Disable HF Xet storage to avoid stalled downloads on CI runners
4+
export HF_HUB_DISABLE_XET=1
5+
36
# Function to download files from the Hugging Face Hub
47
# Arguments:
58
# 1. model_id: The Hugging Face repository ID (e.g., "organization/model_name")

.ci/scripts/export_model_artifact.sh

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,9 @@ if [ -z "${1:-}" ]; then
6767
exit 1
6868
fi
6969

70+
# Disable HF Xet storage to avoid stalled downloads on CI runners
71+
export HF_HUB_DISABLE_XET=1
72+
7073
set -eux
7174

7275
DEVICE="$1"
@@ -415,14 +418,40 @@ if [ "$MODEL_NAME" = "qwen3_5_moe" ]; then
415418

416419
# Export to .pte/.ptd (short cache dir avoids objcopy symbol length issues)
417420
echo "::group::Export"
421+
EXPORT_LOG=$(mktemp)
418422
TORCHINDUCTOR_CACHE_DIR="$INDUCTOR_CACHE" \
419423
python -m executorch.examples.models.qwen3_5_moe.export \
420424
--prequantized "$LOCAL_MODEL_DIR" \
421425
--output-dir "${OUTPUT_DIR}" \
422426
--dense-prefill dequant \
423-
--moe-activation-dtype int8
427+
--moe-activation-dtype int8 2>&1 | tee "$EXPORT_LOG"
428+
EXPORT_RC=${PIPESTATUS[0]}
424429
echo "::endgroup::"
425430

431+
if [ "$EXPORT_RC" -ne 0 ]; then
432+
echo "ERROR: Qwen3.5 MoE export failed (exit $EXPORT_RC)"
433+
rm -f "$EXPORT_LOG"
434+
exit "$EXPORT_RC"
435+
fi
436+
437+
# Gate peak GPU memory so we keep the export viable on consumer GPUs
438+
# (e.g. RTX 4090 with 24 GB). The export script prints a machine-
439+
# parseable marker line "EXPORT_GPU_PEAK_MEMORY_MB: <float>".
440+
EXPORT_GPU_PEAK_MB_LIMIT="${EXPORT_GPU_PEAK_MB_LIMIT:-20480}"
441+
PEAK_LINE=$(grep -E '^EXPORT_GPU_PEAK_MEMORY_MB:' "$EXPORT_LOG" | tail -1)
442+
rm -f "$EXPORT_LOG"
443+
if [ -z "$PEAK_LINE" ]; then
444+
echo "ERROR: export did not emit EXPORT_GPU_PEAK_MEMORY_MB marker; cannot enforce GPU memory budget"
445+
exit 1
446+
fi
447+
PEAK_MB=$(echo "$PEAK_LINE" | awk '{print $2}')
448+
echo "Export GPU peak memory: ${PEAK_MB} MB (limit ${EXPORT_GPU_PEAK_MB_LIMIT} MB)"
449+
if awk -v p="$PEAK_MB" -v l="$EXPORT_GPU_PEAK_MB_LIMIT" 'BEGIN{exit !(p>l)}'; then
450+
echo "ERROR: export exceeded GPU memory budget (${PEAK_MB} MB > ${EXPORT_GPU_PEAK_MB_LIMIT} MB)"
451+
echo " — this would prevent the model from being exported on a 24 GB consumer GPU."
452+
exit 1
453+
fi
454+
426455
test -f "${OUTPUT_DIR}/model.pte"
427456
test -f "${OUTPUT_DIR}/aoti_cuda_blob.ptd"
428457
ls -al "${OUTPUT_DIR}"

.ci/scripts/setup-macos.sh

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,6 @@ setup_macos_env_variables
116116
# buck2 atm
117117
install_buck
118118
brew install libomp
119-
install_pip_dependencies
120119

121120
# TODO(huydhn): Unlike our self-hosted runner, GitHub runner doesn't have access
122121
# to our infra, so compiler caching needs to be setup differently using GitHub
@@ -125,10 +124,17 @@ if [[ -z "${GITHUB_RUNNER:-}" ]]; then
125124
install_sccache
126125
fi
127126

127+
# Install pinned torch before requirements-ci.txt so torchsr's transitive
128+
# torch dep is satisfied by the existing install and pip does not pull a
129+
# separate copy from PyPI. sccache is initialized above so source-build
130+
# cache misses still hit the cache.
128131
print_cmake_info
129132
install_pytorch_and_domains
130-
# We build PyTorch from source here instead of using nightly. This allows CI to test against
131-
# the pinned commit from PyTorch
133+
134+
install_pip_dependencies
135+
136+
# install_executorch's --use-pt-pinned-commit skips re-installing torch since
137+
# install_pytorch_and_domains already installed the pinned build above.
132138
if [[ "$EDITABLE" == "true" ]]; then
133139
install_executorch --use-pt-pinned-commit --editable
134140
else

.ci/scripts/test_backend.sh

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ export PYTHON_EXECUTABLE=python
3535

3636
# CMake options to use, in addition to the defaults.
3737
EXTRA_BUILD_ARGS=""
38+
PYTEST_RETRY_ARGS=()
3839

3940
if [[ "$FLOW" == *qnn* ]]; then
4041
# Setup QNN sdk and deps - note that this is a bit hacky due to the nature of the
@@ -57,6 +58,9 @@ if [[ "$FLOW" == *vulkan* ]]; then
5758
fi
5859

5960
if [[ "$FLOW" == *arm* ]]; then
61+
if [[ "$SUITE" == "operators" ]]; then
62+
PYTEST_RETRY_ARGS=(--reruns 2 --reruns-delay 1)
63+
fi
6064

6165
# Setup ARM deps.
6266
if [[ "$FLOW" == *vgf* ]]; then
@@ -95,6 +99,11 @@ GOLDEN_DIR="${ARTIFACT_DIR}/golden-artifacts"
9599
export GOLDEN_ARTIFACTS_DIR="${GOLDEN_DIR}"
96100

97101
EXIT_CODE=0
98-
${CONDA_RUN_CMD} pytest -c /dev/null -n auto backends/test/suite/$SUITE/ -m flow_$FLOW --json-report --json-report-file="$REPORT_FILE" || EXIT_CODE=$?
102+
PYTEST_ARGS=(-c /dev/null -n auto)
103+
if [[ ${#PYTEST_RETRY_ARGS[@]} -gt 0 ]]; then
104+
PYTEST_ARGS+=("${PYTEST_RETRY_ARGS[@]}")
105+
fi
106+
PYTEST_ARGS+=("backends/test/suite/$SUITE/" -m "flow_$FLOW" --json-report --json-report-file="$REPORT_FILE")
107+
${CONDA_RUN_CMD} pytest "${PYTEST_ARGS[@]}" || EXIT_CODE=$?
99108
# Generate markdown summary.
100109
${CONDA_RUN_CMD} python -m executorch.backends.test.suite.generate_markdown_summary_json "$REPORT_FILE" > ${GITHUB_STEP_SUMMARY:-"step_summary.md"} --exit-code $EXIT_CODE

.ci/scripts/test_coreml_bc.sh

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ source "${REPO_ROOT}/.ci/scripts/utils.sh"
2323
# Create a conda environment with Python 3.10 for compatibility with old ET versions
2424
# ET 1.0.0 only supports Python >=3.10,<3.13
2525
CONDA_ENV_NAME="coreml_bc_test_env"
26-
conda create -y -n "${CONDA_ENV_NAME}" python=3.10
26+
conda create -y -n "${CONDA_ENV_NAME}" python=3.10 pip packaging
2727

2828
# Use conda run to execute commands in the new environment
2929
CONDA_RUN="conda run --no-capture-output -n ${CONDA_ENV_NAME}"
@@ -69,7 +69,7 @@ git submodule sync --recursive
6969
git submodule update --init --recursive
7070

7171
# Install executorch
72-
${CONDA_RUN} pip install --upgrade pip
72+
${CONDA_RUN} python -m pip install --upgrade pip
7373
${CONDA_RUN} python install_executorch.py
7474

7575
# Step 3: Export model
@@ -129,7 +129,7 @@ git submodule update --init --recursive
129129

130130
# Step 5: Install current version
131131
echo "=== Step 5: Installing current ET version ==="
132-
${CONDA_RUN} pip install --upgrade pip
132+
${CONDA_RUN} python -m pip install --upgrade pip
133133
${CONDA_RUN} python install_executorch.py
134134

135135
# Step 6: Run the old pte file

.ci/scripts/test_cortex_m_e2e.sh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,9 @@ MODEL=$1
1717
script_dir=$(realpath "$(dirname "${BASH_SOURCE[0]}")")
1818
et_root_dir=$(realpath "${script_dir}/../..")
1919

20-
# Quantization is the default for the cortex-m55+int8 target; run.sh's
20+
# Quantization is the default for the cortex-m55 target; run.sh's
2121
# arg parser only recognizes --no_quantize, so we omit any explicit flag.
2222
bash "${et_root_dir}/examples/arm/run.sh" \
2323
--model_name="${MODEL}" \
24-
--target=cortex-m55+int8 \
24+
--target=cortex-m55 \
2525
--bundleio

0 commit comments

Comments
 (0)