@@ -22,7 +22,9 @@ Arguments:
2222 - mistralai/Voxtral-Mini-4B-Realtime-2602
2323 - openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo})
2424 - google/gemma-3-4b-it
25+ - nvidia/diar_streaming_sortformer_4spk-v2
2526 - nvidia/parakeet-tdt
27+ - facebook/dinov2-small-imagenet1k-1-layer
28+ - SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4
2628
2729 quant_name Quantization type (optional, default: non-quantized)
2830 Options:
@@ -45,6 +47,7 @@ Examples:
4547 export_model_artifact.sh metal "mistralai/Voxtral-Mini-4B-Realtime-2602" "quantized-int4-metal"
4648 export_model_artifact.sh metal "mistralai/Voxtral-Mini-4B-Realtime-2602" "non-quantized" "." "vr-streaming"
4749 export_model_artifact.sh cuda "mistralai/Voxtral-Mini-3B-2507" "quantized-int4-tile-packed"
50+ export_model_artifact.sh cuda-windows "nvidia/diar_streaming_sortformer_4spk-v2" "non-quantized" "./output"
4851 export_model_artifact.sh cuda "google/gemma-3-4b-it" "non-quantized" "./output"
4952 export_model_artifact.sh cuda "nvidia/parakeet-tdt" "non-quantized" "./output"
5053 export_model_artifact.sh xnnpack "nvidia/parakeet-tdt" "quantized-8da4w" "./output"
@@ -157,6 +160,22 @@ case "$HF_MODEL" in
157160 PREPROCESSOR_FEATURE_SIZE=" "
158161 PREPROCESSOR_OUTPUT=" "
159162 ;;
163+ nvidia/diar_streaming_sortformer_4spk-v2)
164+ MODEL_NAME=" sortformer"
165+ TASK=" "
166+ MAX_SEQ_LEN=" "
167+ EXTRA_PIP=" "
168+ PREPROCESSOR_FEATURE_SIZE=" "
169+ PREPROCESSOR_OUTPUT=" "
170+ ;;
171+ facebook/dinov2-small-imagenet1k-1-layer)
172+ MODEL_NAME=" dinov2"
173+ TASK=" "
174+ MAX_SEQ_LEN=" "
175+ EXTRA_PIP=" "
176+ PREPROCESSOR_FEATURE_SIZE=" "
177+ PREPROCESSOR_OUTPUT=" "
178+ ;;
160179 mistralai/Voxtral-Mini-4B-Realtime-2602)
161180 MODEL_NAME=" voxtral_realtime"
162181 TASK=" "
@@ -165,9 +184,17 @@ case "$HF_MODEL" in
165184 PREPROCESSOR_FEATURE_SIZE=" "
166185 PREPROCESSOR_OUTPUT=" "
167186 ;;
187+ SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4)
188+ MODEL_NAME=" qwen3_5_moe"
189+ TASK=" "
190+ MAX_SEQ_LEN=" "
191+ EXTRA_PIP=" "
192+ PREPROCESSOR_FEATURE_SIZE=" "
193+ PREPROCESSOR_OUTPUT=" "
194+ ;;
168195 * )
169196 echo " Error: Unsupported model '$HF_MODEL '"
170- echo " Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt"
197+ echo " Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer, SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4"
171198 exit 1
172199 ;;
173200esac
@@ -247,6 +274,59 @@ if [ "$MODEL_NAME" = "parakeet" ]; then
247274 exit 0
248275fi
249276
277+ # Sortformer uses a custom export script
278+ if [ " $MODEL_NAME " = " sortformer" ]; then
279+ if [ " $QUANT_NAME " != " non-quantized" ]; then
280+ echo " Error: Sortformer currently supports only non-quantized export"
281+ exit 1
282+ fi
283+
284+ pip install -r examples/models/sortformer/install_requirements.txt
285+
286+ SORTFORMER_BACKEND=" $DEVICE "
287+ if [ " $DEVICE " = " cuda-windows" ]; then
288+ SORTFORMER_BACKEND=" cuda-windows"
289+ elif [ " $DEVICE " = " cuda" ]; then
290+ SORTFORMER_BACKEND=" cuda"
291+ elif [ " $DEVICE " = " xnnpack" ]; then
292+ SORTFORMER_BACKEND=" xnnpack"
293+ else
294+ SORTFORMER_BACKEND=" portable"
295+ fi
296+
297+ python -m executorch.examples.models.sortformer.export_sortformer \
298+ --hf-model " ${HF_MODEL} " \
299+ --backend " ${SORTFORMER_BACKEND} " \
300+ --output-dir " ${OUTPUT_DIR} "
301+
302+ test -f " ${OUTPUT_DIR} /sortformer.pte"
303+ mv " ${OUTPUT_DIR} /sortformer.pte" " ${OUTPUT_DIR} /model.pte"
304+ # CUDA saves named data to separate .ptd file, XNNPACK/portable do not.
305+ if [ " $DEVICE " = " cuda" ] || [ " $DEVICE " = " cuda-windows" ]; then
306+ test -f " ${OUTPUT_DIR} /aoti_cuda_blob.ptd"
307+ fi
308+ ls -al " ${OUTPUT_DIR} "
309+ echo " ::endgroup::"
310+ exit 0
311+ fi
312+
313+ # DINOv2 uses a custom export script
314+ if [ " $MODEL_NAME " = " dinov2" ]; then
315+ pip install -r examples/models/dinov2/install_requirements.txt
316+
317+ python -m executorch.examples.models.dinov2.export_dinov2 \
318+ --backend " $DEVICE " \
319+ --output-dir " ${OUTPUT_DIR} "
320+
321+ test -f " ${OUTPUT_DIR} /model.pte"
322+ if [ " $DEVICE " = " cuda" ] || [ " $DEVICE " = " cuda-windows" ]; then
323+ test -f " ${OUTPUT_DIR} /aoti_cuda_blob.ptd"
324+ fi
325+ ls -al " ${OUTPUT_DIR} "
326+ echo " ::endgroup::"
327+ exit 0
328+ fi
329+
250330# Voxtral Realtime uses a custom export script
251331if [ " $MODEL_NAME " = " voxtral_realtime" ]; then
252332 pip install safetensors huggingface_hub
@@ -262,6 +342,7 @@ if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
262342 VR_QUANT_ARGS=" --qlinear-encoder 8da4w --qlinear 8da4w --qlinear-group-size 32 --qembedding 8w"
263343 elif [ " $QUANT_NAME " = " quantized-int4-metal" ]; then
264344 VR_QUANT_ARGS=" --qlinear-encoder fpa4w --qlinear fpa4w"
345+ VR_DTYPE_ARGS=" --dtype bf16"
265346 elif [ " $QUANT_NAME " = " quantized-int4-tile-packed" ]; then
266347 VR_QUANT_ARGS=" --qlinear-encoder 4w --qlinear-encoder-packing-format tile_packed_to_4d --qlinear 4w --qlinear-packing-format tile_packed_to_4d --qembedding 8w"
267348 VR_DTYPE_ARGS=" --dtype bf16"
@@ -301,11 +382,51 @@ if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
301382 fi
302383 # Copy tokenizer from downloaded model weights
303384 cp " $LOCAL_MODEL_DIR /tekken.json" " ${OUTPUT_DIR} /tekken.json"
385+ rm -rf " $LOCAL_MODEL_DIR "
304386 ls -al " ${OUTPUT_DIR} "
305387 echo " ::endgroup::"
306388 exit 0
307389fi
308390
391+ # Qwen 3.5 MoE uses a prequantized checkpoint and custom export script
392+ if [ " $MODEL_NAME " = " qwen3_5_moe" ]; then
393+ pip install safetensors huggingface_hub
394+ pip install -r examples/models/qwen3_5_moe/requirements.txt
395+
396+ # Download prequantized model outside OUTPUT_DIR to avoid uploading on failure
397+ LOCAL_MODEL_DIR=$( mktemp -d)
398+ INDUCTOR_CACHE=$( mktemp -d)
399+ trap ' rm -rf "$LOCAL_MODEL_DIR" "$INDUCTOR_CACHE"' EXIT
400+
401+ python -c " from huggingface_hub import snapshot_download; snapshot_download('${HF_MODEL} ', local_dir='${LOCAL_MODEL_DIR} ')"
402+
403+ # Sanity check: run inference on the prequantized model
404+ echo " ::group::Inference sanity check"
405+ python -m executorch.examples.models.qwen3_5_moe.inference \
406+ --prequantized " $LOCAL_MODEL_DIR " \
407+ --prompt " What is the capital of France?" \
408+ --max-new-tokens 32 \
409+ --temperature 0 \
410+ --no-compile
411+ echo " ::endgroup::"
412+
413+ # Copy tokenizer for the runner
414+ cp " $LOCAL_MODEL_DIR /tokenizer.json" " ${OUTPUT_DIR} /tokenizer.json"
415+
416+ # Export to .pte/.ptd (short cache dir avoids objcopy symbol length issues)
417+ echo " ::group::Export"
418+ TORCHINDUCTOR_CACHE_DIR=" $INDUCTOR_CACHE " \
419+ python -m executorch.examples.models.qwen3_5_moe.export \
420+ --prequantized " $LOCAL_MODEL_DIR " \
421+ --output-dir " ${OUTPUT_DIR} "
422+ echo " ::endgroup::"
423+
424+ test -f " ${OUTPUT_DIR} /model.pte"
425+ test -f " ${OUTPUT_DIR} /aoti_cuda_blob.ptd"
426+ ls -al " ${OUTPUT_DIR} "
427+ exit 0
428+ fi
429+
309430MAX_SEQ_LEN_ARG=" "
310431if [ -n " $MAX_SEQ_LEN " ]; then
311432 MAX_SEQ_LEN_ARG=" --max_seq_len $MAX_SEQ_LEN "
0 commit comments