@@ -184,9 +184,17 @@ case "$HF_MODEL" in
     PREPROCESSOR_FEATURE_SIZE=""
     PREPROCESSOR_OUTPUT=""
     ;;
+  SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4)
+    MODEL_NAME="qwen3_5_moe"
+    TASK=""
+    MAX_SEQ_LEN=""
+    EXTRA_PIP=""
+    PREPROCESSOR_FEATURE_SIZE=""
+    PREPROCESSOR_OUTPUT=""
+    ;;
   *)
     echo "Error: Unsupported model '$HF_MODEL'"
-    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
+    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer, SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4"
     exit 1
     ;;
 esac
@@ -350,7 +358,7 @@ if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
   STREAMING_ARG=""
   PREPROCESSOR_ARGS="--feature_size 128 --output_file ${OUTPUT_DIR}/preprocessor.pte"
   if [ "$USE_STREAMING" = "true" ]; then
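+    # Assumption: --sliding-window caps the streaming attention window (2048 positions here)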
-    STREAMING_ARG="--streaming"
+    STREAMING_ARG="--streaming --sliding-window 2048"
     PREPROCESSOR_ARGS="$PREPROCESSOR_ARGS --streaming"
   else
     PREPROCESSOR_ARGS="$PREPROCESSOR_ARGS --stack_output --max_audio_len 300"
@@ -380,6 +388,46 @@ if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
   exit 0
 fi
 
+# Qwen 3.5 MoE uses a prequantized checkpoint and a custom export script
+if [ "$MODEL_NAME" = "qwen3_5_moe" ]; then
+  pip install safetensors huggingface_hub
+  pip install -r examples/models/qwen3_5_moe/requirements.txt
+
+  # Download the prequantized model outside OUTPUT_DIR to avoid uploading it on failure
+  LOCAL_MODEL_DIR=$(mktemp -d)
+  INDUCTOR_CACHE=$(mktemp -d)
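+  # Remove both temp dirs on any exit, success or failure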
+  trap 'rm -rf "$LOCAL_MODEL_DIR" "$INDUCTOR_CACHE"' EXIT
+
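+  # Fetch the full repo snapshot (weights, config, tokenizer.json) into LOCAL_MODEL_DIR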
+  python -c "from huggingface_hub import snapshot_download; snapshot_download('${HF_MODEL}', local_dir='${LOCAL_MODEL_DIR}')"
+
+  # Sanity check: run inference on the prequantized model
+  echo "::group::Inference sanity check"
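+  # temperature 0 forces greedy decoding, so the check's output is deterministic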
+  python -m executorch.examples.models.qwen3_5_moe.inference \
+    --prequantized "$LOCAL_MODEL_DIR" \
+    --prompt "What is the capital of France?" \
+    --max-new-tokens 32 \
+    --temperature 0 \
+    --no-compile
+  echo "::endgroup::"
+
+  # Copy tokenizer for the runner
+  cp "$LOCAL_MODEL_DIR/tokenizer.json" "${OUTPUT_DIR}/tokenizer.json"
+
+  # Export to .pte/.ptd (short cache dir avoids objcopy symbol length issues)
+  echo "::group::Export"
+  TORCHINDUCTOR_CACHE_DIR="$INDUCTOR_CACHE" \
+  python -m executorch.examples.models.qwen3_5_moe.export \
+    --prequantized "$LOCAL_MODEL_DIR" \
+    --output-dir "${OUTPUT_DIR}"
+  echo "::endgroup::"
+
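+  # Expect the ExecuTorch program (.pte) plus the AOTInductor CUDA kernel blob (.ptd)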
+  test -f "${OUTPUT_DIR}/model.pte"
+  test -f "${OUTPUT_DIR}/aoti_cuda_blob.ptd"
+  ls -al "${OUTPUT_DIR}"
+
+  exit 0
+fi
+
 MAX_SEQ_LEN_ARG=""
 if [ -n "$MAX_SEQ_LEN" ]; then
   MAX_SEQ_LEN_ARG="--max_seq_len $MAX_SEQ_LEN"
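
A minimal usage sketch for the new arm, assuming HF_MODEL and OUTPUT_DIR are read from the environment (the script name below is hypothetical):

    HF_MODEL="SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4" \
    OUTPUT_DIR="./qwen3_5_moe_out" \
    bash export_model_artifact.sh   # hypothetical script name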