
Commit d113379

Update base for Update on "[executorch] Propagate device metadata from partitioner result onto TensorSpecs"
Add end-to-end device type annotation support from export to runtime. Currently we only support one device per graph. The overall pipeline is:

a. The partitioner uses `compile_spec` to determine which device the partitioned blob will run on.
b. After the partitioned graph is lowered to the backend, the newly introduced propagate_device_pass annotates the input and output tensors of the delegate blob with the target device.

Differential Revision: [D95842511](https://our.internmc.facebook.com/intern/diff/D95842511/)

[ghstack-poisoned]
2 parents 8825f8c + 19bbeac commit d113379

498 files changed: 24718 additions & 5841 deletions

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-a9592258daacad7423fd5f39aaa59c6e36471520
+585799cf7039d376d2ac4848b5ef0b501f60679e

.ci/docker/common/install_cuda_windows_cross_compile.sh

Lines changed: 14 additions & 11 deletions
@@ -48,20 +48,23 @@ get_torch_cuda_version() {
 }
 
 install_windows_cuda() {
-    # Get CUDA version from torch
-    TORCH_CUDA_VERSION=$(get_torch_cuda_version)
+    # Use CUDA_VERSION env var if set (from Docker build arg), otherwise query PyTorch
+    if [ -n "${CUDA_VERSION:-}" ]; then
+        echo "Using CUDA version from environment: ${CUDA_VERSION}"
+        CUDA_MAJOR_MINOR=$(echo "${CUDA_VERSION}" | cut -d. -f1,2)
+    else
+        TORCH_CUDA_VERSION=$(get_torch_cuda_version)
+
+        if [ -z "${TORCH_CUDA_VERSION}" ] || [ "${TORCH_CUDA_VERSION}" = "None" ]; then
+            echo "ERROR: Could not detect CUDA version from PyTorch."
+            echo "Make sure PyTorch with CUDA support is installed or set CUDA_VERSION."
+            exit 1
+        fi
 
-    if [ -z "${TORCH_CUDA_VERSION}" ] || [ "${TORCH_CUDA_VERSION}" = "None" ]; then
-        echo "ERROR: Could not detect CUDA version from PyTorch."
-        echo "Make sure PyTorch with CUDA support is installed before running this script."
-        exit 1
+        echo "Detected PyTorch CUDA version: ${TORCH_CUDA_VERSION}"
+        CUDA_MAJOR_MINOR=$(echo "${TORCH_CUDA_VERSION}" | cut -d. -f1,2)
     fi
 
-    echo "Detected PyTorch CUDA version: ${TORCH_CUDA_VERSION}"
-
-    # Extract major.minor version (e.g., "12.8" from "12.8.1" or "12.8")
-    CUDA_MAJOR_MINOR=$(echo "${TORCH_CUDA_VERSION}" | cut -d. -f1,2)
-
     # Look up the full version and driver version
     if [ -z "${CUDA_DRIVER_MAP[${CUDA_MAJOR_MINOR}]}" ]; then
         echo "ERROR: CUDA version ${CUDA_MAJOR_MINOR} is not in the known version map."

.ci/docker/ubuntu/Dockerfile

Lines changed: 1 addition & 1 deletion
@@ -105,7 +105,7 @@ COPY ./common/install_cuda_windows_cross_compile.sh install_cuda_windows_cross_c
 COPY ./common/utils.sh utils.sh
 RUN if [ -n "${CUDA_WINDOWS_CROSS_COMPILE}" ]; then \
         CUDA_VERSION=${CUDA_VERSION} bash ./install_cuda.sh && \
-        bash ./install_cuda_windows_cross_compile.sh; \
+        CUDA_VERSION=${CUDA_VERSION} bash ./install_cuda_windows_cross_compile.sh; \
     fi
 RUN rm -f install_cuda.sh install_cuda_windows_cross_compile.sh utils.sh
 # Set up CUDA environment for Linux compilation (nvcc, etc.)
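With both pieces in place, the cross-compile installer resolves the CUDA toolkit the same way the existing install_cuda.sh invocation does. A quick sketch of the two invocation modes (the 12.8 value is illustrative, not pinned by this commit):

# Pin the toolkit via the Docker build arg, as the Dockerfile now does:
CUDA_VERSION=12.8 bash ./install_cuda_windows_cross_compile.sh

# Omit it, and the script falls back to querying the installed PyTorch:
bash ./install_cuda_windows_cross_compile.sh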

.ci/scripts/export_model_artifact.sh

Lines changed: 50 additions & 2 deletions
@@ -184,9 +184,17 @@ case "$HF_MODEL" in
         PREPROCESSOR_FEATURE_SIZE=""
         PREPROCESSOR_OUTPUT=""
         ;;
+    SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4)
+        MODEL_NAME="qwen3_5_moe"
+        TASK=""
+        MAX_SEQ_LEN=""
+        EXTRA_PIP=""
+        PREPROCESSOR_FEATURE_SIZE=""
+        PREPROCESSOR_OUTPUT=""
+        ;;
     *)
         echo "Error: Unsupported model '$HF_MODEL'"
-        echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
+        echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer, SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4"
         exit 1
         ;;
 esac
@@ -350,7 +358,7 @@ if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
     STREAMING_ARG=""
     PREPROCESSOR_ARGS="--feature_size 128 --output_file ${OUTPUT_DIR}/preprocessor.pte"
     if [ "$USE_STREAMING" = "true" ]; then
-        STREAMING_ARG="--streaming"
+        STREAMING_ARG="--streaming --sliding-window 2048"
         PREPROCESSOR_ARGS="$PREPROCESSOR_ARGS --streaming"
     else
         PREPROCESSOR_ARGS="$PREPROCESSOR_ARGS --stack_output --max_audio_len 300"
@@ -380,6 +388,46 @@ if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
     exit 0
 fi
 
+# Qwen 3.5 MoE uses a prequantized checkpoint and custom export script
+if [ "$MODEL_NAME" = "qwen3_5_moe" ]; then
+    pip install safetensors huggingface_hub
+    pip install -r examples/models/qwen3_5_moe/requirements.txt
+
+    # Download prequantized model outside OUTPUT_DIR to avoid uploading on failure
+    LOCAL_MODEL_DIR=$(mktemp -d)
+    INDUCTOR_CACHE=$(mktemp -d)
+    trap 'rm -rf "$LOCAL_MODEL_DIR" "$INDUCTOR_CACHE"' EXIT
+
+    python -c "from huggingface_hub import snapshot_download; snapshot_download('${HF_MODEL}', local_dir='${LOCAL_MODEL_DIR}')"
+
+    # Sanity check: run inference on the prequantized model
+    echo "::group::Inference sanity check"
+    python -m executorch.examples.models.qwen3_5_moe.inference \
+        --prequantized "$LOCAL_MODEL_DIR" \
+        --prompt "What is the capital of France?" \
+        --max-new-tokens 32 \
+        --temperature 0 \
+        --no-compile
+    echo "::endgroup::"
+
+    # Copy tokenizer for the runner
+    cp "$LOCAL_MODEL_DIR/tokenizer.json" "${OUTPUT_DIR}/tokenizer.json"
+
+    # Export to .pte/.ptd (short cache dir avoids objcopy symbol length issues)
+    echo "::group::Export"
+    TORCHINDUCTOR_CACHE_DIR="$INDUCTOR_CACHE" \
+    python -m executorch.examples.models.qwen3_5_moe.export \
+        --prequantized "$LOCAL_MODEL_DIR" \
+        --output-dir "${OUTPUT_DIR}"
+    echo "::endgroup::"
+
+    test -f "${OUTPUT_DIR}/model.pte"
+    test -f "${OUTPUT_DIR}/aoti_cuda_blob.ptd"
+    ls -al "${OUTPUT_DIR}"
+
+    exit 0
+fi
+
 MAX_SEQ_LEN_ARG=""
 if [ -n "$MAX_SEQ_LEN" ]; then
     MAX_SEQ_LEN_ARG="--max_seq_len $MAX_SEQ_LEN"
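Two patterns in the new qwen3_5_moe block generalize: mktemp -d plus a trap keeps partial downloads out of ${OUTPUT_DIR} (so a failed job uploads nothing), and the ::group::/::endgroup:: echoes are standard GitHub Actions workflow commands that fold noisy log output. A minimal standalone sketch of both (names illustrative):

#!/usr/bin/env bash
set -euo pipefail

# Scratch space outside the artifact dir; removed even when the job fails.
SCRATCH=$(mktemp -d)
trap 'rm -rf "$SCRATCH"' EXIT

echo "::group::Noisy step"   # everything until ::endgroup:: folds in the Actions log
echo "downloading into ${SCRATCH} ..."
echo "::endgroup::"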

.ci/scripts/setup-openvino.sh

Lines changed: 31 additions & 7 deletions
@@ -10,17 +10,41 @@ set -ex
 # shellcheck source=/dev/null
 source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 
+# Parse arguments
+USE_NIGHTLY=false
+for arg in "$@"; do
+    case $arg in
+        --nightly) USE_NIGHTLY=true ;;
+    esac
+done
+
 # Download and install OpenVINO from release packages
-OPENVINO_VERSION="2025.3"
-OPENVINO_BUILD="2025.3.0.19807.44526285f24"
-OPENVINO_URL="https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION}/linux/openvino_toolkit_ubuntu22_${OPENVINO_BUILD}_x86_64.tgz"
+OPENVINO_VERSION="2026.0"
+OPENVINO_BUILD="2026.0.0.20965.c6d6a13a886"
+OPENVINO_STABLE_URL="https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION}/linux/openvino_toolkit_ubuntu22_${OPENVINO_BUILD}_x86_64.tgz"
+
+OPENVINO_NIGHTLY_BUILD_ID="2026.1.0-21310-c694fbc2b6d"
+OPENVINO_NIGHTLY_BUILD="2026.1.0.dev20260312"
+OPENVINO_NIGHTLY_URL="https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/${OPENVINO_NIGHTLY_BUILD_ID}/openvino_toolkit_ubuntu22_${OPENVINO_NIGHTLY_BUILD}_x86_64.tgz"
+
+if [ "${USE_NIGHTLY}" = true ]; then
+    OPENVINO_URL="${OPENVINO_NIGHTLY_URL}"
+    OPENVINO_EXTRACTED_DIR="openvino_toolkit_ubuntu22_${OPENVINO_NIGHTLY_BUILD}_x86_64"
+    echo "Using OpenVINO nightly build: ${OPENVINO_NIGHTLY_BUILD_ID}"
+else
+    OPENVINO_URL="${OPENVINO_STABLE_URL}"
+    OPENVINO_EXTRACTED_DIR="openvino_toolkit_ubuntu22_${OPENVINO_BUILD}_x86_64"
+    echo "Using OpenVINO stable release: ${OPENVINO_BUILD}"
+fi
 
 curl -Lo /tmp/openvino_toolkit.tgz --retry 3 --fail ${OPENVINO_URL}
 tar -xzf /tmp/openvino_toolkit.tgz
-mv openvino_toolkit_ubuntu22_${OPENVINO_BUILD}_x86_64 openvino
+mv "${OPENVINO_EXTRACTED_DIR}" openvino
 
+set +u
 source openvino/setupvars.sh
-cd backends/openvino
-pip install -r requirements.txt
-cd scripts
+set -u
+pip install -r backends/openvino/requirements.txt
+pushd backends/openvino/scripts
 ./openvino_build.sh --enable_python
+popd
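A quick sketch of the two modes the script now supports (the nightly form matches how the openvino flow in test_backend.sh sources it below):

# Stable release (default): installs the pinned 2026.0 toolkit
bash .ci/scripts/setup-openvino.sh

# Nightly: installs the pinned 2026.1 dev toolkit
source .ci/scripts/setup-openvino.sh --nightly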

.ci/scripts/test_backend.sh

Lines changed: 6 additions & 0 deletions
@@ -78,6 +78,12 @@ if [[ "$FLOW" == *arm* ]]; then
     fi
 fi
 
+if [[ "$FLOW" == *openvino* ]]; then
+    # Setup OpenVINO environment
+    source .ci/scripts/setup-openvino.sh --nightly
+    EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_OPENVINO=ON"
+fi
+
 if [[ $IS_MACOS -eq 1 ]]; then
     SETUP_SCRIPT=.ci/scripts/setup-macos.sh
 else

.ci/scripts/test_cortex_m_e2e.sh

Lines changed: 80 additions & 0 deletions
@@ -0,0 +1,80 @@
+#!/usr/bin/env bash
+# Copyright (c) Meta Platforms, Inc. and affiliates.
+# All rights reserved.
+#
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+# End-to-end test for Cortex-M backend: export a model via aot_arm_compiler
+# with cortex-m55+int8 target, then run the .bpte on Corstone-300 FVP.
+#
+# Usage: bash .ci/scripts/test_cortex_m_e2e.sh <model_name>
+# Example: bash .ci/scripts/test_cortex_m_e2e.sh mv2
+
+set -eux
+
+MODEL=$1
+mkdir -p "./cortex_m_e2e/${MODEL}"
+WORK_DIR=$(realpath "./cortex_m_e2e/${MODEL}")
+
+echo "=== Exporting ${MODEL} with cortex-m55+int8 ==="
+python -m examples.arm.aot_arm_compiler \
+    -m "${MODEL}" \
+    --target=cortex-m55+int8 \
+    --quantize \
+    --bundleio \
+    --intermediates="${WORK_DIR}/intermediates" \
+    --output="${WORK_DIR}/${MODEL}.bpte"
+
+BPTE="${WORK_DIR}/${MODEL}.bpte"
+test -f "${BPTE}" || { echo "FAIL: ${BPTE} not produced"; exit 1; }
+echo "=== Exported ${BPTE} ($(stat --printf='%s' "${BPTE}") bytes) ==="
+
+ELF="arm_test/arm_semihosting_executor_runner_corstone-300/arm_executor_runner"
+test -f "${ELF}" || { echo "FAIL: executor runner not found at ${ELF}"; exit 1; }
+
+LOG_FILE=$(mktemp)
+
+# Create a tiny dummy input file — the runner requires -i but BundleIO
+# ignores it and uses the embedded test inputs instead.
+dd if=/dev/zero of="${WORK_DIR}/dummy.bin" bs=4 count=1 2>/dev/null
+
+echo "=== Running ${MODEL} on Corstone-300 FVP ==="
+FVP_Corstone_SSE-300_Ethos-U55 \
+    -C ethosu.num_macs=128 \
+    -C mps3_board.visualisation.disable-visualisation=1 \
+    -C mps3_board.telnetterminal0.start_telnet=0 \
+    -C mps3_board.uart0.out_file='-' \
+    -C mps3_board.uart0.shutdown_on_eot=1 \
+    -C cpu0.semihosting-enable=1 \
+    -C cpu0.semihosting-stack_base=0 \
+    -C cpu0.semihosting-heap_limit=0 \
+    -C "cpu0.semihosting-cwd=${WORK_DIR}" \
+    -C "ethosu.extra_args='--fast'" \
+    -C "cpu0.semihosting-cmd_line='executor_runner -m ${MODEL}.bpte -i dummy.bin -o out'" \
+    -a "${ELF}" \
+    --timelimit 300 2>&1 | tee "${LOG_FILE}" || true
+
+echo "=== Checking FVP output ==="
+
+if grep -q "Test_result: PASS" "${LOG_FILE}"; then
+    echo "=== SUCCESS: ${MODEL} e2e BundleIO test PASSED on FVP ==="
+    rm "${LOG_FILE}"
+    exit 0
+fi
+
+if grep -q "Test_result: FAIL" "${LOG_FILE}"; then
+    echo "FAIL: ${MODEL} BundleIO output mismatch"
+    rm "${LOG_FILE}"
+    exit 1
+fi
+
+if grep -qE "(^[EF][: ].*$)|(^.*Hard fault.*$)|(^.*Assertion.*$)" "${LOG_FILE}"; then
+    echo "FAIL: ${MODEL} FVP run hit a fatal error"
+    rm "${LOG_FILE}"
+    exit 1
+fi
+
+echo "FAIL: ${MODEL} no BundleIO test result found in FVP output"
+rm "${LOG_FILE}"
+exit 1

.ci/scripts/test_model_e2e.sh

Lines changed: 18 additions & 3 deletions
@@ -216,9 +216,21 @@ case "$HF_MODEL" in
         AUDIO_FILE="test_audio.wav"
         IMAGE_PATH=""
         ;;
+    SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4)
+        MODEL_NAME="qwen3_5_moe"
+        RUNNER_TARGET="qwen3_5_moe_runner"
+        RUNNER_PATH="qwen3_5_moe"
+        EXPECTED_OUTPUT="Paris"
+        PREPROCESSOR=""
+        TOKENIZER_URL=""
+        TOKENIZER_FILE="tokenizer.json"
+        AUDIO_URL=""
+        AUDIO_FILE=""
+        IMAGE_PATH=""
+        ;;
     *)
         echo "Error: Unsupported model '$HF_MODEL'"
-        echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
+        echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer, SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4"
         exit 1
         ;;
 esac
@@ -232,7 +244,7 @@ echo "::group::Prepare $MODEL_NAME Artifacts"
 
 
 # Download tokenizer files (skip for models that bundle tokenizer in export or do not use one)
-if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ] && [ "$MODEL_NAME" != "dinov2" ]; then
+if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ] && [ "$MODEL_NAME" != "dinov2" ] && [ "$MODEL_NAME" != "qwen3_5_moe" ]; then
     if [ "$TOKENIZER_FILE" != "" ]; then
         curl -L $TOKENIZER_URL/$TOKENIZER_FILE -o $MODEL_DIR/$TOKENIZER_FILE
     else
@@ -341,6 +353,9 @@ EOF
            RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
        fi
        ;;
+    qwen3_5_moe)
+        RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --prompt 'What is the capital of France?' --max_new_tokens 128 --temperature 0"
+        ;;
     voxtral_realtime)
         RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --preprocessor_path ${MODEL_DIR}/$PREPROCESSOR --audio_path ${MODEL_DIR}/$AUDIO_FILE --temperature 0"
         # Add CUDA data path if present
@@ -359,7 +374,7 @@ EOF
     ;;
 esac
 
-OUTPUT=$($RUNNER_BIN $RUNNER_ARGS 2>&1)
+OUTPUT=$(eval $RUNNER_BIN $RUNNER_ARGS 2>&1)
 EXIT_CODE=$?
 set -e
 
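The switch to eval matters because the new qwen3_5_moe RUNNER_ARGS embeds a single-quoted prompt. A minimal sketch of the difference, with printf standing in for the runner binary (values illustrative):

RUNNER_ARGS="--prompt 'What is the capital of France?' --temperature 0"

# Plain expansion word-splits the quoted prompt: the runner would see
# --prompt, 'What, is, the, capital, of, France?', --temperature, 0
printf '[%s]\n' $RUNNER_ARGS

# eval re-parses the string, so the quoted prompt survives as one argument
eval "printf '[%s]\n' $RUNNER_ARGS"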

.ci/scripts/test_model_e2e_windows.ps1

Lines changed: 13 additions & 1 deletion
@@ -135,7 +135,19 @@ try {
     Write-Host "::group::Check CUDA toolchain"
     $nvccOutput = nvcc --version | Out-String
     Write-Host $nvccOutput
-    nvidia-smi
+    $nvidiaSmiCmd = Get-Command nvidia-smi -ErrorAction SilentlyContinue
+    if ($null -eq $nvidiaSmiCmd) {
+        Write-Host "nvidia-smi not available (command not found; driver may not be installed)"
+    }
+    else {
+        try {
+            nvidia-smi
+        }
+        catch {
+            Write-Host "nvidia-smi failed (driver or GPU issue). Error details:"
+            Write-Host $_
+        }
+    }
     if (-not [string]::IsNullOrWhiteSpace($ExpectedCudaVersion)) {
         $versionMatch = [Regex]::Match($nvccOutput, "release\s+(\d+\.\d+)")
         if (-not $versionMatch.Success) {

.ci/scripts/wheel/test_linux.py

Lines changed: 13 additions & 6 deletions
@@ -11,18 +11,25 @@
 from examples.models import Backend, Model
 
 if __name__ == "__main__":
-    # On Linux x86_64 the wheel is built with the Qualcomm backend.
-    # Verify that it was registered correctly.
-    if platform.system() == "Linux" and platform.machine() in ("x86_64", "amd64"):
+    if platform.system() == "Linux":
         from executorch.extension.pybindings.portable_lib import (
             _get_registered_backend_names,
         )
 
         registered = _get_registered_backend_names()
+
+        # QNN backend is only available on x86_64.
+        if platform.machine() in ("x86_64", "amd64"):
+            assert (
+                "QnnBackend" in registered
+            ), f"QnnBackend not found in registered backends: {registered}"
+            print("✓ QnnBackend is registered")
+
+        # OpenVINO backend is available on all Linux architectures.
         assert (
-            "QnnBackend" in registered
-        ), f"QnnBackend not found in registered backends: {registered}"
-        print("✓ QnnBackend is registered")
+            "OpenvinoBackend" in registered
+        ), f"OpenvinoBackend not found in registered backends: {registered}"
+        print("✓ OpenvinoBackend is registered")
 
     test_base.run_tests(
         model_tests=[
