Commit e61d2f6

Merge branch 'main' into change-1223321
2 parents: 5e1b577 + c7f1d72

431 files changed

Lines changed: 22232 additions & 5128 deletions

(Only a subset of the 431 changed files is shown in this view.)

.ci/scripts/export_model_artifact.sh

Lines changed: 50 additions & 2 deletions
@@ -184,9 +184,17 @@ case "$HF_MODEL" in
     PREPROCESSOR_FEATURE_SIZE=""
     PREPROCESSOR_OUTPUT=""
     ;;
+  SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4)
+    MODEL_NAME="qwen3_5_moe"
+    TASK=""
+    MAX_SEQ_LEN=""
+    EXTRA_PIP=""
+    PREPROCESSOR_FEATURE_SIZE=""
+    PREPROCESSOR_OUTPUT=""
+    ;;
   *)
     echo "Error: Unsupported model '$HF_MODEL'"
-    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
+    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, openai/whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}, google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/diar_streaming_sortformer_4spk-v2, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer, SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4"
     exit 1
     ;;
 esac
@@ -350,7 +358,7 @@ if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
   STREAMING_ARG=""
   PREPROCESSOR_ARGS="--feature_size 128 --output_file ${OUTPUT_DIR}/preprocessor.pte"
   if [ "$USE_STREAMING" = "true" ]; then
-    STREAMING_ARG="--streaming"
+    STREAMING_ARG="--streaming --sliding-window 2048"
     PREPROCESSOR_ARGS="$PREPROCESSOR_ARGS --streaming"
   else
     PREPROCESSOR_ARGS="$PREPROCESSOR_ARGS --stack_output --max_audio_len 300"
@@ -380,6 +388,46 @@ if [ "$MODEL_NAME" = "voxtral_realtime" ]; then
   exit 0
 fi
 
+# Qwen 3.5 MoE uses a prequantized checkpoint and custom export script
+if [ "$MODEL_NAME" = "qwen3_5_moe" ]; then
+  pip install safetensors huggingface_hub
+  pip install -r examples/models/qwen3_5_moe/requirements.txt
+
+  # Download prequantized model outside OUTPUT_DIR to avoid uploading on failure
+  LOCAL_MODEL_DIR=$(mktemp -d)
+  INDUCTOR_CACHE=$(mktemp -d)
+  trap 'rm -rf "$LOCAL_MODEL_DIR" "$INDUCTOR_CACHE"' EXIT
+
+  python -c "from huggingface_hub import snapshot_download; snapshot_download('${HF_MODEL}', local_dir='${LOCAL_MODEL_DIR}')"
+
+  # Sanity check: run inference on the prequantized model
+  echo "::group::Inference sanity check"
+  python -m executorch.examples.models.qwen3_5_moe.inference \
+    --prequantized "$LOCAL_MODEL_DIR" \
+    --prompt "What is the capital of France?" \
+    --max-new-tokens 32 \
+    --temperature 0 \
+    --no-compile
+  echo "::endgroup::"
+
+  # Copy tokenizer for the runner
+  cp "$LOCAL_MODEL_DIR/tokenizer.json" "${OUTPUT_DIR}/tokenizer.json"
+
+  # Export to .pte/.ptd (short cache dir avoids objcopy symbol length issues)
+  echo "::group::Export"
+  TORCHINDUCTOR_CACHE_DIR="$INDUCTOR_CACHE" \
+    python -m executorch.examples.models.qwen3_5_moe.export \
+      --prequantized "$LOCAL_MODEL_DIR" \
+      --output-dir "${OUTPUT_DIR}"
+  echo "::endgroup::"
+
+  test -f "${OUTPUT_DIR}/model.pte"
+  test -f "${OUTPUT_DIR}/aoti_cuda_blob.ptd"
+  ls -al "${OUTPUT_DIR}"
+
+  exit 0
+fi
+
 MAX_SEQ_LEN_ARG=""
 if [ -n "$MAX_SEQ_LEN" ]; then
   MAX_SEQ_LEN_ARG="--max_seq_len $MAX_SEQ_LEN"
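The new qwen3_5_moe branch relies on mktemp plus an EXIT trap so that the downloaded checkpoint and the Inductor cache are removed even when the export aborts partway. A minimal standalone sketch of that pattern (directory names here are illustrative, not from the diff):

#!/usr/bin/env bash
set -euo pipefail

# Scratch dirs live outside the upload directory, so a failed run
# never leaves partial artifacts where CI would pick them up.
WORK_DIR=$(mktemp -d)
CACHE_DIR=$(mktemp -d)

# The EXIT trap fires on success, on failure, and on set -e aborts alike.
trap 'rm -rf "$WORK_DIR" "$CACHE_DIR"' EXIT

echo "working in $WORK_DIR, caching in $CACHE_DIR"
false  # simulate a failing step: the trap still removes both dirs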

.ci/scripts/setup-openvino.sh

Lines changed: 31 additions & 7 deletions
@@ -10,17 +10,41 @@ set -ex
 # shellcheck source=/dev/null
 source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"
 
+# Parse arguments
+USE_NIGHTLY=false
+for arg in "$@"; do
+  case $arg in
+    --nightly) USE_NIGHTLY=true ;;
+  esac
+done
+
 # Download and install OpenVINO from release packages
-OPENVINO_VERSION="2025.3"
-OPENVINO_BUILD="2025.3.0.19807.44526285f24"
-OPENVINO_URL="https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION}/linux/openvino_toolkit_ubuntu22_${OPENVINO_BUILD}_x86_64.tgz"
+OPENVINO_VERSION="2026.0"
+OPENVINO_BUILD="2026.0.0.20965.c6d6a13a886"
+OPENVINO_STABLE_URL="https://storage.openvinotoolkit.org/repositories/openvino/packages/${OPENVINO_VERSION}/linux/openvino_toolkit_ubuntu22_${OPENVINO_BUILD}_x86_64.tgz"
+
+OPENVINO_NIGHTLY_BUILD_ID="2026.1.0-21310-c694fbc2b6d"
+OPENVINO_NIGHTLY_BUILD="2026.1.0.dev20260312"
+OPENVINO_NIGHTLY_URL="https://storage.openvinotoolkit.org/repositories/openvino/packages/nightly/${OPENVINO_NIGHTLY_BUILD_ID}/openvino_toolkit_ubuntu22_${OPENVINO_NIGHTLY_BUILD}_x86_64.tgz"
+
+if [ "${USE_NIGHTLY}" = true ]; then
+  OPENVINO_URL="${OPENVINO_NIGHTLY_URL}"
+  OPENVINO_EXTRACTED_DIR="openvino_toolkit_ubuntu22_${OPENVINO_NIGHTLY_BUILD}_x86_64"
+  echo "Using OpenVINO nightly build: ${OPENVINO_NIGHTLY_BUILD_ID}"
+else
+  OPENVINO_URL="${OPENVINO_STABLE_URL}"
+  OPENVINO_EXTRACTED_DIR="openvino_toolkit_ubuntu22_${OPENVINO_BUILD}_x86_64"
+  echo "Using OpenVINO stable release: ${OPENVINO_BUILD}"
+fi
 
 curl -Lo /tmp/openvino_toolkit.tgz --retry 3 --fail ${OPENVINO_URL}
 tar -xzf /tmp/openvino_toolkit.tgz
-mv openvino_toolkit_ubuntu22_${OPENVINO_BUILD}_x86_64 openvino
+mv "${OPENVINO_EXTRACTED_DIR}" openvino
 
+set +u
 source openvino/setupvars.sh
-cd backends/openvino
-pip install -r requirements.txt
-cd scripts
+set -u
+pip install -r backends/openvino/requirements.txt
+pushd backends/openvino/scripts
 ./openvino_build.sh --enable_python
+popd
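The sourcing of setupvars.sh is now bracketed by set +u / set -u because vendor environment scripts commonly expand variables that may be unset, which would abort a script running under nounset. A small sketch of the pattern, using a hypothetical stand-in env script rather than OpenVINO's real one:

#!/usr/bin/env bash
set -eu

# Stand-in for a vendor env script that appends to a possibly-unset variable.
cat > /tmp/vendorvars.sh <<'EOF'
export LD_LIBRARY_PATH="/opt/vendor/lib:${LD_LIBRARY_PATH}"
EOF

set +u                     # tolerate unset variables inside the vendor script
source /tmp/vendorvars.sh
set -u                     # restore strict mode for the rest of the script

echo "LD_LIBRARY_PATH=${LD_LIBRARY_PATH}"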

.ci/scripts/test_backend.sh

Lines changed: 6 additions & 0 deletions
@@ -78,6 +78,12 @@ if [[ "$FLOW" == *arm* ]]; then
   fi
 fi
 
+if [[ "$FLOW" == *openvino* ]]; then
+  # Setup OpenVINO environment
+  source .ci/scripts/setup-openvino.sh --nightly
+  EXTRA_BUILD_ARGS+=" -DEXECUTORCH_BUILD_OPENVINO=ON"
+fi
+
 if [[ $IS_MACOS -eq 1 ]]; then
   SETUP_SCRIPT=.ci/scripts/setup-macos.sh
 else

.ci/scripts/test_model_e2e.sh

Lines changed: 18 additions & 3 deletions
@@ -216,9 +216,21 @@ case "$HF_MODEL" in
     AUDIO_FILE="test_audio.wav"
     IMAGE_PATH=""
     ;;
+  SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4)
+    MODEL_NAME="qwen3_5_moe"
+    RUNNER_TARGET="qwen3_5_moe_runner"
+    RUNNER_PATH="qwen3_5_moe"
+    EXPECTED_OUTPUT="Paris"
+    PREPROCESSOR=""
+    TOKENIZER_URL=""
+    TOKENIZER_FILE="tokenizer.json"
+    AUDIO_URL=""
+    AUDIO_FILE=""
+    IMAGE_PATH=""
+    ;;
   *)
     echo "Error: Unsupported model '$HF_MODEL'"
-    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer"
+    echo "Supported models: mistralai/Voxtral-Mini-3B-2507, mistralai/Voxtral-Mini-4B-Realtime-2602, nvidia/diar_streaming_sortformer_4spk-v2, openai/whisper series (whisper-{small, medium, large, large-v2, large-v3, large-v3-turbo}), google/gemma-3-4b-it, Qwen/Qwen3-0.6B, nvidia/parakeet-tdt, facebook/dinov2-small-imagenet1k-1-layer, SocialLocalMobile/Qwen3.5-35B-A3B-HQQ-INT4"
     exit 1
     ;;
 esac
@@ -232,7 +244,7 @@ echo "::group::Prepare $MODEL_NAME Artifacts"
 
 
 # Download tokenizer files (skip for models that bundle tokenizer in export or do not use one)
-if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ] && [ "$MODEL_NAME" != "dinov2" ]; then
+if [ "$MODEL_NAME" != "parakeet" ] && [ "$MODEL_NAME" != "voxtral_realtime" ] && [ "$MODEL_NAME" != "sortformer" ] && [ "$MODEL_NAME" != "dinov2" ] && [ "$MODEL_NAME" != "qwen3_5_moe" ]; then
   if [ "$TOKENIZER_FILE" != "" ]; then
     curl -L $TOKENIZER_URL/$TOKENIZER_FILE -o $MODEL_DIR/$TOKENIZER_FILE
   else
@@ -341,6 +353,9 @@ EOF
     RUNNER_ARGS="$RUNNER_ARGS --data_path ${MODEL_DIR}/aoti_cuda_blob.ptd"
     fi
     ;;
+  qwen3_5_moe)
+    RUNNER_ARGS="$RUNNER_ARGS --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --prompt 'What is the capital of France?' --max_new_tokens 128 --temperature 0"
+    ;;
   voxtral_realtime)
     RUNNER_ARGS="--model_path ${MODEL_DIR}/model.pte --tokenizer_path ${MODEL_DIR}/$TOKENIZER_FILE --preprocessor_path ${MODEL_DIR}/$PREPROCESSOR --audio_path ${MODEL_DIR}/$AUDIO_FILE --temperature 0"
     # Add CUDA data path if present
@@ -359,7 +374,7 @@ EOF
     ;;
 esac
 
-OUTPUT=$($RUNNER_BIN $RUNNER_ARGS 2>&1)
+OUTPUT=$(eval $RUNNER_BIN $RUNNER_ARGS 2>&1)
 EXIT_CODE=$?
 set -e
 
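The runner invocation switches to eval because the qwen3_5_moe RUNNER_ARGS embed a quoted multi-word --prompt; with plain expansion, word splitting passes the quote characters literally and breaks the prompt into separate arguments. A quick illustration of the difference:

#!/usr/bin/env bash
args="--prompt 'What is the capital of France?'"

# Plain expansion: the prompt splits into words and the quotes stay literal.
printf '[%s]\n' $args

# eval re-parses the expanded string, so the quoted prompt stays one argument.
eval "printf '[%s]\n' $args"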

.ci/scripts/test_model_e2e_windows.ps1

Lines changed: 13 additions & 1 deletion
@@ -135,7 +135,19 @@ try {
     Write-Host "::group::Check CUDA toolchain"
     $nvccOutput = nvcc --version | Out-String
     Write-Host $nvccOutput
-    nvidia-smi
+    $nvidiaSmiCmd = Get-Command nvidia-smi -ErrorAction SilentlyContinue
+    if ($null -eq $nvidiaSmiCmd) {
+        Write-Host "nvidia-smi not available (command not found; driver may not be installed)"
+    }
+    else {
+        try {
+            nvidia-smi
+        }
+        catch {
+            Write-Host "nvidia-smi failed (driver or GPU issue). Error details:"
+            Write-Host $_
+        }
+    }
     if (-not [string]::IsNullOrWhiteSpace($ExpectedCudaVersion)) {
         $versionMatch = [Regex]::Match($nvccOutput, "release\s+(\d+\.\d+)")
         if (-not $versionMatch.Success) {
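The PowerShell above distinguishes two failure modes: the command not resolving at all versus resolving but failing at runtime. For comparison, the same two-level guard in the Bash CI scripts would look roughly like this (a sketch, not part of the diff):

# Guard an optional diagnostic tool: "not installed" vs. "ran but failed".
if command -v nvidia-smi >/dev/null 2>&1; then
  if ! nvidia-smi; then
    echo "nvidia-smi failed (driver or GPU issue)"
  fi
else
  echo "nvidia-smi not available (command not found; driver may not be installed)"
fi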

.ci/scripts/wheel/test_linux.py

Lines changed: 13 additions & 6 deletions
@@ -11,18 +11,25 @@
 from examples.models import Backend, Model
 
 if __name__ == "__main__":
-    # On Linux x86_64 the wheel is built with the Qualcomm backend.
-    # Verify that it was registered correctly.
-    if platform.system() == "Linux" and platform.machine() in ("x86_64", "amd64"):
+    if platform.system() == "Linux":
         from executorch.extension.pybindings.portable_lib import (
             _get_registered_backend_names,
         )
 
         registered = _get_registered_backend_names()
+
+        # QNN backend is only available on x86_64.
+        if platform.machine() in ("x86_64", "amd64"):
+            assert (
+                "QnnBackend" in registered
+            ), f"QnnBackend not found in registered backends: {registered}"
+            print("✓ QnnBackend is registered")
+
+        # OpenVINO backend is available on all Linux architectures.
         assert (
-            "QnnBackend" in registered
-        ), f"QnnBackend not found in registered backends: {registered}"
-        print("✓ QnnBackend is registered")
+            "OpenvinoBackend" in registered
+        ), f"OpenvinoBackend not found in registered backends: {registered}"
+        print("✓ OpenvinoBackend is registered")
 
 test_base.run_tests(
     model_tests=[
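The same assertion can be reproduced by hand against an installed wheel; _get_registered_backend_names is the pybindings helper the test itself imports. A one-off check from the shell might look like this (assuming an environment where the ExecuTorch wheel is installed):

# Quick manual check that a wheel registers the expected backends.
python - <<'EOF'
from executorch.extension.pybindings.portable_lib import (
    _get_registered_backend_names,
)

registered = _get_registered_backend_names()
print(registered)
assert "OpenvinoBackend" in registered, registered
EOF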

.ci/scripts/wheel/test_linux_aarch64.py

Lines changed: 14 additions & 0 deletions
@@ -12,6 +12,20 @@
 # coremltools does not support linux aarch64 yet and install from the source fails on runtime
 # https://github.com/apple/coremltools/issues/1254
 # https://github.com/apple/coremltools/issues/2195
+
+from executorch.extension.pybindings.portable_lib import (
+    _get_registered_backend_names,
+)
+
+registered = _get_registered_backend_names()
+
+# OpenVINO backend uses dlopen (no build-time SDK dependency), so it
+# is compiled into the wheel on all Linux architectures.
+assert (
+    "OpenvinoBackend" in registered
+), f"OpenvinoBackend not found in registered backends: {registered}"
+print("✓ OpenvinoBackend is registered")
+
 test_base.run_tests(
     model_tests=[
         test_base.ModelTest(

.github/workflows/_test_backend.yml

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ on:
   workflow_call:
     inputs:
       backend:
-        description: 'Backend to test (xnnpack, coreml, vulkan, qnn)'
+        description: 'Backend to test (xnnpack, coreml, vulkan, qnn, openvino)'
         required: true
         type: string
       flows:
Lines changed: 77 additions & 0 deletions
@@ -0,0 +1,77 @@
+name: Test Cadence
+
+permissions:
+  id-token: write
+  contents: read
+
+on:
+  workflow_call:
+    inputs:
+      docker-image:
+        description: 'Docker image to use'
+        required: false
+        type: string
+        default: ci-image:executorch-ubuntu-22.04-clang12
+      runner:
+        description: 'Runner type'
+        required: false
+        type: string
+        default: linux.8xlarge.memory
+      ref:
+        description: 'Git ref to checkout'
+        required: false
+        type: string
+        default: ${{ github.sha }}
+      timeout:
+        description: 'Job timeout in minutes'
+        required: false
+        type: number
+        default: 90
+
+jobs:
+  test-aot:
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    with:
+      job-name: test-aot
+      runner: ${{ inputs.runner }}
+      docker-image: ${{ inputs.docker-image }}
+      submodules: recursive
+      ref: ${{ inputs.ref }}
+      timeout: ${{ inputs.timeout }}
+      script: |
+        set -eux
+        conda create -y -n cadence_test python=3.12 > /dev/null
+        conda activate cadence_test
+
+        ./install_requirements.sh > /dev/null
+        pip install -e . --no-build-isolation > /dev/null
+        pip install beartype later pyre_extensions pytest-xdist
+
+        python -m pytest backends/cadence/aot/tests/ -v -n auto
+
+  test-ops:
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    with:
+      job-name: test-ops
+      runner: ${{ inputs.runner }}
+      docker-image: ${{ inputs.docker-image }}
+      submodules: recursive
+      ref: ${{ inputs.ref }}
+      timeout: ${{ inputs.timeout }}
+      download-artifact: cadence-runner-build
+      script: |
+        set -eux
+        conda create -y -n cadence_test python=3.12 > /dev/null
+        conda activate cadence_test
+
+        ./install_requirements.sh > /dev/null
+        pip install -e . --no-build-isolation > /dev/null
+        pip install beartype later pyre_extensions pytest-xdist
+
+        # Use the pre-built runner from the build job
+        mkdir -p cmake-out/backends/cadence
+        cp "${RUNNER_ARTIFACT_DIR}/cadence_runner" cmake-out/backends/cadence/cadence_runner
+        chmod +x cmake-out/backends/cadence/cadence_runner
+
+        export PYTHONPATH="${PYTHONPATH:-}:$(pwd)/backends/cadence/utils/FACTO"
+        python -m pytest examples/cadence/operators/ -v -n auto

.github/workflows/android-release-artifacts.yml

Lines changed: 9 additions & 8 deletions
@@ -145,9 +145,9 @@ jobs:
           export BUILD_AAR_DIR=aar-out
           bash scripts/build_android_library.sh
           mkdir -p "${ARTIFACTS_DIR_NAME}"
-          cp aar-out/executorch.aar "${ARTIFACTS_DIR_NAME}/executorch.aar"
+          cp aar-out/executorch.aar "${ARTIFACTS_DIR_NAME}/executorch-${FLAVOR}.aar"
 
-          shasum -a 256 "${ARTIFACTS_DIR_NAME}/executorch.aar"
+          shasum -a 256 "${ARTIFACTS_DIR_NAME}/executorch-${FLAVOR}.aar"
 
           # Publish to maven staging
           UPLOAD_TO_MAVEN="${{ inputs.upload_to_maven }}"
@@ -165,18 +165,13 @@ jobs:
       contents: read
     steps:
       - name: configure aws credentials
-        uses: aws-actions/configure-aws-credentials@v1.7.0
+        uses: aws-actions/configure-aws-credentials@v4
         with:
           role-to-assume: arn:aws:iam::308535385114:role/gha_executorch_upload-frameworks-android
           aws-region: us-east-1
       - name: Upload AAR RC to AWS S3
         shell: bash
         run: |
-          wget https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/executorch.aar
-          shasum -a 256 executorch.aar > executorch.aar.sha256sums
-
-          pip install awscli==1.32.18
-          AWS_CMD="aws s3 cp"
           VERSION="${{ inputs.version }}"
           FLAVOR="${{ inputs.flavor }}"
           if [ -z "$VERSION" ]; then
@@ -185,5 +180,11 @@ jobs:
           if [ -z "$FLAVOR" ]; then
             FLAVOR="xnnpack"
           fi
+          wget https://gha-artifacts.s3.amazonaws.com/${{ github.repository }}/${{ github.run_id }}/artifacts/executorch-${FLAVOR}.aar
+          mv executorch-${FLAVOR}.aar executorch.aar
+          shasum -a 256 executorch.aar > executorch.aar.sha256sums
+
+          pip install awscli==1.32.18
+          AWS_CMD="aws s3 cp"
           ${AWS_CMD} executorch.aar s3://ossci-android/executorch/release/${VERSION}-${FLAVOR}/executorch.aar --acl public-read
           ${AWS_CMD} executorch.aar.sha256sums s3://ossci-android/executorch/release/${VERSION}-${FLAVOR}/executorch.aar.sha256sums --acl public-read
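Since the AAR and its .sha256sums file are uploaded to the same public S3 prefix, a consumer can verify a download end to end. A sketch of that check; the https endpoint is the usual S3 web form of the bucket URL and, like the version and flavor values, is an assumption rather than something this workflow prints:

# Sketch: fetch a released AAR and verify it against the published checksum.
# VERSION/FLAVOR values and the https endpoint are illustrative assumptions.
VERSION="1.0.0"
FLAVOR="xnnpack"
BASE="https://ossci-android.s3.amazonaws.com/executorch/release/${VERSION}-${FLAVOR}"

curl -fLO "${BASE}/executorch.aar"
curl -fLO "${BASE}/executorch.aar.sha256sums"
shasum -a 256 -c executorch.aar.sha256sums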
