Commit 2c46ed2

Merge branch 'main' into cuda-graph
2 parents 8fc7355 + e281726 commit 2c46ed2

159 files changed

Lines changed: 5708 additions & 1418 deletions


.ci/scripts/test_cortex_m_e2e.sh

Lines changed: 2 additions & 1 deletion
@@ -1,5 +1,6 @@
 #!/usr/bin/env bash
 # Copyright (c) Meta Platforms, Inc. and affiliates.
+# Copyright 2026 Arm Limited and/or its affiliates.
 # All rights reserved.
 #
 # This source code is licensed under the BSD-style license found in the
@@ -18,7 +19,7 @@ mkdir -p "./cortex_m_e2e/${MODEL}"
 WORK_DIR=$(realpath "./cortex_m_e2e/${MODEL}")

 echo "=== Exporting ${MODEL} with cortex-m55+int8 ==="
-python -m examples.arm.aot_arm_compiler \
+python -m backends.arm.scripts.aot_arm_compiler \
   -m "${MODEL}" \
   --target=cortex-m55+int8 \
   --quantize \
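
Note: the E2E script now resolves the Arm AOT compiler from its new home under backends/arm/scripts rather than examples/arm. A quick, hedged way to check that the relocated entry point resolves before running the full script (assumes a repo-root checkout with the Arm backend's Python dependencies installed; the module is an argparse CLI, so --help is expected to work):

# Sketch only: confirm the relocated module imports and prints its CLI help.
python -m backends.arm.scripts.aot_arm_compiler --help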
Lines changed: 29 additions & 30 deletions
@@ -6,6 +6,7 @@
 # LICENSE file in the root directory of this source tree.

 set -ex
+
 # shellcheck source=/dev/null
 source "$(dirname "${BASH_SOURCE[0]}")/utils.sh"

@@ -50,21 +51,21 @@ PT2E_QUANTIZE="${PT2E_QUANTIZE:-}"
 # Default CMake Build Type to release mode
 CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE:-Release}

-if [[ $# -lt 5 ]]; then # Assuming 4 mandatory args
-  echo "Expecting atleast 5 positional arguments"
-  echo "Usage: [...]"
-fi
 if [[ -z "${MODEL_NAME:-}" ]]; then
   echo "Missing model name, exiting..."
   exit 1
 fi

-
 if [[ -z "${MODE:-}" ]]; then
   echo "Missing mode, choose openvino or xnnpack, exiting..."
   exit 1
 fi

+if [[ -z "${VIDEO_PATH:-}" ]]; then
+  echo "Missing video path, exiting..."
+  exit 1
+fi
+
 if [[ -z "${PYTHON_EXECUTABLE:-}" ]]; then
   PYTHON_EXECUTABLE=python3
 fi
@@ -75,21 +76,13 @@ if [[ "${MODE}" =~ .*openvino.* ]]; then
   OPENVINO=ON
   TARGET_LIBS="$TARGET_LIBS openvino_backend "

-  git clone https://github.com/openvinotoolkit/openvino.git
-  cd openvino && git b16b776ac119dafda51f69a80f1e6b7376d02c3b
-  git submodule update --init --recursive
-  sudo ./install_build_dependencies.sh
-  mkdir build && cd build
-  cmake .. -DCMAKE_BUILD_TYPE=Release -DENABLE_PYTHON=ON
-  make -j$(nproc)
-
-  cd ..
-  cmake --install build --prefix dist
-
-  source dist/setupvars.sh
-  cd ../backends/openvino
-  pip install -r requirements.txt
-  cd ../../
+  # Install specific OpenVINO runtime from pip.
+  $PYTHON_EXECUTABLE -m pip install --pre openvino==2026.1.0.dev20260131 --extra-index-url https://storage.openvinotoolkit.org/simple/wheels/nightly
+  $PYTHON_EXECUTABLE -m pip install -r backends/openvino/requirements.txt
+
+  # Set OPENVINO_LIB_PATH so the C++ demo runner can also find libopenvino_c.so.
+  OPENVINO_LIB_PATH=$($PYTHON_EXECUTABLE -c "import openvino, os, glob; print(sorted(glob.glob(os.path.join(os.path.dirname(openvino.__file__), 'libs', 'libopenvino_c.so*')))[-1])")
+  export OPENVINO_LIB_PATH
 else
   OPENVINO=OFF
 fi
@@ -103,9 +96,10 @@ fi

 which "${PYTHON_EXECUTABLE}"

+TORCH_URL=https://download.pytorch.org/whl/cpu

-DIR="examples/models/yolo12"
-$PYTHON_EXECUTABLE -m pip install -r ${DIR}/requirements.txt
+DIR="examples/models/yolo26"
+$PYTHON_EXECUTABLE -m pip install --upgrade-strategy only-if-needed --extra-index-url "$TORCH_URL" -r ${DIR}/requirements.txt

 cmake_install_executorch_libraries() {
   rm -rf cmake-out
@@ -142,11 +136,11 @@ cmake_install_executorch_libraries() {

   echo $TARGET_LIBS
   export CMAKE_BUILD_ARGS="--target $TARGET_LIBS"
-  pip install . --no-build-isolation
+  $PYTHON_EXECUTABLE -m pip install . --no-build-isolation
 }

 cmake_build_demo() {
-  echo "Building yolo12 runner"
+  echo "Building yolo26 runner"
   retry cmake \
     -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \
     -DUSE_OPENVINO_BACKEND="$OPENVINO" \
@@ -174,24 +168,29 @@ prepare_artifacts_upload() {


 # Export model.
-EXPORTED_MODEL_NAME="${MODEL_NAME}_fp32_${MODE}.pte"
-echo "Exporting ${EXPORTED_MODEL_NAME}"
 EXPORT_ARGS="--model_name=${MODEL_NAME} --backend=${MODE}"
+if [[ -n "${PT2E_QUANTIZE}" ]]; then
+  EXPORTED_MODEL_NAME="${MODEL_NAME}_int8_${MODE}.pte"
+  EXPORT_ARGS="${EXPORT_ARGS} --quantize --video_path=${VIDEO_PATH}"
+else
+  EXPORTED_MODEL_NAME="${MODEL_NAME}_fp32_${MODE}.pte"
+fi
+echo "Exporting ${EXPORTED_MODEL_NAME}"

 # Add dynamically linked library location
 cmake_install_executorch_libraries

-$PYTHON_EXECUTABLE -m examples.models.yolo12.export_and_validate ${EXPORT_ARGS}
+$PYTHON_EXECUTABLE -m examples.models.yolo26.export_and_validate ${EXPORT_ARGS}


 RUNTIME_ARGS="--model_path=${EXPORTED_MODEL_NAME} --input_path=${VIDEO_PATH}"
 # Check build tool.
 cmake_build_demo
-# Run yolo12 runner
+# Run yolo26 runner
 NOW=$(date +"%H:%M:%S")
-echo "Starting to run yolo12 runner at ${NOW}"
+echo "Starting to run yolo26 runner at ${NOW}"
 # shellcheck source=/dev/null
-cmake-out/examples/models/yolo12/Yolo12DetectionDemo ${RUNTIME_ARGS} > result.txt
+cmake-out/examples/models/yolo26/Yolo26DetectionDemo ${RUNTIME_ARGS} > result.txt
 NOW=$(date +"%H:%M:%S")
 echo "Finished at ${NOW}"
.github/workflows/pull.yml

Lines changed: 87 additions & 2 deletions
@@ -674,6 +674,40 @@ jobs:
       build-tool: buck2
       docker-image: ci-image:executorch-ubuntu-22.04-clang12

+  test-qnn-buck-build-linux:
+    name: test-qnn-buck-build-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 90
+      script: |
+        set -eux
+
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        # Download QNN SDK and get the path
+        QNN_SDK_ROOT=$(python3 backends/qualcomm/scripts/download_qnn_sdk.py --print-sdk-path)
+        echo "QNN_SDK_ROOT=${QNN_SDK_ROOT}"
+
+        # Configure Buck to find the QNN SDK
+        echo "[qualcomm]" >> .buckconfig
+        echo " qnn_sdk_root = ${QNN_SDK_ROOT}" >> .buckconfig
+
+        # Setup buck2
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool buck2
+
+        # Build QNN backend with Buck
+        buck2 build //backends/qualcomm/...
+
   unittest-arm-backend-with-no-deps:
     name: unittest-arm-backend-with-no-deps
     uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
@@ -823,6 +857,8 @@ jobs:
       id-token: write
      contents: read
     strategy:
+      matrix:
+        model: [mv2, mv3, dl3]
       fail-fast: false
     with:
       runner: linux.2xlarge
@@ -834,9 +870,58 @@ jobs:
         # The generic Linux job chooses to use base env, not the one setup by the image
         CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
         conda activate "${CONDA_ENV}"
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/build-qnn-sdk.sh
+        PYTHON_EXECUTABLE=python bash .ci/scripts/test_model.sh ${{ matrix.model }} "cmake" "qnn"
+
+  test-qnn-testsuite-linux:
+    name: test-qnn-testsuite-linux
+    permissions:
+      id-token: write
+      contents: read
+    uses: ./.github/workflows/_test_backend.yml
+    with:
+      backend: qnn
+      flows: '["qnn"]'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 120
+      run-linux: true
+      runner-linux: linux.2xlarge
+
+  test-qnn-passes-linux:
+    name: test-qnn-passes-linux
+    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
+    permissions:
+      id-token: write
+      contents: read
+    strategy:
+      fail-fast: false
+    with:
+      runner: linux.2xlarge
+      docker-image: ci-image:executorch-ubuntu-22.04-qnn-sdk
+      submodules: 'recursive'
+      ref: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
+      timeout: 30
+      script: |
+        # The generic Linux job chooses to use base env, not the one setup by the image
+        CONDA_ENV=$(conda env list --json | jq -r ".envs | .[-1]")
+        conda activate "${CONDA_ENV}"
+
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-qnn-deps.sh
+        # Source (not bash) so QNN_SDK_ROOT stays in the environment
+        PYTHON_EXECUTABLE=python source .ci/scripts/build-qnn-sdk.sh
+
+        # Editable install so the PyQnnManagerAdaptor .so built by build-qnn-sdk.sh
+        # is visible in the source tree (the _passes import chain pulls it in transitively)
+        CMAKE_ARGS="-DEXECUTORCH_BUILD_QNN=ON -DQNN_SDK_ROOT=$QNN_SDK_ROOT -DEXECUTORCH_BUILD_EXTENSION_TENSOR=ON" \
+        PYTHON_EXECUTABLE=python bash .ci/scripts/setup-linux.sh --build-tool cmake --editable true
+
+        # test_mha_to_sha imports oss_scripts/llama which transitively needs torchtune
+        pip install -r requirements-examples.txt

-        # placeholder for running test_qnn_delegate.py, can use matrix such that we can trigger different jobs, refers to test-llama-runner-qnn-linux
-        # reminder: make sure each job runs fast
+        # Run QNN pass unit tests
+        pytest -xvs backends/qualcomm/tests/test_passes.py

   test-phi-3-mini-runner-linux:
     name: test-phi-3-mini-runner-linux
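
Note: the test-qnn-passes-linux job sources build-qnn-sdk.sh instead of executing it with bash, as the in-script comment says, so that QNN_SDK_ROOT survives into the later setup-linux.sh step. A standalone, hedged illustration of that shell behavior (the script name and SDK path below are made up for the demo):

# Sketch only: 'bash script.sh' runs in a child shell, so its exports vanish on exit;
# 'source script.sh' runs in the current shell, so exported variables persist.
cat > /tmp/set_env_demo.sh <<'EOF'
export QNN_SDK_ROOT=/opt/qnn-sdk
EOF

unset QNN_SDK_ROOT
bash /tmp/set_env_demo.sh
echo "after bash:   QNN_SDK_ROOT='${QNN_SDK_ROOT:-}'"    # empty: the child shell exited
source /tmp/set_env_demo.sh
echo "after source: QNN_SDK_ROOT='${QNN_SDK_ROOT:-}'"    # /opt/qnn-sdk: persists here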

.lintrunner.toml

Lines changed: 1 addition & 0 deletions
@@ -225,6 +225,7 @@ exclude_patterns = [
     'extension/llm/tokenizers/**',
     'backends/cadence/utils/FACTO',
     'examples/cuda',
+    'examples/qualcomm',
     'kernels/portable',
     # File contains @generated
     'extension/llm/custom_ops/spinquant/fast_hadamard_transform_special.h',

README.md

Lines changed: 1 addition & 1 deletion
@@ -204,7 +204,7 @@ ExecuTorch powers on-device AI at scale across Meta's family of apps, VR/AR devi

 **Multimodal:** [Llava](examples/models/llava/README.md) (vision-language), [Voxtral](examples/models/voxtral/README.md) (audio-language), [Gemma](examples/models/gemma3) (vision-language)

-**Vision/Speech:** [MobileNetV2](https://github.com/meta-pytorch/executorch-examples/tree/main/mv2), [DeepLabV3](https://github.com/meta-pytorch/executorch-examples/tree/main/dl3), [Whisper](examples/models/whisper/README.md) <!-- @lint-ignore -->
+**Vision/Speech:** [MobileNetV2](https://github.com/meta-pytorch/executorch-examples/tree/main/mv2), [DeepLabV3](https://github.com/meta-pytorch/executorch-examples/tree/main/dl3), [YOLO26](examples/models/yolo26/README.md), [Whisper](examples/models/whisper/README.md) <!-- @lint-ignore -->

 **Resources:** [`examples/`](examples/) directory • [executorch-examples](https://github.com/meta-pytorch/executorch-examples) out-of-tree demos • [Optimum-ExecuTorch](https://github.com/huggingface/optimum-executorch) for HuggingFace models • [Unsloth](https://docs.unsloth.ai/new/deploy-llms-phone) for fine-tuned LLM deployment <!-- @lint-ignore -->

backends/apple/metal/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
@@ -48,6 +48,7 @@ set(_aoti_metal_sources
     runtime/ops/op_linear_4bit.mm
     runtime/ops/op_mm.mm
     runtime/ops/op_sdpa.mm
+    runtime/ops/op_topk.mm
 )

 add_library(metal_backend STATIC ${_aoti_metal_sources})

backends/apple/metal/metal_backend.py

Lines changed: 1 addition & 0 deletions
@@ -36,6 +36,7 @@ def get_supported_fallback_kernels(cls) -> Dict[str, Any]:
             "aoti_torch_mps_mm_out": None,
             "at::_ops::_scaled_dot_product_attention_math_for_mps::call": None,
             "torchao::_linear_fp_act_4bit_weight": None,
+            "at::_ops::topk::call": None,
         }

     @classmethod

backends/apple/metal/runtime/ops/op_sdpa.mm

Lines changed: 6 additions & 5 deletions
@@ -226,7 +226,8 @@
 #define INSTANTIATE_SDPA_VECTOR_HEADS(DTYPE) \
   INSTANTIATE_SDPA_VECTOR(DTYPE, 64, 64);    \
   INSTANTIATE_SDPA_VECTOR(DTYPE, 96, 96);    \
-  INSTANTIATE_SDPA_VECTOR(DTYPE, 128, 128);
+  INSTANTIATE_SDPA_VECTOR(DTYPE, 128, 128);  \
+  INSTANTIATE_SDPA_VECTOR(DTYPE, 256, 256);

 INSTANTIATE_SDPA_VECTOR_HEADS(float);
 INSTANTIATE_SDPA_VECTOR_HEADS(bfloat);
@@ -430,11 +431,11 @@ AOTITorchError aoti_torch_mps__scaled_dot_product_attention_math_for_mps(
     throw std::runtime_error("Unsupported dtype for Metal SDPA kernel");
   }

-  // Select head_dim - must match exactly one of the supported sizes (64, 96, 128)
+  // Select head_dim - must match exactly one of the supported sizes (64, 96, 128, 256)
   int64_t head_dim = headSize;
-  if (head_dim != 64 && head_dim != 96 && head_dim != 128) {
-    ET_LOG(Error, "aoti_torch_mps__scaled_dot_product_attention_math_for_mps: Unsupported head_dim %lld (must be 64, 96, or 128)", head_dim);
-    throw std::runtime_error("Unsupported head_dim for Metal SDPA kernel - must be exactly 64, 96, or 128");
+  if (head_dim != 64 && head_dim != 96 && head_dim != 128 && head_dim != 256) {
+    ET_LOG(Error, "aoti_torch_mps__scaled_dot_product_attention_math_for_mps: Unsupported head_dim %lld (must be 64, 96, 128, or 256)", head_dim);
+    throw std::runtime_error("Unsupported head_dim for Metal SDPA kernel - must be exactly 64, 96, 128, or 256");
   }

   std::string kernel_name = "sdpa_vector_" + type_name + "_" + std::to_string(head_dim) + "_" + std::to_string(head_dim);
