Skip to content

Commit 13b7ddc

Browse files
authored
Merge branch 'main' into aot_pre
2 parents be42e3b + e95555a commit 13b7ddc

27 files changed

Lines changed: 1420 additions & 253 deletions

.ci/scripts/wheel/pre_build_script.sh

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -44,16 +44,3 @@ fi
4444
# able to see the installed torch package.
4545

4646
"${GITHUB_WORKSPACE}/${REPOSITORY}/install_requirements.sh" --example
47-
48-
# Download Qualcomm QNN SDK on Linux x86_64 so the wheel build can include the
49-
# QNN backend. The SDK is large, so we download it here (outside CMake) rather
50-
# than during cmake configure.
51-
if [[ "$(uname -s)" == "Linux" && "$(uname -m)" == "x86_64" ]]; then
52-
echo "Downloading Qualcomm QNN SDK..."
53-
QNN_SDK_ROOT=$(python3 \
54-
"${GITHUB_WORKSPACE}/${REPOSITORY}/backends/qualcomm/scripts/download_qnn_sdk.py" \
55-
--print-sdk-path)
56-
export QNN_SDK_ROOT
57-
echo "QNN_SDK_ROOT=${QNN_SDK_ROOT}" >> "${GITHUB_ENV}"
58-
echo "QNN SDK downloaded to ${QNN_SDK_ROOT}"
59-
fi

.ci/scripts/wheel/test_linux.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,25 +5,10 @@
55
# This source code is licensed under the BSD-style license found in the
66
# LICENSE file in the root directory of this source tree.
77

8-
import platform
9-
108
import test_base
119
from examples.models import Backend, Model
1210

1311
if __name__ == "__main__":
14-
# On Linux x86_64 the wheel is built with the Qualcomm backend.
15-
# Verify that it was registered correctly.
16-
if platform.system() == "Linux" and platform.machine() in ("x86_64", "amd64"):
17-
from executorch.extension.pybindings.portable_lib import (
18-
_get_registered_backend_names,
19-
)
20-
21-
registered = _get_registered_backend_names()
22-
assert (
23-
"QnnBackend" in registered
24-
), f"QnnBackend not found in registered backends: {registered}"
25-
print("✓ QnnBackend is registered")
26-
2712
test_base.run_tests(
2813
model_tests=[
2914
test_base.ModelTest(

.github/workflows/pull.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ jobs:
1616
test-qnn-wheel-packages-linux:
1717
name: test-qnn-wheel-packages-linux
1818
uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
19+
if: false
1920
permissions:
2021
id-token: write
2122
contents: read

CMakePresets.json

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -290,13 +290,8 @@
290290
"name": "arm-ethosu-linux",
291291
"displayName": "Build ExecuTorch for Arm Ethos-U Linux",
292292
"inherits": ["common"],
293-
"description": "musl declares __assert_fail with int for line; avoid NDEBUG forward-decl mismatch in Release builds",
294293
"cacheVariables": {
295-
"EXECUTORCH_BUILD_ARM_ETHOSU_LINUX": "ON",
296-
"EXECUTORCH_BUILD_EXECUTOR_RUNNER": "ON",
297-
"EXECUTORCH_BUILD_KERNELS_QUANTIZED": "ON",
298-
"CMAKE_C_FLAGS_RELEASE": "-UNDEBUG",
299-
"CMAKE_CXX_FLAGS_RELEASE": "-UNDEBUG",
294+
"EXECUTORCH_BUILD_PRESET_FILE": "${sourceDir}/tools/cmake/preset/arm_ethosu_linux.cmake",
300295
"CMAKE_TOOLCHAIN_FILE": "${sourceDir}/examples/arm/ethos-u-setup/aarch64-linux-musl-toolchain.cmake"
301296
}
302297
}

backends/arm/README.md

Lines changed: 83 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -76,27 +76,104 @@ The Arm backend can be built using the following command:
7676
./install_executorch.sh
7777
```
7878

79-
One of the following commands should also be run once to gather the necessary dependencies for your chosen target(s):
79+
**NOTE:** While developing, it can be convenient to use `./install_executorch.sh --editable`, which creates an editable installation of ExecuTorch.
8080

81-
For the Ethos-U target:
81+
### Target-specific setup and build
82+
83+
Pick one of the target flows below. Each flow has a one-time setup step and a build command.
84+
85+
### Baremetal (Ethos-U) workflow
86+
87+
Builds ExecuTorch runtime libraries for Cortex-M with Ethos-U acceleration.
88+
89+
Setup:
8290

8391
```
8492
./examples/arm/setup.sh --i-agree-to-the-contained-eula
8593
```
8694

87-
For the VGF target:
95+
Build:
96+
97+
```
98+
./backends/arm/scripts/build_executorch.sh
99+
```
100+
101+
### VGF (Vulkan ML extensions) workflow
102+
103+
Setup:
88104

89105
```
90106
./examples/arm/setup.sh --disable-ethos-u-deps --enable-mlsdk-deps
91107
```
92108

93-
For both Ethos-U & VGF targets:
109+
The current flow lowers to TOSA and converts to VGF for use in external projects,
110+
so the `executor_runner` is not typically used here.
111+
112+
### Direct Drive (experimental, Ethos-U85 on Linux) workflow
113+
114+
Direct Drive enables execution on Ethos-U85 via the Linux driver stack.
115+
116+
Driver stack (Linux) and API:
117+
118+
```
119+
https://gitlab.arm.com/artificial-intelligence/ethos-u/ethos-u-linux-driver-stack
120+
```
121+
122+
An FVP with Linux is available for Direct Drive, but it must be built and run
123+
manually. See:
94124

95125
```
96-
./examples/arm/setup.sh --i-agree-to-the-contained-eula --enable-mlsdk-deps
126+
https://corstone1000.docs.arm.com/en/corstone1000-2025.12/
97127
```
98128

99-
**NOTE:** While developing, it can be convenient to use `./install_executorch.sh --editable`, which creates an editable installation of ExecuTorch.
129+
Setup:
130+
131+
```
132+
./examples/arm/setup.sh --i-agree-to-the-contained-eula --target-toolchain linux-musl
133+
source ./examples/arm/arm-scratch/setup_path.sh
134+
```
135+
136+
Build:
137+
138+
```
139+
./backends/arm/scripts/build_executorch.sh \
140+
--toolchain=aarch64-linux-musl-gcc \
141+
--build_type=Debug
142+
```
143+
144+
Note: setup selects the linux-musl toolchain; build uses the aarch64-linux-musl GCC toolchain name.
145+
146+
If your Yocto image enables the dropbear SSH server, you can copy the
147+
`executor_runner` binary into the running FVP via scp:
148+
149+
```
150+
scp -P 2222 arm_test/cmake-out/executor_runner root@127.0.0.1:/tmp/
151+
```
152+
153+
#### Direct Drive model (PTE) workflow
154+
155+
Create a PTE file:
156+
157+
```
158+
python3 -m examples.arm.aot_arm_compiler \
159+
--model_name examples/arm/example_modules/add.py \
160+
--delegate \
161+
--quantize \
162+
--target ethos-u85-256 \
163+
--direct_drive
164+
```
165+
166+
Copy the `executor_runner` binary and the generated PTE file to the running FVP:
167+
168+
```
169+
scp -P 2222 arm_test/cmake-out/executor_runner add_arm_delegate_ethos-u85-256.pte root@127.0.0.1:/tmp/
170+
```
171+
172+
Run the model on the FVP:
173+
174+
```
175+
ssh -p 2222 root@127.0.0.1 -t "/tmp/executor_runner -model_path /tmp/add_arm_delegate_ethos-u85-256.pte -num_executions 1"
176+
```
100177

101178
## Testing
102179

backends/arm/runtime/EthosUBackend_Cortex_A.cpp

Lines changed: 36 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -347,19 +347,13 @@ Error platform_execute(
347347
int output_count,
348348
Span<executorch::runtime::EValue*> args,
349349
char* /*ethosu_scratch*/) {
350-
std::vector<size_t> input_copy_sizes;
351-
std::vector<const char*> linux_input_ptrs;
352-
if (input_count > 0) {
353-
input_copy_sizes.resize(input_count, 0);
354-
linux_input_ptrs.resize(input_count, nullptr);
355-
}
350+
std::vector<size_t> input_copy_sizes(input_count, 0);
351+
std::vector<const char*> linux_input_ptrs(input_count, nullptr);
356352

357-
std::vector<size_t> output_io_bytes;
358-
std::vector<char*> linux_output_ptrs;
359-
if (output_count > 0) {
360-
output_io_bytes.resize(output_count, 0);
361-
linux_output_ptrs.resize(output_count, nullptr);
362-
}
353+
std::vector<size_t> output_io_bytes(output_count, 0);
354+
std::vector<char*> linux_output_ptrs(output_count, nullptr);
355+
std::vector<std::vector<char>> output_scratch_buffers(output_count);
356+
std::vector<bool> output_needs_adjustment(output_count, false);
363357

364358
for (int i = 0; i < input_count; ++i) {
365359
auto tensor_in = args[i]->toTensor();
@@ -380,16 +374,12 @@ Error platform_execute(
380374
const size_t tensor_nbytes = tensor_out.nbytes();
381375
if (i < static_cast<int>(output_io_bytes.size()) &&
382376
output_io_bytes[i] != tensor_nbytes) {
383-
ET_LOG(
384-
Error,
385-
"Ethos-U Linux backend output size mismatch for index %d: "
386-
"driver IO bytes = %zu, tensor bytes = %zu",
387-
i,
388-
output_io_bytes[i],
389-
tensor_nbytes);
390-
return Error::InvalidState;
377+
output_scratch_buffers[i].resize(output_io_bytes[i]);
378+
linux_output_ptrs[i] = output_scratch_buffers[i].data();
379+
output_needs_adjustment[i] = true;
380+
} else {
381+
linux_output_ptrs[i] = tensor_out.mutable_data_ptr<char>();
391382
}
392-
linux_output_ptrs[i] = tensor_out.mutable_data_ptr<char>();
393383
}
394384
}
395385

@@ -399,13 +389,37 @@ Error platform_execute(
399389
return Error::InvalidState;
400390
}
401391

402-
return invoke_linux_driver(
392+
Error status = invoke_linux_driver(
403393
handles,
404394
linux_input_ptrs,
405395
linux_output_ptrs,
406396
input_copy_sizes,
407397
output_io_bytes,
408398
state->options);
399+
if (status != Error::Ok) {
400+
return status;
401+
}
402+
403+
if (handles.outputs != nullptr) {
404+
for (int i = 0; i < output_count; ++i) {
405+
if (!output_needs_adjustment[i]) {
406+
continue;
407+
}
408+
auto tensor_out = args[input_count + i]->toTensor();
409+
const size_t tensor_nbytes = tensor_out.nbytes();
410+
Error adjust_status = copy_with_layout_adjustment(
411+
handles.outputs->io[i],
412+
i,
413+
output_scratch_buffers[i].data(),
414+
tensor_out,
415+
tensor_nbytes);
416+
if (adjust_status != Error::Ok) {
417+
return adjust_status;
418+
}
419+
}
420+
}
421+
422+
return Error::Ok;
409423
}
410424

411425
} // namespace arm

backends/qualcomm/CMakeLists.txt

Lines changed: 4 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -23,19 +23,9 @@ get_filename_component(
2323
_common_include_directories "${EXECUTORCH_SOURCE_DIR}/.." ABSOLUTE
2424
)
2525

26-
# If QNN_SDK_ROOT was not passed as a CMake variable, fall back to the
27-
# environment variable. Prefer downloading the SDK *outside* of CMake (e.g. via
28-
# backends/qualcomm/scripts/download_qnn_sdk.py) and passing the path in.
29-
if(NOT DEFINED QNN_SDK_ROOT AND DEFINED ENV{QNN_SDK_ROOT})
30-
set(QNN_SDK_ROOT
31-
$ENV{QNN_SDK_ROOT}
32-
CACHE PATH "Qualcomm SDK root directory" FORCE
33-
)
34-
endif()
35-
36-
# Last-resort fallback: download during cmake configure when building wheels and
37-
# QNN_SDK_ROOT was not provided externally.
38-
if(NOT DEFINED QNN_SDK_ROOT AND EXECUTORCH_BUILD_WHEEL_DO_NOT_USE)
26+
# We only download QNN SDK when we build pip wheel for ExecuTorch. Please don't
27+
# change this code unless you know what you are doing.
28+
if(EXECUTORCH_BUILD_WHEEL_DO_NOT_USE)
3929
set(_qnn_default_sdk_dir "${CMAKE_CURRENT_BINARY_DIR}/sdk/qnn")
4030

4131
if(EXISTS "${_qnn_default_sdk_dir}" AND EXISTS "${_qnn_default_sdk_dir}/lib")
@@ -45,7 +35,7 @@ if(NOT DEFINED QNN_SDK_ROOT AND EXECUTORCH_BUILD_WHEEL_DO_NOT_USE)
4535
CACHE PATH "Qualcomm SDK root directory" FORCE
4636
)
4737
else()
48-
message(STATUS "Downloading Qualcomm SDK (fallback)")
38+
message(STATUS "Downloading Qualcomm SDK")
4939
execute_process(
5040
COMMAND
5141
${PYTHON_EXECUTABLE}

0 commit comments

Comments (0)