Skip to content

Commit 782d064

Browse files
committed
Zephyr: Add MobileNetV2 image classification sample with Ethos-U NPU
Add a new Zephyr sample that runs a quantized INT8 MobileNetV2 model on Arm Ethos-U NPU using ExecuTorch. The sample classifies a static 224x224x3 RGB test image into 1000 ImageNet classes and prints the top-5 predictions. Validated end-to-end on Alif Ensemble E8 DevKit (Cortex-M55 + Ethos-U55 256 MAC) achieving 19ms inference with 100% NPU delegation (110 ops). This addresses part of #17654 (Zephyr: Expand samples and documentation) by adding a second sample app (MV2) beyond the existing hello-executorch. Authored with assistance from Claude.
1 parent cb94506 commit 782d064

10 files changed

Lines changed: 10261 additions & 0 deletions

File tree

Lines changed: 249 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,249 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
# Copyright 2025-2026 Arm Limited and/or its affiliates.
4+
#
5+
# SPDX-License-Identifier: Apache-2.0
6+
7+
cmake_minimum_required(VERSION 3.24)
8+
9+
set(CMAKE_SKIP_INSTALL_RULES
10+
ON
11+
CACHE BOOL "" FORCE
12+
)
13+
14+
set(ET_PTE_FILE_PATH
15+
""
16+
CACHE FILEPATH "Path to the ExecuTorch .pte (or .bpte) model to embed"
17+
)
18+
set(ET_PTE_SECTION
19+
"network_model_sec"
20+
CACHE STRING "Section attribute used for the generated model data"
21+
)
22+
23+
if(NOT ET_PTE_FILE_PATH)
24+
message(
25+
FATAL_ERROR
26+
"ET_PTE_FILE_PATH must point to the ExecuTorch .pte (or .bpte) model to embed."
27+
)
28+
endif()
29+
30+
if(NOT IS_ABSOLUTE "${ET_PTE_FILE_PATH}")
31+
get_filename_component(
32+
ET_PTE_FILE_PATH "${ET_PTE_FILE_PATH}" ABSOLUTE BASE_DIR
33+
"${CMAKE_CURRENT_SOURCE_DIR}"
34+
)
35+
endif()
36+
37+
if(NOT EXISTS "${ET_PTE_FILE_PATH}")
38+
message(
39+
FATAL_ERROR
40+
"Could not find ExecuTorch model at ET_PTE_FILE_PATH: ${ET_PTE_FILE_PATH}"
41+
)
42+
endif()
43+
44+
set(ET_PTE_FILE_PATH
45+
"${ET_PTE_FILE_PATH}"
46+
CACHE FILEPATH "Path to the ExecuTorch .pte (or .bpte) model to embed"
47+
FORCE
48+
)
49+
50+
# Run ExecuTorch's gen_oplist.py on the model at configure time. Its stdout is
# captured in CMD_RESULT, which the following check scans for "aten::" and
# "dim_order_ops::" operator names.
# NOTE(review): this runs before find_package(Python3), so it relies on a
# `python` executable being on PATH -- confirm that is intended.
execute_process(
  COMMAND
    python "${CMAKE_CURRENT_LIST_DIR}/../../../codegen/tools/gen_oplist.py"
    --model_file_path=${ET_PTE_FILE_PATH}
    --output_path=${CMAKE_CURRENT_BINARY_DIR}/temp.yaml
  OUTPUT_VARIABLE CMD_RESULT
  RESULT_VARIABLE _gen_oplist_status
)

# A failed run leaves CMD_RESULT without any operator names, which is
# indistinguishable from a fully delegated model. Surface the failure instead
# of silently building without portable ops. This is a warning rather than a
# fatal error so that configurations which previously succeeded still do.
if(NOT "${_gen_oplist_status}" STREQUAL "0")
  message(
    WARNING
      "gen_oplist.py did not complete successfully (result: ${_gen_oplist_status}); "
      "operator detection for ${ET_PTE_FILE_PATH} may be incomplete."
  )
endif()
57+
58+
# Classify the model from the captured gen_oplist.py output: an "aten::" or
# "dim_order_ops::" operator name switches on the portable-ops generation flag
# used further down in this file.
if(CMD_RESULT MATCHES "(aten|dim_order_ops)::")
  set(FOUND_OPS_IN_FILE "true")
else()
  set(FOUND_OPS_IN_FILE "false")
endif()

# An explicit operator list is never used by this sample; operators come from
# the model file or not at all.
set(EXECUTORCH_SELECT_OPS_LIST "")

if(NOT FOUND_OPS_IN_FILE)
  # Nothing to generate: clear the model selection and disable codegen.
  set(EXECUTORCH_SELECT_OPS_MODEL "")
  set(_EXECUTORCH_GEN_ZEPHYR_PORTABLE_OPS OFF)
  message(
    "gen_oplist: No non delegated ops was found in ${ET_PTE_FILE_PATH} no ops added to build"
  )
else()
  # Select operators from the model itself and enable codegen.
  set(EXECUTORCH_SELECT_OPS_MODEL
      "${ET_PTE_FILE_PATH}"
      CACHE STRING "Select operators from this ExecuTorch model" FORCE
  )
  set(_EXECUTORCH_GEN_ZEPHYR_PORTABLE_OPS ON)
  message(
    "gen_oplist: EXECUTORCH_SELECT_OPS_MODEL=${ET_PTE_FILE_PATH} is used to auto generate ops from"
  )
endif()
82+
83+
find_package(Zephyr REQUIRED HINTS $ENV{ZEPHYR_BASE})
84+
project(executorch_mv2_ethosu)
85+
86+
find_package(
87+
Python3
88+
COMPONENTS Interpreter
89+
REQUIRED
90+
)
91+
92+
set(CMAKE_CXX_FLAGS
93+
"${CMAKE_CXX_FLAGS} -Wall -Wno-switch -Wno-float-conversion -Wno-double-promotion -ffunction-sections -fdata-sections"
94+
)
95+
96+
# Resolve EXECUTORCH_DIR: a value supplied by the caller wins; otherwise fall
# back to ZEPHYR_EXECUTORCH_MODULE_DIR, and stop the configure step when
# neither is defined.
if(DEFINED EXECUTORCH_DIR)
  message(STATUS "Using predefined EXECUTORCH_DIR=${EXECUTORCH_DIR}")
else()
  message(
    STATUS
      "ZEPHYR_EXECUTORCH_MODULE_DIR set to : ${ZEPHYR_EXECUTORCH_MODULE_DIR}"
  )
  if(NOT DEFINED ZEPHYR_EXECUTORCH_MODULE_DIR)
    # FATAL_ERROR ends processing here, so the lines below only run when the
    # module directory is known.
    message(
      FATAL_ERROR
        "ExecuTorch module not found. Ensure it's properly configured in your Zephyr workspace."
    )
  endif()
  set(EXECUTORCH_DIR ${ZEPHYR_EXECUTORCH_MODULE_DIR})
  message(
    STATUS "Using Zephyr module discovery: EXECUTORCH_DIR=${EXECUTORCH_DIR}"
  )
endif()
115+
116+
set(EXECUTORCH_ROOT ${EXECUTORCH_DIR})
117+
include(${EXECUTORCH_DIR}/tools/cmake/Utils.cmake)
118+
119+
if(NOT TARGET portable_kernels)
120+
set(EXECUTORCH_PORTABLE_BUILD_KERNELS_ONLY ON)
121+
add_subdirectory(
122+
${EXECUTORCH_DIR}/kernels/portable
123+
${CMAKE_CURRENT_BINARY_DIR}/executorch/kernels/portable
124+
)
125+
unset(EXECUTORCH_PORTABLE_BUILD_KERNELS_ONLY)
126+
endif()
127+
set(EXECUTORCH_OPS_LIB "")
128+
if(_EXECUTORCH_GEN_ZEPHYR_PORTABLE_OPS)
129+
include(${EXECUTORCH_DIR}/tools/cmake/Codegen.cmake)
130+
if(NOT DEFINED EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD)
131+
set(EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD "")
132+
endif()
133+
gen_selected_ops(
134+
LIB_NAME
135+
"cpu_portable_ops_lib"
136+
OPS_SCHEMA_YAML
137+
""
138+
ROOT_OPS
139+
"${EXECUTORCH_SELECT_OPS_LIST}"
140+
INCLUDE_ALL_OPS
141+
""
142+
OPS_FROM_MODEL
143+
"${EXECUTORCH_SELECT_OPS_MODEL}"
144+
DTYPE_SELECTIVE_BUILD
145+
"${EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD}"
146+
)
147+
generate_bindings_for_kernels(
148+
LIB_NAME "cpu_portable_ops_lib" FUNCTIONS_YAML
149+
${EXECUTORCH_DIR}/kernels/portable/functions.yaml DTYPE_SELECTIVE_BUILD
150+
"${EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD}"
151+
)
152+
gen_operators_lib(
153+
LIB_NAME
154+
"cpu_portable_ops_lib"
155+
KERNEL_LIBS
156+
portable_kernels
157+
DEPS
158+
executorch
159+
DTYPE_SELECTIVE_BUILD
160+
"${EXECUTORCH_ENABLE_DTYPE_SELECTIVE_BUILD}"
161+
)
162+
set(EXECUTORCH_OPS_LIB "cpu_portable_ops_lib")
163+
endif()
164+
165+
# Use a copy of ExecuTorch's bundled flatcc sources inside the build tree. The
# copy is made only when the destination has no CMakeLists.txt yet.
set(_local_flatcc_root ${CMAKE_BINARY_DIR}/flatcc_src)
if(NOT EXISTS ${_local_flatcc_root}/CMakeLists.txt)
  file(MAKE_DIRECTORY ${_local_flatcc_root})
  execute_process(
    COMMAND ${CMAKE_COMMAND} -E copy_directory
            ${EXECUTORCH_DIR}/third-party/flatcc ${_local_flatcc_root}
    RESULT_VARIABLE _flatcc_copy_status
  )
  # Report a failed copy here, where the cause is obvious, rather than letting
  # it show up later as a missing-file error. Kept as a warning so that
  # configurations which previously succeeded still do.
  if(NOT "${_flatcc_copy_status}" STREQUAL "0")
    message(
      WARNING
        "Copying ${EXECUTORCH_DIR}/third-party/flatcc to ${_local_flatcc_root} failed "
        "(result: ${_flatcc_copy_status})."
    )
  endif()
endif()
173+
set(EXECUTORCH_FLATCC_SOURCE_ROOT
174+
${_local_flatcc_root}
175+
CACHE PATH "" FORCE
176+
)
177+
set(EXECUTORCH_FLATCC_INSTALL_ROOT
178+
${_local_flatcc_root}
179+
CACHE PATH "" FORCE
180+
)
181+
182+
set(app_sources
183+
src/main.cpp
184+
${EXECUTORCH_DIR}/examples/arm/executor_runner/arm_memory_allocator.cpp
185+
)
186+
target_sources(app PRIVATE ${app_sources})
187+
188+
set(_model_pte_header ${CMAKE_CURRENT_BINARY_DIR}/model_pte.h)
189+
# Build rule that runs pte_to_header.py on the model to produce the header
# named by _model_pte_header. It re-runs when either the model file or the
# converter script changes. VERBATIM makes CMake escape the arguments
# consistently across platforms and generators.
add_custom_command(
  OUTPUT ${_model_pte_header}
  COMMAND
    ${Python3_EXECUTABLE}
    ${EXECUTORCH_DIR}/examples/arm/executor_runner/pte_to_header.py --pte
    ${ET_PTE_FILE_PATH} --outdir ${CMAKE_CURRENT_BINARY_DIR} --section
    ${ET_PTE_SECTION}
  DEPENDS ${ET_PTE_FILE_PATH}
          ${EXECUTORCH_DIR}/examples/arm/executor_runner/pte_to_header.py
  COMMENT "Converting ${ET_PTE_FILE_PATH} to model_pte.h"
  VERBATIM
)
200+
add_custom_target(gen_model_header DEPENDS ${_model_pte_header})
201+
add_dependencies(app gen_model_header)
202+
203+
if(DEFINED CONFIG_EXECUTORCH_METHOD_ALLOCATOR_POOL_SIZE)
204+
target_compile_definitions(
205+
app
206+
PRIVATE
207+
ET_ARM_METHOD_ALLOCATOR_POOL_SIZE=${CONFIG_EXECUTORCH_METHOD_ALLOCATOR_POOL_SIZE}
208+
)
209+
endif()
210+
if(DEFINED CONFIG_EXECUTORCH_TEMP_ALLOCATOR_POOL_SIZE)
211+
target_compile_definitions(
212+
app
213+
PRIVATE
214+
ET_ARM_BAREMETAL_SCRATCH_TEMP_ALLOCATOR_POOL_SIZE=${CONFIG_EXECUTORCH_TEMP_ALLOCATOR_POOL_SIZE}
215+
)
216+
endif()
217+
218+
target_link_libraries(app PRIVATE libexecutorch)
219+
if(EXECUTORCH_OPS_LIB)
220+
target_link_libraries(app PRIVATE ${EXECUTORCH_OPS_LIB})
221+
endif()
222+
if(CONFIG_CPU_CORTEX_M)
223+
if(TARGET cortex_m_ops_lib)
224+
target_link_libraries(app PRIVATE cortex_m_ops_lib)
225+
endif()
226+
if(TARGET cortex_m_kernels)
227+
executorch_target_link_options_shared_lib(cortex_m_kernels)
228+
target_link_libraries(app PRIVATE cortex_m_kernels)
229+
endif()
230+
endif()
231+
if(TARGET quantized_kernels)
232+
executorch_target_link_options_shared_lib(quantized_kernels)
233+
target_link_libraries(app PRIVATE quantized_kernels)
234+
endif()
235+
if(TARGET portable_kernels)
236+
executorch_target_link_options_shared_lib(portable_kernels)
237+
target_link_libraries(app PRIVATE portable_kernels)
238+
endif()
239+
if(TARGET executorch_delegate_ethos_u)
240+
executorch_target_link_options_shared_lib(executorch_delegate_ethos_u)
241+
target_link_libraries(app PRIVATE executorch_delegate_ethos_u)
242+
endif()
243+
if(TARGET ethosu_core_driver)
244+
target_link_libraries(app PRIVATE ethosu_core_driver)
245+
endif()
246+
247+
target_include_directories(app PRIVATE src ${CMAKE_CURRENT_BINARY_DIR})
248+
# Log the libraries `app` links against. The expansion is quoted so the list
# reaches message() as one argument and keeps its separators; unquoted, the
# elements would be concatenated into a single run-together string.
get_target_property(OUT app LINK_LIBRARIES)
message(STATUS "${OUT}")

zephyr/samples/mv2-ethosu/Kconfig

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Copyright (c) Meta Platforms, Inc. and affiliates.
2+
# All rights reserved.
3+
# Copyright 2025-2026 Arm Limited and/or its affiliates.
4+
#
5+
# SPDX-License-Identifier: Apache-2.0
6+
7+
source "Kconfig.zephyr"
8+
9+
menu "ExecuTorch MobileNetV2 sample configuration"
10+
11+
config EXECUTORCH_METHOD_ALLOCATOR_POOL_SIZE
12+
int "Method allocator pool size in bytes"
13+
default 1572864
14+
depends on EXECUTORCH
15+
help
16+
Size of the method allocator pool in bytes. MobileNetV2 requires
17+
more memory than simple models. Default is 1.5MB which is sufficient
18+
for a fully NPU-delegated INT8 MobileNetV2 model.
19+
20+
config EXECUTORCH_TEMP_ALLOCATOR_POOL_SIZE
21+
int "Temporary allocator pool size in bytes"
22+
default 1572864
23+
depends on EXECUTORCH
24+
help
25+
Size of the temporary allocator pool in bytes. Default is 1.5MB
26+
which provides sufficient scratch space for MobileNetV2 inference
27+
on Ethos-U NPU.
28+
29+
endmenu
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
# MobileNetV2 Image Classification with Ethos-U NPU
2+
3+
This sample demonstrates running a quantized MobileNetV2 image classification
4+
model on the Arm Ethos-U NPU using ExecuTorch within a Zephyr RTOS application.
5+
6+
The model classifies a static 224x224x3 RGB test image into one of 1000
7+
ImageNet classes and prints the top-5 predictions.
8+
9+
## Prerequisites
10+
11+
- Zephyr SDK with ExecuTorch module enabled
12+
- Python 3.10+ with ExecuTorch, torchvision, and ethos-u-vela installed
13+
- A board with Arm Ethos-U NPU (e.g., Corstone-300 FVP, Alif E7/E8 DevKit)
14+
15+
## Export the model
16+
17+
Export a quantized INT8 MobileNetV2 model with Ethos-U delegation:
18+
19+
```bash
20+
python -m executorch.backends.arm.scripts.aot_arm_compiler \
21+
--model_name=mv2_untrained \
22+
--quantize \
23+
--delegate \
24+
--target=ethos-u55-128 \
25+
--output=mv2_ethosu.pte
26+
```
27+
28+
For boards with Ethos-U55-256 (e.g., Alif E8 HP core), use `--target=ethos-u55-256`.
29+
30+
## Build
31+
32+
### Corstone-300 FVP
33+
34+
```bash
35+
west build -b mps3/corstone300/fvp \
36+
modules/lib/executorch/zephyr/samples/mv2-ethosu \
37+
-t run -- \
38+
-DET_PTE_FILE_PATH=mv2_ethosu.pte
39+
```
40+
41+
### Alif Ensemble E8 DevKit
42+
43+
```bash
44+
west build -b alif_e8_dk/ae822fa0e5597xx0/rtss_hp \
45+
-S ethos-u55-enable \
46+
modules/lib/executorch/zephyr/samples/mv2-ethosu -- \
47+
-DET_PTE_FILE_PATH=mv2_ethosu.pte
48+
```
49+
50+
## Expected output
51+
52+
```
53+
========================================
54+
ExecuTorch MobileNetV2 Classification Demo
55+
========================================
56+
57+
Ethos-U backend registered successfully
58+
Model loaded, has 1 methods
59+
Inference completed in <N> ms
60+
61+
--- Classification Results ---
62+
Top-5 predictions:
63+
[1] class <id>: <score>
64+
[2] class <id>: <score>
65+
...
66+
67+
MobileNetV2 Demo Complete
68+
Inference time: <N> ms
69+
========================================
70+
```
71+
72+
When using `mv2_untrained`, the output class IDs will be arbitrary since the
73+
model has no trained weights. Use `mv2` (requires torchvision pretrained
74+
weights) for meaningful predictions.
75+
76+
## Memory requirements
77+
78+
The default configuration allocates 1.5 MB each for the method and temporary
79+
allocator pools. These defaults are sufficient for a fully NPU-delegated INT8
80+
MobileNetV2. Adjust `CONFIG_EXECUTORCH_METHOD_ALLOCATOR_POOL_SIZE` and
81+
`CONFIG_EXECUTORCH_TEMP_ALLOCATOR_POOL_SIZE` in `prj.conf` or via board-specific
82+
overlay files for different model configurations.
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Copyright 2026 Arm Limited and/or its affiliates.
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
# Enable the Zephyr Ethos-U driver so executorch_delegate_ethos_u can reserve
6+
# and use the hardware instance exposed by the board DTS.
7+
CONFIG_ETHOS_U=y
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
/* Copyright 2026 Arm Limited and/or its affiliates.
2+
*
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
/* Ensure ExecuTorch scratch buffers live in shared SRAM that the Ethos-U
7+
* DMA can access. The default board DTS routes Zephyr's general-purpose
8+
* SRAM to DTCM, which the NPU cannot reach. Override the choice so that
9+
* .data/.bss land in ISRAM (0x3100_0000) instead.
10+
*/
11+
/ {
12+
chosen {
13+
zephyr,sram = &isram;
14+
};
15+
};
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Copyright 2026 Arm Limited and/or its affiliates.
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
#
5+
# Enable the Zephyr Ethos-U driver so executorch_delegate_ethos_u can reserve
6+
# and use the hardware instance exposed by the board DTS.
7+
CONFIG_ETHOS_U=y

0 commit comments

Comments
 (0)