Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 11 additions & 3 deletions backends/arm/runtime/EthosUBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,12 @@
#include <executorch/runtime/core/exec_aten/util/dim_order_util.h>
#include <executorch/runtime/core/exec_aten/util/scalar_type_util.h>

// Overridable memcpy used by the EthosU backend for input/output scratch
// shuffling. Default (weak) implementation in EthosUBackend_IoMemcpy.cpp does
// std::memcpy. Firmware targets can supply a strong override (e.g. routing
// through a DMA engine) to reduce CPU memcpy load on the host MCU.
//
// Parameters mirror std::memcpy: copy `size` bytes from `src` to `dst`.
// NOTE(review): call sites treat this as a drop-in memcpy replacement, so an
// override presumably has to complete the copy before returning -- confirm
// for asynchronous DMA implementations.
extern "C" void arm_ethos_io_memcpy(void* dst, const void* src, size_t size);

using namespace std;

using executorch::aten::ScalarType;
Expand Down Expand Up @@ -237,8 +243,9 @@ class EthosUBackend final : public ::executorch::runtime::BackendInterface {
if (both_char || both_int || both_short || both_bool) {
EXECUTORCH_PROF_SCOPE(
event_tracer, "+EthosUBackend::execute()handles.input.memcpy()");
// Sizes match and elt size matches so memcpy
memcpy(
// Sizes match and elt size matches so memcpy.
// Routed through arm_ethos_io_memcpy so firmware can DMA-accelerate.
arm_ethos_io_memcpy(
scratch_addr,
tensor_in.mutable_data_ptr<char>(),
tensor_in.nbytes());
Expand Down Expand Up @@ -389,7 +396,8 @@ Error copy_with_layout_adjustment(
}
const char* src_bytes = src;
for (size_t chunk_idx = 0; chunk_idx < chunk_count; ++chunk_idx) {
memcpy(dest, src_bytes, chunk_size);
// Routed through arm_ethos_io_memcpy so firmware can DMA-accelerate.
arm_ethos_io_memcpy(dest, src_bytes, chunk_size);
src_bytes += vela_chunk_size;
dest += chunk_size;
}
Expand Down
9 changes: 8 additions & 1 deletion backends/arm/runtime/EthosUBackend_Cortex_M.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,12 @@ extern "C" __attribute__((weak)) struct ethosu_driver* ethosu_reserve_driver_ex(
return ethosu_reserve_driver();
}

// Overridable memcpy used by the EthosU backend for output scratch
// shuffling. Default (weak) implementation in EthosUBackend_IoMemcpy.cpp does
// std::memcpy. Firmware targets can supply a strong override (e.g. routing
// through a DMA engine) to reduce CPU memcpy load on the host MCU.
//
// Parameters mirror std::memcpy: copy `size` bytes from `src` to `dst`.
// NOTE(review): this declaration is repeated per translation unit; keep the
// signature in sync with the definition in EthosUBackend_IoMemcpy.cpp.
extern "C" void arm_ethos_io_memcpy(void* dst, const void* src, size_t size);

namespace executorch {
namespace backends {
namespace arm {
Expand Down Expand Up @@ -133,7 +139,8 @@ Error platform_execute(
}
io_bytes_total += tensor_bytes;
} else {
memcpy(
// Routed through arm_ethos_io_memcpy so firmware can DMA-accelerate.
arm_ethos_io_memcpy(
tensor_out.mutable_data_ptr<char>(),
static_cast<const char*>(output_addr),
tensor_bytes);
Expand Down
19 changes: 19 additions & 0 deletions backends/arm/runtime/EthosUBackend_IoMemcpy.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/*
* Copyright 2026 Arm Limited and/or its affiliates.
*
* This source code is licensed under the BSD-style license found in the
* LICENSE file in the root directory of this source tree.
*/

#include <cstddef>
#include <cstring>

// Weak default for arm_ethos_io_memcpy. Firmware targets can provide a
// strong-symbol override (e.g. routing through DMA on Cortex-M55) without
// touching the upstream EthosUBackend code. Lives in its own translation
// unit so the compiler in the call-site TUs cannot inline this body and
// bypass the link-time override (same trick as bolt_arm_memcpy_external).
//
// Copies `size` bytes from `src` to `dst`; the ranges must not overlap.
//   dst  - destination buffer, writable for at least `size` bytes.
//   src  - source buffer, readable for at least `size` bytes.
//   size - number of bytes to copy; 0 is a no-op and the pointers are then
//          never touched.
extern "C" __attribute__((weak)) void
arm_ethos_io_memcpy(void* dst, const void* src, std::size_t size) {
  // std::memcpy requires valid pointers even for a zero-length copy, so a
  // zero-byte request (e.g. an empty tensor whose data pointer may be null)
  // must not reach it.
  if (size == 0) {
    return;
  }
  std::memcpy(dst, src, size);
}
1 change: 1 addition & 0 deletions backends/arm/runtime/targets.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ def define_common_targets():
srcs = [
"EthosUBackend.cpp",
"EthosUBackend_Cortex_M.cpp",
"EthosUBackend_IoMemcpy.cpp",
],
headers = ["EthosUBackend_Internal.h"],
compatible_with = ["ovr_config//cpu:arm32-embedded", "ovr_config//cpu:arm32-embedded-fpu"],
Expand Down
Loading