Skip to content

Commit 884c161

Browse files
committed
Qualcomm AI Engine Direct - LPAI Direct Mode Support
1 parent 4ac044b commit 884c161

29 files changed

Lines changed: 386 additions & 123 deletions

.claude/skills/qualcomm/SKILL.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ Use `backends/qualcomm/scripts/build.sh`. Linux only (macOS not supported).
3131
|---|---|---|
3232
| x86_64 (Python interface + host tools) | enabled | `build-x86/` |
3333
| Android arm64-v8a (device runner) | enabled | `build-android/` |
34-
| Hexagon DSP (direct mode) | disabled | `build-hexagon/` |
34+
| Direct mode (LPAI ADSP or Hexagon CDSP) | disabled | `build-direct/` |
3535
| OE Linux embedded | disabled | `build-oe-linux/` |
3636

3737
**Common build commands:**

.gitignore

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ cmake-out*
1818
cmake-out-android/
1919
build-android/
2020
build-x86/
21-
build-hexagon/
21+
build-direct/
2222
dist/
2323
arm-scratch/
2424
executorch.egg-info

backends/qualcomm/CMakeLists.txt

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -248,21 +248,31 @@ target_link_libraries(
248248

249249
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES Hexagon)
250250
# Add macro here so we can dlopen the correct .so library.
251-
if(DSP_TYPE STREQUAL "3")
252-
string(TOUPPER ${DSP_VERSION} CAPITAL_DSP_VERSION)
253-
set(HEXAGON_LIB "libQnnHtp${CAPITAL_DSP_VERSION}.so")
251+
if(DSP_TYPE STREQUAL "0")
252+
message(
253+
STATUS
254+
"aDSP direct and non-direct modes use shared libraries under different folders but with the same name, so skipping the HEXAGON_LIB override."
255+
)
256+
# Just a random string here since QnnBackendUnifiedRegistry.h will ignore this
257+
# macro in the aDSP case.
258+
add_compile_definitions(HEXAGON_LIB="")
259+
elseif(DSP_TYPE STREQUAL "3")
260+
string(TOUPPER ${CDSP_VERSION} CAPITAL_CDSP_VERSION)
261+
set(HEXAGON_LIB "libQnnHtp${CAPITAL_CDSP_VERSION}.so")
254262
add_compile_definitions(HEXAGON_LIB="${HEXAGON_LIB}")
255263
message(STATUS "For hexagon build, using HTP Library: ${HEXAGON_LIB}")
256264
else()
257265
message(FATAL_ERROR "Unknown DSP_TYPE ${DSP_TYPE}")
258266
endif()
259267

268+
# aDSP will also use the cDSP C/C++ libraries since aDSP does not provide
269+
# these libraries itself.
260270
target_link_libraries(
261271
qnn_executorch_backend
262272
PRIVATE
263-
${HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/${DSP_VERSION}/G0/pic/libc.so
264-
${HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/${DSP_VERSION}/G0/pic/libc++.so.1
265-
${HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/${DSP_VERSION}/G0/pic/libc++abi.so.1
273+
${HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/${CDSP_VERSION}/G0/pic/libc.so
274+
${HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/${CDSP_VERSION}/G0/pic/libc++.so.1
275+
${HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/${CDSP_VERSION}/G0/pic/libc++abi.so.1
266276
)
267277
endif()
268278

backends/qualcomm/aot/wrappers/TensorWrapper.cpp

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
*/
88

99
#include <executorch/backends/qualcomm/aot/wrappers/TensorWrapper.h>
10+
#include <executorch/backends/qualcomm/runtime/backends/QnnCustomProtocol.h>
1011

1112
#include <atomic>
1213
#include <cstring>
@@ -115,19 +116,24 @@ TensorWrapper::TensorWrapper(
115116
}
116117
}
117118

118-
Error TensorWrapper::FillDataBuffer(const void* data, bool copy_data) {
119+
Error TensorWrapper::FillDataBuffer(const void* data) {
119120
if (data != nullptr) {
120121
QNN_TENSOR_VER_PTR(tensor_)->memType = QNN_TENSORMEMTYPE_RAW;
122+
#ifdef __hexagon__
123+
// The data's address is already aligned in the idl skel implementation, e.g.
124+
// QnnExecuTorchIdlWrapper.cpp. Here, we ensure the data size we pass is
125+
// also a multiple of 64. QnnExecuTorchIdlWrapper.cpp should have created
126+
// sufficient space for the tensor.
127+
auto align_size = [](size_t alignment, size_t sz) {
128+
return (sz + (alignment - 1)) & ~(alignment - 1);
129+
};
130+
QNN_TENSOR_VER_PTR(tensor_)->clientBuf.dataSize =
131+
align_size(QNN_TENSOR_ALIGNMENT, bytes_);
132+
#else
121133
QNN_TENSOR_VER_PTR(tensor_)->clientBuf.dataSize = bytes_;
122-
if (copy_data) {
123-
owned_data_ = std::make_unique<char[]>(bytes_);
124-
const char* src_data = static_cast<const char*>(data);
125-
std::memcpy(owned_data_.get(), src_data, bytes_);
126-
QNN_TENSOR_VER_PTR(tensor_)->clientBuf.data = owned_data_.get();
127-
} else {
128-
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
129-
QNN_TENSOR_VER_PTR(tensor_)->clientBuf.data = const_cast<void*>(data);
130-
}
134+
#endif
135+
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
136+
QNN_TENSOR_VER_PTR(tensor_)->clientBuf.data = const_cast<void*>(data);
131137
} else {
132138
QNN_EXECUTORCH_LOG_WARN("Data pointer is nullptr");
133139
}

backends/qualcomm/aot/wrappers/TensorWrapper.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,9 +36,7 @@ class TensorWrapper {
3636
const void* data = nullptr,
3737
bool copy_data = false);
3838

39-
executorch::runtime::Error FillDataBuffer(
40-
const void* data,
41-
bool copy_data = false);
39+
executorch::runtime::Error FillDataBuffer(const void* data);
4240

4341
executorch::runtime::Error AllocateDataBuffer();
4442

backends/qualcomm/export_utils.py

Lines changed: 52 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@
3434
QcomChipset,
3535
QnnExecuTorchBackendType,
3636
QnnExecuTorchHtpPerformanceMode,
37+
QnnExecuTorchLpaiTargetEnv,
3738
QnnExecuTorchOpPackageOptions,
3839
)
3940
from executorch.backends.qualcomm.utils.constants import (
40-
DSP_VERSION,
4141
HEXAGON_SDK_ROOT,
4242
HEXAGON_TOOLS_ROOT,
4343
)
@@ -76,7 +76,7 @@ class QnnConfig:
7676
backend (str): The target backend, such as htp, gpu, etc. QnnConfig will then parse this to type QnnExecuTorchBackendType.
7777
soc_model (QcomChipset): The target Qualcomm System on Chip (SoC) model.
7878
build_folder (str): Path to cmake binary directory for target platform, e.g., /path/to/build-android.
79-
direct_build_folder (str): Path to cmake binary directory for direct_mode. E.g., path/to/build-hexagon.
79+
direct_build_folder (str): Path to cmake binary directory for direct_mode. E.g., path/to/build-direct.
8080
target (str): Target platform for deployment.
8181
online_prepare (bool): Compose QNN graph on device if set to True.
8282
shared_buffer (bool): Enables usage of shared buffer(zero-copy mechanism) between application and backend for graph I/O during runtime.
@@ -235,16 +235,14 @@ def __init__(
235235
)
236236
self.runner = runner
237237
if qnn_config.direct_build_folder:
238-
required_env = [HEXAGON_SDK_ROOT, HEXAGON_TOOLS_ROOT, DSP_VERSION]
238+
required_env = [HEXAGON_SDK_ROOT, HEXAGON_TOOLS_ROOT]
239239
assert all(
240240
var in os.environ for var in required_env
241241
), f"Please ensure the following environment variables are set: {required_env}"
242242
self.hexagon_sdk_root = os.getenv(HEXAGON_SDK_ROOT)
243243
self.hexagon_tools_root = os.getenv(HEXAGON_TOOLS_ROOT)
244-
self.dsp_arch = os.getenv(DSP_VERSION)
245244
logging.info(f"{HEXAGON_SDK_ROOT}={self.hexagon_sdk_root}")
246245
logging.info(f"{HEXAGON_TOOLS_ROOT}={self.hexagon_tools_root}")
247-
logging.info(f"{DSP_VERSION}={self.dsp_arch}")
248246
self.qnn_config = qnn_config
249247
self.qnn_sdk = os.getenv("QNN_SDK_ROOT")
250248
self.build_path = qnn_config.build_folder
@@ -287,17 +285,25 @@ def __init__(
287285
if self.direct_build_folder:
288286
direct_general_artifacts = [
289287
f"{self.build_path}/examples/qualcomm/direct_executor_runner/libqnn_executorch_stub.so",
290-
f"{self.direct_build_folder}/backends/qualcomm/libqnn_executorch_backend.so",
291-
f"{self.direct_build_folder}/backends/qualcomm/qnn_executorch/direct_mode/libqnn_executorch_skel.so",
292288
]
293289
self.backend_library_paths.update(
294290
{
295291
QnnExecuTorchBackendType.kHtpBackend: [
292+
f"{self.direct_build_folder}/backends/qualcomm/libqnn_executorch_backend.so",
293+
f"{self.direct_build_folder}/backends/qualcomm/qnn_executorch/direct_mode/libqnn_executorch_skel.so",
296294
f"{self.qnn_sdk}/lib/hexagon-v{self.htp_arch}/unsigned/libQnnHtpV{self.htp_arch}.so",
297295
f"{self.qnn_sdk}/lib/hexagon-v{self.htp_arch}/unsigned/libQnnSystem.so",
298296
f"{self.hexagon_tools_root}/Tools/target/hexagon/lib/v{self.htp_arch}/G0/pic/libc++abi.so.1",
299297
f"{self.hexagon_tools_root}/Tools/target/hexagon/lib/v{self.htp_arch}/G0/pic/libc++.so.1",
300-
]
298+
],
299+
QnnExecuTorchBackendType.kLpaiBackend: [
300+
f"{self.qnn_sdk}/lib/lpai-v{self.lpai_hw_ver}/signed/libqnn_executorch_backend.so",
301+
f"{self.qnn_sdk}/lib/lpai-v{self.lpai_hw_ver}/signed/libqnn_executorch_skel.so",
302+
f"{self.qnn_sdk}/lib/lpai-v{self.lpai_hw_ver}/signed/libQnnLpai.so",
303+
f"{self.qnn_sdk}/lib/lpai-v{self.lpai_hw_ver}/signed/libQnnSystem.so",
304+
f"{self.qnn_sdk}/lib/lpai-v{self.lpai_hw_ver}/signed/libc++abi.so.1",
305+
f"{self.qnn_sdk}/lib/lpai-v{self.lpai_hw_ver}/signed/libc++.so.1",
306+
],
301307
}
302308
)
303309
for _, library_paths in self.backend_library_paths.items():
@@ -378,6 +384,12 @@ def push( # noqa: C901
378384
# backend libraries
379385
for backend in backends:
380386
artifacts.extend(self.backend_library_paths[backend])
387+
388+
# Ensure that all necessary library artifacts exist.
389+
missing = [path for path in artifacts if not os.path.exists(path)]
390+
assert not missing, "Missing the following libraries:\n" + "\n".join(
391+
f" {p}" for p in missing
392+
)
381393
with tempfile.TemporaryDirectory() as tmp_dir:
382394
input_list_file, input_files = generate_inputs(
383395
tmp_dir, self.input_list_filename, inputs
@@ -440,6 +452,13 @@ def execute(
440452
)
441453
+ self.extra_cmds
442454
)
455+
if self.qnn_config.direct_build_folder:
456+
qnn_executor_runner_args = " ".join(
457+
[
458+
qnn_executor_runner_args,
459+
f"--domain_id {get_dsp_id(self.qnn_config.backend)}",
460+
]
461+
)
443462
qnn_executor_runner_cmds = " ".join(
444463
[
445464
f"cd {self.workspace} &&",
@@ -526,7 +545,9 @@ def build_executorch_binary(
526545
):
527546
raise RuntimeError("Currently LPAI backend only supports offline_prepare.")
528547
backend_options = {
529-
QnnExecuTorchBackendType.kLpaiBackend: generate_lpai_compiler_spec(),
548+
QnnExecuTorchBackendType.kLpaiBackend: generate_lpai_compiler_spec(
549+
target_env=get_lpai_target_env(qnn_config)
550+
),
530551
QnnExecuTorchBackendType.kGpuBackend: generate_gpu_compiler_spec(),
531552
QnnExecuTorchBackendType.kHtpBackend: generate_htp_compiler_spec(
532553
use_fp16=False if quant_dtype is not None else True,
@@ -652,10 +673,31 @@ def make_quantizer(
652673
return quantizer
653674

654675

676+
def get_lpai_target_env(qnn_config: QnnConfig):
677+
if qnn_config.enable_x86_64:
678+
return QnnExecuTorchLpaiTargetEnv.kX86
679+
elif qnn_config.direct_build_folder:
680+
return QnnExecuTorchLpaiTargetEnv.kAdsp
681+
return QnnExecuTorchLpaiTargetEnv.kArm
682+
683+
655684
def get_backend_type(backend: str):
656685
return getattr(QnnExecuTorchBackendType, f"k{backend.title()}Backend")
657686

658687

688+
def get_dsp_id(backend):
689+
dsp_id_map = {
690+
QnnExecuTorchBackendType.kLpaiBackend: 0,
691+
QnnExecuTorchBackendType.kHtpBackend: 3,
692+
}
693+
if backend not in dsp_id_map:
694+
raise ValueError(
695+
f"Unsupported backend {backend} for direct mode. "
696+
f"Supported: {list(dsp_id_map.keys())}"
697+
)
698+
return dsp_id_map[backend]
699+
700+
659701
def setup_common_args_and_variables():
660702
parser = argparse.ArgumentParser()
661703

@@ -822,7 +864,7 @@ def setup_common_args_and_variables():
822864

823865
parser.add_argument(
824866
"--direct_build_folder",
825-
help="Path to cmake binary directory for direct_mode. E.g., path/to/build-hexagon."
867+
help="Path to cmake binary directory for direct_mode. E.g., path/to/build-direct."
826868
"If enabled, run self-defined protocol to control fastrpc communication.",
827869
type=str,
828870
)

backends/qualcomm/requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ pydot
33
py-cpuinfo
44
requests
55
tabulate
6+
openpyxl

backends/qualcomm/runtime/QnnExecuTorch.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,9 @@
2626
#define QNN_RUNTIME_LPAI_AFFINITY "qnn_runtime_lpai_affinity"
2727
#define QNN_RUNTIME_LPAI_CORE_SELECTION "qnn_runtime_lpai_core_selection"
2828

29+
// This is for direct mode, especially LPAI
30+
#define QNN_TENSOR_ALIGNMENT 64
31+
2932
#ifdef __cplusplus
3033
extern "C" {
3134
#endif // __cplusplus

backends/qualcomm/runtime/QnnExecuTorchBackend.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -148,8 +148,7 @@ Error QnnExecuTorchBackend::execute(
148148
Error::Ok) {
149149
// update data ptr only should be fine
150150
input_tensor->FillDataBuffer(
151-
args[args_index]->toTensor().const_data_ptr(),
152-
false /* copy_data */);
151+
args[args_index]->toTensor().const_data_ptr());
153152
// use the real input shape instead of nominal one to make sure
154153
// dynamic shape is functional
155154
auto dims = args[args_index]->toTensor().sizes();
@@ -167,7 +166,7 @@ Error QnnExecuTorchBackend::execute(
167166
void* mutable_data_ptr = args[args_index]->toTensor().mutable_data_ptr();
168167
if (qnn_manager->RegisterMem(mutable_data_ptr, output_tensor) !=
169168
Error::Ok) {
170-
output_tensor->FillDataBuffer(mutable_data_ptr, false /* copy_data */);
169+
output_tensor->FillDataBuffer(mutable_data_ptr);
171170
}
172171
args_index++;
173172
}

backends/qualcomm/runtime/QnnManager.cpp

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -376,8 +376,7 @@ Error QnnManager::AllocateTensor(const std::string& graph_name) {
376376
mutable_buffer_id_to_memory_map.end()) {
377377
// Fill the same memory for I/O of mutable buffer
378378
tensor_wrapper->FillDataBuffer(
379-
mutable_buffer_id_to_memory_map[mutable_buffer_id],
380-
false /* copy_data */);
379+
mutable_buffer_id_to_memory_map[mutable_buffer_id]);
381380
}
382381
output_tensors_[graph_name].emplace_back(std::move(tensor_wrapper));
383382
}
@@ -581,9 +580,7 @@ Error QnnManager::CompileDlc() {
581580
mutable_buffer_id_to_memory_map.find(mutable_buffer_id) !=
582581
mutable_buffer_id_to_memory_map.end()) {
583582
// Fill the same memory for I/O of mutable buffer
584-
tw->FillDataBuffer(
585-
mutable_buffer_id_to_memory_map[mutable_buffer_id],
586-
false /* copy_data */);
583+
tw->FillDataBuffer(mutable_buffer_id_to_memory_map[mutable_buffer_id]);
587584
}
588585
graph_outputs.push_back(tw);
589586
}

0 commit comments

Comments
 (0)