Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .claude/skills/qualcomm/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Use `backends/qualcomm/scripts/build.sh`. Linux only (macOS not supported).
|---|---|---|
| x86_64 (Python interface + host tools) | enabled | `build-x86/` |
| Android arm64-v8a (device runner) | enabled | `build-android/` |
| Hexagon DSP (direct mode) | disabled | `build-hexagon/` |
| Direct mode (LPAI ADSP or Hexagon CDSP) | disabled | `build-direct/` |
| OE Linux embedded | disabled | `build-oe-linux/` |

**Common build commands:**
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ cmake-out*
cmake-out-android/
build-android/
build-x86/
build-hexagon/
build-direct/
dist/
arm-scratch/
executorch.egg-info
Expand Down
22 changes: 16 additions & 6 deletions backends/qualcomm/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -248,21 +248,31 @@ target_link_libraries(

if(${CMAKE_SYSTEM_PROCESSOR} MATCHES Hexagon)
# Add macro here so we can dlopen the correct .so library.
if(DSP_TYPE STREQUAL "3")
string(TOUPPER ${DSP_VERSION} CAPITAL_DSP_VERSION)
set(HEXAGON_LIB "libQnnHtp${CAPITAL_DSP_VERSION}.so")
if(DSP_TYPE STREQUAL "0")
message(
STATUS
"aDSP direct and non-direct mode uses shared libraries under different folders but have the same name, skipping HEXAGON_LIB override."
)
# Define the macro with an empty placeholder value, since
# QnnBackendUnifiedRegistry.h ignores this macro in the aDSP case.
add_compile_definitions(HEXAGON_LIB="")
elseif(DSP_TYPE STREQUAL "3")
string(TOUPPER ${CDSP_VERSION} CAPITAL_CDSP_VERSION)
set(HEXAGON_LIB "libQnnHtp${CAPITAL_CDSP_VERSION}.so")
add_compile_definitions(HEXAGON_LIB="${HEXAGON_LIB}")
message(STATUS "For hexagon build, using HTP Library: ${HEXAGON_LIB}")
else()
message(FATAL_ERROR "Unknown DSP_TYPE ${DSP_TYPE}")
endif()

# The aDSP build also links against the cDSP C/C++ libraries, since the
# aDSP toolchain does not provide these libraries itself.
target_link_libraries(
qnn_executorch_backend
PRIVATE
${HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/${DSP_VERSION}/G0/pic/libc.so
${HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/${DSP_VERSION}/G0/pic/libc++.so.1
${HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/${DSP_VERSION}/G0/pic/libc++abi.so.1
${HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/${CDSP_VERSION}/G0/pic/libc.so
${HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/${CDSP_VERSION}/G0/pic/libc++.so.1
${HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/${CDSP_VERSION}/G0/pic/libc++abi.so.1
)
endif()

Expand Down
25 changes: 15 additions & 10 deletions backends/qualcomm/aot/wrappers/TensorWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,19 +115,24 @@ TensorWrapper::TensorWrapper(
}
}

Error TensorWrapper::FillDataBuffer(const void* data, bool copy_data) {
Error TensorWrapper::FillDataBuffer(const void* data) {
if (data != nullptr) {
QNN_TENSOR_VER_PTR(tensor_)->memType = QNN_TENSORMEMTYPE_RAW;
#ifdef __hexagon__
// The data address is already aligned by the IDL skel implementation
// (e.g. QnnExecuTorchIdlWrapper.cpp). Here we ensure the data size we
// pass is also a multiple of 64; QnnExecuTorchIdlWrapper.cpp should have
// allocated sufficient space for the tensor.
auto align_size = [](size_t alignment, size_t sz) {
return (sz + (alignment - 1)) & ~(alignment - 1);
};
QNN_TENSOR_VER_PTR(tensor_)->clientBuf.dataSize =
align_size(QNN_TENSOR_ALIGNMENT, bytes_);
#else
QNN_TENSOR_VER_PTR(tensor_)->clientBuf.dataSize = bytes_;
if (copy_data) {
owned_data_ = std::make_unique<char[]>(bytes_);
const char* src_data = static_cast<const char*>(data);
std::memcpy(owned_data_.get(), src_data, bytes_);
QNN_TENSOR_VER_PTR(tensor_)->clientBuf.data = owned_data_.get();
} else {
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
QNN_TENSOR_VER_PTR(tensor_)->clientBuf.data = const_cast<void*>(data);
}
#endif
// NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast)
QNN_TENSOR_VER_PTR(tensor_)->clientBuf.data = const_cast<void*>(data);
} else {
QNN_EXECUTORCH_LOG_WARN("Data pointer is nullptr");
}
Expand Down
7 changes: 4 additions & 3 deletions backends/qualcomm/aot/wrappers/TensorWrapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@
#define QNN_TENSOR_VER_PTR(x) (&((x).v2))
#define QNN_OP_VER_PTR(x) (&((x).v1))

// This is for direct mode, especially LPAI
#define QNN_TENSOR_ALIGNMENT 64

namespace executorch {
namespace backends {
namespace qnn {
Expand All @@ -36,9 +39,7 @@ class TensorWrapper {
const void* data = nullptr,
bool copy_data = false);

executorch::runtime::Error FillDataBuffer(
const void* data,
bool copy_data = false);
executorch::runtime::Error FillDataBuffer(const void* data);

executorch::runtime::Error AllocateDataBuffer();

Expand Down
62 changes: 52 additions & 10 deletions backends/qualcomm/export_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,10 @@
QcomChipset,
QnnExecuTorchBackendType,
QnnExecuTorchHtpPerformanceMode,
QnnExecuTorchLpaiTargetEnv,
QnnExecuTorchOpPackageOptions,
)
from executorch.backends.qualcomm.utils.constants import (
DSP_VERSION,
HEXAGON_SDK_ROOT,
HEXAGON_TOOLS_ROOT,
)
Expand Down Expand Up @@ -76,7 +76,7 @@ class QnnConfig:
backend (str): The target backend, such as htp, gpu, etc. QnnConfig will then parse this to type QnnExecuTorchBackendType.
soc_model (QcomChipset): The target Qualcomm System on Chip (SoC) model.
build_folder (str): Path to cmake binary directory for target platform, e.g., /path/to/build-android.
direct_build_folder (str): Path to cmake binary directory for direct_mode. E.g., path/to/build-hexagon.
direct_build_folder (str): Path to cmake binary directory for direct_mode. E.g., path/to/build-direct.
target (str): Target platform for deployment.
online_prepare (bool): Compose QNN graph on device if set to True.
shared_buffer (bool): Enables usage of shared buffer(zero-copy mechanism) between application and backend for graph I/O during runtime.
Expand Down Expand Up @@ -235,16 +235,14 @@ def __init__(
)
self.runner = runner
if qnn_config.direct_build_folder:
required_env = [HEXAGON_SDK_ROOT, HEXAGON_TOOLS_ROOT, DSP_VERSION]
required_env = [HEXAGON_SDK_ROOT, HEXAGON_TOOLS_ROOT]
assert all(
var in os.environ for var in required_env
), f"Please ensure the following environment variables are set: {required_env}"
self.hexagon_sdk_root = os.getenv(HEXAGON_SDK_ROOT)
self.hexagon_tools_root = os.getenv(HEXAGON_TOOLS_ROOT)
self.dsp_arch = os.getenv(DSP_VERSION)
logging.info(f"{HEXAGON_SDK_ROOT}={self.hexagon_sdk_root}")
logging.info(f"{HEXAGON_TOOLS_ROOT}={self.hexagon_tools_root}")
logging.info(f"{DSP_VERSION}={self.dsp_arch}")
self.qnn_config = qnn_config
self.qnn_sdk = os.getenv("QNN_SDK_ROOT")
self.build_path = qnn_config.build_folder
Expand Down Expand Up @@ -287,17 +285,25 @@ def __init__(
if self.direct_build_folder:
direct_general_artifacts = [
f"{self.build_path}/examples/qualcomm/direct_executor_runner/libqnn_executorch_stub.so",
f"{self.direct_build_folder}/backends/qualcomm/libqnn_executorch_backend.so",
f"{self.direct_build_folder}/backends/qualcomm/qnn_executorch/direct_mode/libqnn_executorch_skel.so",
]
self.backend_library_paths.update(
{
QnnExecuTorchBackendType.kHtpBackend: [
f"{self.direct_build_folder}/backends/qualcomm/libqnn_executorch_backend.so",
f"{self.direct_build_folder}/backends/qualcomm/qnn_executorch/direct_mode/libqnn_executorch_skel.so",
f"{self.qnn_sdk}/lib/hexagon-v{self.htp_arch}/unsigned/libQnnHtpV{self.htp_arch}.so",
f"{self.qnn_sdk}/lib/hexagon-v{self.htp_arch}/unsigned/libQnnSystem.so",
f"{self.hexagon_tools_root}/Tools/target/hexagon/lib/v{self.htp_arch}/G0/pic/libc++abi.so.1",
f"{self.hexagon_tools_root}/Tools/target/hexagon/lib/v{self.htp_arch}/G0/pic/libc++.so.1",
]
],
QnnExecuTorchBackendType.kLpaiBackend: [
f"{self.qnn_sdk}/lib/lpai-v{self.lpai_hw_ver}/signed/libqnn_executorch_backend.so",
f"{self.qnn_sdk}/lib/lpai-v{self.lpai_hw_ver}/signed/libqnn_executorch_skel.so",
f"{self.qnn_sdk}/lib/lpai-v{self.lpai_hw_ver}/signed/libQnnLpai.so",
f"{self.qnn_sdk}/lib/lpai-v{self.lpai_hw_ver}/signed/libQnnSystem.so",
f"{self.qnn_sdk}/lib/lpai-v{self.lpai_hw_ver}/signed/libc++abi.so.1",
f"{self.qnn_sdk}/lib/lpai-v{self.lpai_hw_ver}/signed/libc++.so.1",
],
}
)
for _, library_paths in self.backend_library_paths.items():
Expand Down Expand Up @@ -378,6 +384,12 @@ def push( # noqa: C901
# backend libraries
for backend in backends:
artifacts.extend(self.backend_library_paths[backend])

# Ensure that all necessary library artifacts exist.
missing = [path for path in artifacts if not os.path.exists(path)]
assert not missing, "Missing the following libraries:\n" + "\n".join(
f" {p}" for p in missing
)
with tempfile.TemporaryDirectory() as tmp_dir:
input_list_file, input_files = generate_inputs(
tmp_dir, self.input_list_filename, inputs
Expand Down Expand Up @@ -440,6 +452,13 @@ def execute(
)
+ self.extra_cmds
)
if self.qnn_config.direct_build_folder:
qnn_executor_runner_args = " ".join(
[
qnn_executor_runner_args,
f"--domain_id {get_dsp_id(self.qnn_config.backend)}",
]
)
qnn_executor_runner_cmds = " ".join(
[
f"cd {self.workspace} &&",
Expand Down Expand Up @@ -526,7 +545,9 @@ def build_executorch_binary(
):
raise RuntimeError("Currently LPAI backend only supports offline_prepare.")
backend_options = {
QnnExecuTorchBackendType.kLpaiBackend: generate_lpai_compiler_spec(),
QnnExecuTorchBackendType.kLpaiBackend: generate_lpai_compiler_spec(
target_env=get_lpai_target_env(qnn_config)
),
QnnExecuTorchBackendType.kGpuBackend: generate_gpu_compiler_spec(),
QnnExecuTorchBackendType.kHtpBackend: generate_htp_compiler_spec(
use_fp16=False if quant_dtype is not None else True,
Expand Down Expand Up @@ -652,10 +673,31 @@ def make_quantizer(
return quantizer


def get_lpai_target_env(qnn_config: QnnConfig):
    """Resolve the LPAI target environment from the build configuration.

    Precedence: x86_64 emulation first, then direct (aDSP) mode when a
    direct build folder is configured, falling back to the on-device ARM
    environment otherwise.
    """
    if qnn_config.enable_x86_64:
        return QnnExecuTorchLpaiTargetEnv.kX86
    if qnn_config.direct_build_folder:
        return QnnExecuTorchLpaiTargetEnv.kAdsp
    return QnnExecuTorchLpaiTargetEnv.kArm


def get_backend_type(backend: str):
    """Map a backend name (e.g. "htp", "lpai") to the corresponding
    QnnExecuTorchBackendType member (e.g. kHtpBackend)."""
    member_name = "k{}Backend".format(backend.title())
    return getattr(QnnExecuTorchBackendType, member_name)


def get_dsp_id(backend):
    """Return the DSP domain id used for direct mode with the given backend.

    LPAI maps to domain 0 (aDSP) and HTP maps to domain 3 (cDSP), matching
    the DSP_TYPE values handled in backends/qualcomm/CMakeLists.txt.
    Raises ValueError for backends without a direct-mode domain.
    """
    supported = [
        QnnExecuTorchBackendType.kLpaiBackend,
        QnnExecuTorchBackendType.kHtpBackend,
    ]
    if backend == QnnExecuTorchBackendType.kLpaiBackend:
        return 0
    if backend == QnnExecuTorchBackendType.kHtpBackend:
        return 3
    raise ValueError(
        f"Unsupported backend {backend} for direct mode. "
        f"Supported: {supported}"
    )


def setup_common_args_and_variables():
parser = argparse.ArgumentParser()

Expand Down Expand Up @@ -822,7 +864,7 @@ def setup_common_args_and_variables():

parser.add_argument(
"--direct_build_folder",
help="Path to cmake binary directory for direct_mode. E.g., path/to/build-hexagon."
help="Path to cmake binary directory for direct_mode. E.g., path/to/build-direct."
"If enabled, run self-defined protocol to control fastrpc communication.",
type=str,
)
Expand Down
1 change: 1 addition & 0 deletions backends/qualcomm/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ pydot
py-cpuinfo
requests
tabulate
openpyxl
5 changes: 2 additions & 3 deletions backends/qualcomm/runtime/QnnExecuTorchBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,7 @@ Error QnnExecuTorchBackend::execute(
Error::Ok) {
// update data ptr only should be fine
input_tensor->FillDataBuffer(
args[args_index]->toTensor().const_data_ptr(),
false /* copy_data */);
args[args_index]->toTensor().const_data_ptr());
// use the real input shape instead of nominal one to make sure
// dynamic shape is functional
auto dims = args[args_index]->toTensor().sizes();
Expand All @@ -167,7 +166,7 @@ Error QnnExecuTorchBackend::execute(
void* mutable_data_ptr = args[args_index]->toTensor().mutable_data_ptr();
if (qnn_manager->RegisterMem(mutable_data_ptr, output_tensor) !=
Error::Ok) {
output_tensor->FillDataBuffer(mutable_data_ptr, false /* copy_data */);
output_tensor->FillDataBuffer(mutable_data_ptr);
}
args_index++;
}
Expand Down
7 changes: 2 additions & 5 deletions backends/qualcomm/runtime/QnnManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,8 +376,7 @@ Error QnnManager::AllocateTensor(const std::string& graph_name) {
mutable_buffer_id_to_memory_map.end()) {
// Fill the same memory for I/O of mutable buffer
tensor_wrapper->FillDataBuffer(
mutable_buffer_id_to_memory_map[mutable_buffer_id],
false /* copy_data */);
mutable_buffer_id_to_memory_map[mutable_buffer_id]);
}
output_tensors_[graph_name].emplace_back(std::move(tensor_wrapper));
}
Expand Down Expand Up @@ -581,9 +580,7 @@ Error QnnManager::CompileDlc() {
mutable_buffer_id_to_memory_map.find(mutable_buffer_id) !=
mutable_buffer_id_to_memory_map.end()) {
// Fill the same memory for I/O of mutable buffer
tw->FillDataBuffer(
mutable_buffer_id_to_memory_map[mutable_buffer_id],
false /* copy_data */);
tw->FillDataBuffer(mutable_buffer_id_to_memory_map[mutable_buffer_id]);
}
graph_outputs.push_back(tw);
}
Expand Down
4 changes: 2 additions & 2 deletions backends/qualcomm/runtime/backends/QnnBackendCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

#include <executorch/backends/qualcomm/runtime/backends/QnnBackendCache.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnCustomProtocol.h>

namespace executorch {
namespace backends {
namespace qnn {
Expand All @@ -22,11 +23,10 @@ Error QnnBackendCache::GetQnnGraphInfoFromBinary(
std::uint32_t num_graphs;
QnnSystemContext_GraphInfo_t* graphs = nullptr;
const QnnSystemContext_BinaryInfo_t* binaryinfo{nullptr};
Qnn_ContextBinarySize_t binaryinfo_size = 0;
Qnn_ErrorHandle_t error = QNN_SUCCESS;

error = qnn_sys_interface.qnn_system_context_get_binary_info(
sys_context_handle_, buffer, nbytes, &binaryinfo, &binaryinfo_size);
sys_context_handle_, buffer, nbytes, &binaryinfo);

if (error != QNN_SUCCESS) {
QNN_EXECUTORCH_LOG_WARN(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ class QnnBackendUnifiedRegistry {
#endif
static constexpr const char* gpu_library_name_ = "libQnnGpu.so";
static constexpr const char* dsp_library_name_ = "libQnnDsp.so";
// The LPAI library name is the same for both the traditional build and the hexagon build.
static constexpr const char* lpai_library_name_ = "libQnnLpai.so";

std::unique_ptr<const QnnSaver_Config_t*[]> GetImplementationConfig(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ class QnnInterface {
DEFINE_SHIM_FUNCTION_INTERFACE(graph_execute, graphExecute);
DEFINE_SHIM_FUNCTION_INTERFACE(graph_retrieve, graphRetrieve);
DEFINE_SHIM_FUNCTION_INTERFACE(graph_set_config, graphSetConfig);
DEFINE_SHIM_FUNCTION_INTERFACE(graph_get_property, graphGetProperty);
// --------- QnnLog ---------
DEFINE_SHIM_FUNCTION_INTERFACE(log_create, logCreate);
DEFINE_SHIM_FUNCTION_INTERFACE(log_free, logFree);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class QnnSystemInterface {
systemContextCreate);
DEFINE_SHIM_FUNCTION_SYS_INTERFACE(
system_context_get_binary_info,
systemContextGetBinaryInfo);
systemContextGetMetaData);
DEFINE_SHIM_FUNCTION_SYS_INTERFACE(system_context_free, systemContextFree);

private:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,17 @@ add_library(
target_include_directories(
qnn_executorch_skel PRIVATE ${CMAKE_CURRENT_BINARY_DIR}
)
# The aDSP build also links against the cDSP C/C++ libraries, since the
# aDSP toolchain does not provide these libraries itself.
target_link_libraries(
qnn_executorch_skel
PRIVATE
extension_data_loader
qnn_executorch_backend
quantized_ops_lib
etdump
${HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/${DSP_VERSION}/G0/pic/libc.so
${HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/${DSP_VERSION}/G0/pic/libc++.so.1
${HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/${DSP_VERSION}/G0/pic/libc++abi.so.1
${HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/${CDSP_VERSION}/G0/pic/libc.so
${HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/${CDSP_VERSION}/G0/pic/libc++.so.1
${HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/${CDSP_VERSION}/G0/pic/libc++abi.so.1
)
target_compile_options(qnn_executorch_skel PRIVATE "-fvisibility=default")
Loading
Loading