Skip to content

Commit 1232bac

Browse files
committed
lpai e2e & minimum inference runtime support
1 parent a0ba28e commit 1232bac

52 files changed

Lines changed: 2104 additions & 85 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

backends/qualcomm/CMakeLists.txt

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -239,11 +239,28 @@ target_link_libraries(
239239
shared_buffer
240240
qnn_dlc_manager
241241
)
242-
target_link_libraries(
243-
qnn_executorch_backend
244-
PRIVATE qnn_executorch_header qnn_schema qnn_manager executorch_core
245-
extension_tensor qnn_backend_options
246-
)
242+
# Link the backend against its ExecuTorch/QNN dependencies. The list is shared
# by every platform; Hexagon builds append the C and C++ runtimes afterwards so
# the final link order is unchanged.
target_link_libraries(
  qnn_executorch_backend
  PRIVATE qnn_executorch_header
          qnn_schema
          qnn_manager
          executorch_core
          extension_tensor
          qnn_backend_options
)
if(CMAKE_SYSTEM_PROCESSOR MATCHES "Hexagon")
  # link_directories() only applies to targets created after it is called, so
  # it would have no effect on the already-defined qnn_executorch_backend.
  # Attach the runtime search path to the target itself instead.
  target_link_directories(
    qnn_executorch_backend
    PRIVATE
    $ENV{HEXAGON_TOOLS_ROOT}/Tools/target/hexagon/lib/$ENV{HEXAGON_ARCH}/G0/pic
  )
  target_link_libraries(qnn_executorch_backend PRIVATE c c++)
endif()
247264
set_target_properties(
248265
qnn_executorch_backend PROPERTIES LINK_FLAGS "-Wl,-rpath='$ORIGIN'"
249266
)
@@ -278,6 +295,13 @@ install(
278295
RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}/executorch/backends/qualcomm
279296
)
280297

298+
# The FastRPC sub-project is configured only when the environment provides
# HEXAGON_SDK_ROOT; its build tree lives under qnn_executorch/fastrpc.
if(DEFINED ENV{HEXAGON_SDK_ROOT})
  add_subdirectory(
    ${QNN_EXECUTORCH_ROOT_DIR}/fastrpc
    ${CMAKE_CURRENT_BINARY_DIR}/qnn_executorch/fastrpc
  )
endif()
304+
281305
# QNN pybind
282306
if(${CMAKE_SYSTEM_PROCESSOR} MATCHES "x86_64")
283307
add_subdirectory(

backends/qualcomm/_passes/build_quant_io.py

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,8 @@
55
# LICENSE file in the root directory of this source tree.
66
import torch
77
from executorch.backends.qualcomm.utils.constants import QCOM_QUANTIZED_IO
8-
from executorch.exir.delegate import executorch_call_delegate
98

10-
from executorch.exir.pass_base import ExportPass, ProxyValue
9+
from executorch.exir.pass_base import ExportPass
1110
from executorch.exir.tensor import TensorSpec
1211
from torch.utils import _pytree as pytree
1312

@@ -39,11 +38,17 @@ def call_getitem(self, value, key: int, meta):
3938
return super().call_getitem(value, key, meta)
4039

4140
def call_delegate(self, lowered_module, args, kwargs, meta):
42-
args_data, _ = pytree.tree_map_only(
43-
ProxyValue, lambda x: x.data, (args, kwargs)
44-
)
41+
output_node = [
42+
node
43+
for node in lowered_module.original_module.graph.nodes
44+
if node.target == "output"
45+
][0]
46+
tensors = [
47+
node.meta["val"].to(node.meta[QCOM_QUANTIZED_IO])
48+
for node in output_node.args[0]
49+
]
4550
meta["spec"] = pytree.tree_map(
4651
self._make_spec,
47-
executorch_call_delegate(lowered_module, *args_data),
52+
tuple(tensors),
4853
)
4954
return super().call_delegate(lowered_module, args, kwargs, meta)

backends/qualcomm/aot/wrappers/TensorWrapper.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,17 @@ TensorWrapper::TensorWrapper(
118118
Error TensorWrapper::FillDataBuffer(const void* data, bool copy_data) {
119119
if (data != nullptr) {
120120
QNN_TENSOR_VER_PTR(tensor_)->memType = QNN_TENSORMEMTYPE_RAW;
121+
#ifdef __hexagon__
122+
// alignment is required
123+
auto align_size = [](size_t alignment, size_t sz) {
124+
return (sz + (alignment - 1)) & ~(alignment - 1);
125+
};
126+
const size_t alignment = 64;
127+
QNN_TENSOR_VER_PTR(tensor_)->clientBuf.dataSize =
128+
align_size(alignment, bytes_);
129+
#else
121130
QNN_TENSOR_VER_PTR(tensor_)->clientBuf.dataSize = bytes_;
131+
#endif
122132
if (copy_data) {
123133
owned_data_ = std::make_unique<char[]>(bytes_);
124134
const char* src_data = static_cast<const char*>(data);
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
# Copyright (c) Qualcomm Innovation Center, Inc.
2+
# Copyright 2025 Arm Limited and/or its affiliates.
3+
# All rights reserved
4+
#
5+
# This source code is licensed under the BSD-style license found in the
6+
# LICENSE file in the root directory of this source tree.
7+
8+
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_POSITION_INDEPENDENT_CODE ON)

# Every target below consumes files generated by the SDK's qaic tool, so there
# is nothing to build without the SDK. Bail out early instead of failing later
# on a missing fastrpc_codegen target.
if(NOT DEFINED ENV{HEXAGON_SDK_ROOT})
  message(
    WARNING "HEXAGON_SDK_ROOT is not set; skipping qnn_executorch fastrpc build"
  )
  return()
endif()

# Output directory, IDL input and generated files of the qaic code generator.
set(_qnn_fastrpc__dir ${CMAKE_BINARY_DIR}/backends/qualcomm/fastrpc)
set(_qnn_fastrpc__srcs ${CMAKE_CURRENT_LIST_DIR}/qnn_executorch.idl)
set(_qnn_fastrpc__outputs
    ${_qnn_fastrpc__dir}/qnn_executorch.h
    ${_qnn_fastrpc__dir}/qnn_executorch_stub.c
    ${_qnn_fastrpc__dir}/qnn_executorch_skel.c
)

add_custom_command(
  OUTPUT ${_qnn_fastrpc__outputs}
  # `cmake -E make_directory` is the portable equivalent of `mkdir -p`.
  COMMAND ${CMAKE_COMMAND} -E make_directory ${_qnn_fastrpc__dir}
  COMMAND
    $ENV{HEXAGON_SDK_ROOT}/ipc/fastrpc/qaic/bin/qaic -I
    $ENV{HEXAGON_SDK_ROOT}/incs -I $ENV{HEXAGON_SDK_ROOT}/incs/stddef -o
    ${_qnn_fastrpc__dir} ${_qnn_fastrpc__srcs}
  WORKING_DIRECTORY ${EXECUTORCH_SOURCE_DIR}
  # Depend on the IDL so that editing it regenerates the stub/skel. The
  # dependency on qnn_executorch_backend is kept from the original script.
  DEPENDS ${_qnn_fastrpc__srcs} qnn_executorch_backend
  COMMENT "Codegen for fastrpc files"
  VERBATIM
)
add_custom_target(
  fastrpc_codegen
  DEPENDS ${_qnn_fastrpc__outputs}
  COMMENT "Codegen for fastrpc files"
)

# DSP side: skeleton library hosting the implementation of the IDL interface.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "Hexagon")
  add_library(
    qnn_executorch_skel SHARED
    ${_qnn_fastrpc__dir}/qnn_executorch.h
    ${_qnn_fastrpc__dir}/qnn_executorch_skel.c qnn_executorch_impl.cpp
  )
  target_include_directories(qnn_executorch_skel PRIVATE ${_qnn_fastrpc__dir})
  target_link_libraries(
    qnn_executorch_skel PRIVATE extension_data_loader qnn_executorch_backend
                                c++ c
  )
  add_dependencies(qnn_executorch_skel fastrpc_codegen)
endif()

# Application-processor side: stub library plus the minimal example runner.
if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
  include_directories(
    $ENV{HEXAGON_SDK_ROOT}/incs $ENV{HEXAGON_SDK_ROOT}/incs/stddef
    ${_qnn_fastrpc__dir}
  )
  link_directories(
    $ENV{HEXAGON_SDK_ROOT}/ipc/fastrpc/remote/ship/android_aarch64
  )
  add_library(
    qnn_executorch_stub SHARED ${_qnn_fastrpc__dir}/qnn_executorch.h
                               ${_qnn_fastrpc__dir}/qnn_executorch_stub.c
  )
  # TODO: support cdsp if necessary
  target_link_libraries(qnn_executorch_stub PRIVATE adsprpc)
  add_dependencies(qnn_executorch_stub fastrpc_codegen)

  # build minimum example app
  add_executable(qnn_executor_runner qnn_executor_runner.cpp)
  # TODO: support cdsp if necessary
  target_link_libraries(
    qnn_executor_runner PRIVATE executorch_core gflags qnn_executorch_stub
                                adsprpc
  )
endif()
Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
/*
2+
* Copyright (c) Qualcomm Innovation Center, Inc.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include <chrono>
#include <cstddef>
#include <cstdint>
#include <fstream>
#include <memory>
#include <numeric>
#include <string>
#include <vector>

#include <executorch/runtime/platform/assert.h>
#include <gflags/gflags.h>

#include "qnn_executorch.h"
18+
19+
DEFINE_string(
20+
model_path,
21+
"model.pte",
22+
"Model serialized in flatbuffer format.");
23+
DEFINE_string(
24+
output_folder_path,
25+
".",
26+
"Executorch inference data output path.");
27+
DEFINE_string(input_list_path, "input_list.txt", "Model input list path.");
28+
29+
int main(int argc, char** argv) {
30+
gflags::ParseCommandLineFlags(&argc, &argv, true);
31+
if (argc != 1) {
32+
std::string msg = "extra commandline args:";
33+
for (int i = 1 /* skip argv[0] (program name) */; i < argc; i++) {
34+
msg += std::string(" ") + argv[i];
35+
}
36+
ET_LOG(Error, "%s", msg.c_str());
37+
return 1;
38+
}
39+
40+
// fastrpc related
41+
// adsp
42+
const int adsp_domain_id = 0;
43+
// signed PD
44+
const int enable_unsigned_pd = 0;
45+
// domain uri
46+
std::string domain_uri(qnn_executorch_URI);
47+
domain_uri += "&_dom=adsp";
48+
// init session
49+
struct remote_rpc_control_unsigned_module data;
50+
data.domain = adsp_domain_id;
51+
data.enable = enable_unsigned_pd;
52+
int err = AEE_SUCCESS;
53+
ET_CHECK_MSG(
54+
AEE_SUCCESS ==
55+
(err = remote_session_control(
56+
DSPRPC_CONTROL_UNSIGNED_MODULE, (void*)&data, sizeof(data))),
57+
"remote_session_control failed: 0x%x",
58+
err);
59+
// start session
60+
remote_handle64 handle = -1;
61+
ET_CHECK_MSG(
62+
AEE_SUCCESS == (err = qnn_executorch_open(domain_uri.data(), &handle)),
63+
"qnn_executorch_open failed: 0x%x",
64+
err);
65+
// load model
66+
const char* model_path = FLAGS_model_path.c_str();
67+
qnn_executorch_load(handle, model_path);
68+
69+
// prepare io
70+
std::vector<std::vector<uint8_t>> input_data, output_data;
71+
std::vector<tensor> input_tensor, output_tensor;
72+
for (int i = 0;; ++i) {
73+
int nbytes = 0;
74+
qnn_executorch_get_input_size(handle, model_path, i, &nbytes);
75+
if (nbytes == -1) {
76+
break;
77+
}
78+
input_data.emplace_back(std::vector<uint8_t>(nbytes));
79+
input_tensor.emplace_back(
80+
tensor({input_data.back().data(), (int)input_data.back().size()}));
81+
}
82+
for (int i = 0;; ++i) {
83+
int nbytes = 0;
84+
qnn_executorch_get_output_size(handle, model_path, i, &nbytes);
85+
if (nbytes == -1) {
86+
break;
87+
}
88+
output_data.emplace_back(std::vector<uint8_t>(nbytes));
89+
output_tensor.emplace_back(
90+
tensor({output_data.back().data(), (int)output_data.back().size()}));
91+
}
92+
93+
// prepare input data
94+
std::ifstream input_list(FLAGS_input_list_path);
95+
// TODO: should check IO info via fastrpc first
96+
if (input_list.is_open()) {
97+
auto split = [](std::string s, std::string delimiter) {
98+
size_t pos_start = 0, pos_end, delim_len = delimiter.length();
99+
std::string token;
100+
std::vector<std::string> res;
101+
102+
while ((pos_end = s.find(delimiter, pos_start)) != std::string::npos) {
103+
token = s.substr(pos_start, pos_end - pos_start);
104+
pos_start = pos_end + delim_len;
105+
res.push_back(token);
106+
}
107+
res.push_back(s.substr(pos_start));
108+
return res;
109+
};
110+
111+
std::string file_path;
112+
int inference_index = 0;
113+
while (std::getline(input_list, file_path)) {
114+
auto input_files = split(file_path, " ");
115+
if (input_files.size() == 0) {
116+
break;
117+
}
118+
size_t num_inputs = input_files.size();
119+
for (int i = 0; i < num_inputs; ++i) {
120+
std::ifstream fin(input_files[i], std::ios::binary);
121+
fin.seekg(0, fin.end);
122+
size_t file_size = fin.tellg();
123+
fin.seekg(0, fin.beg);
124+
fin.read((char*)input_data[i].data(), file_size);
125+
fin.close();
126+
}
127+
qnn_executorch_set_input(
128+
handle, model_path, input_tensor.data(), input_tensor.size());
129+
qnn_executorch_execute(handle, model_path);
130+
qnn_executorch_get_output(
131+
handle, model_path, output_tensor.data(), output_tensor.size());
132+
for (size_t i = 0; i < output_tensor.size(); i++) {
133+
auto output_file_name = FLAGS_output_folder_path + "/output_" +
134+
std::to_string(inference_index) + "_" + std::to_string(i) + ".raw";
135+
std::ofstream fout(output_file_name.c_str(), std::ios::binary);
136+
fout.write(
137+
(const char*)output_tensor[i].data, output_tensor[i].dataLen);
138+
fout.close();
139+
}
140+
}
141+
}
142+
143+
// unload model
144+
qnn_executorch_unload(handle, model_path);
145+
// tear down
146+
qnn_executorch_close(handle);
147+
return 0;
148+
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
/*
2+
* Copyright (c) Qualcomm Innovation Center, Inc.
3+
* All rights reserved.
4+
*
5+
* This source code is licensed under the BSD-style license found in the
6+
* LICENSE file in the root directory of this source tree.
7+
*/
8+
9+
#include "AEEStdDef.idl"
10+
#include "remote.idl"
11+
12+
/// Enabling stub-skel mismatch check feature in the auto-gen files.
/// Please refer to the IDL documentation for more details on the feature.
/// It is fully supported only on Kailua and later targets.
const string IDL_VERSION = "0.0.0";

/// One tensor payload, transported as an opaque byte sequence.
typedef sequence<uint8> tensor;

/// Remote interface for driving an ExecuTorch program over FastRPC.
///
/// Every method takes `pte_path`, which identifies the program the call
/// applies to. Return values are presumably AEE status codes (0 on success);
/// confirm against the skeleton implementation.
///
/// NOTE: keep the method order stable; reordering changes the generated
/// stub/skel pair.
interface qnn_executorch : remote_handle64 {
  /// Loads the program at `pte_path`.
  long load(in string pte_path);

  /// Reports the byte size of input `index` through `nbytes`. The example
  /// runner (qnn_executor_runner.cpp) treats `nbytes == -1` as "no such
  /// input".
  long get_input_size(in string pte_path, in long index, rout long nbytes);

  /// Supplies the input tensors for the next `execute` call.
  long set_input(in string pte_path, in sequence<tensor> tensors);

  /// Executes the loaded program.
  long execute(in string pte_path);

  /// Reports the byte size of output `index` through `nbytes`. The example
  /// runner (qnn_executor_runner.cpp) treats `nbytes == -1` as "no such
  /// output".
  long get_output_size(in string pte_path, in long index, rout long nbytes);

  /// Retrieves the output tensors into the caller-provided `tensors`.
  long get_output(in string pte_path, rout sequence<tensor> tensors);

  /// Unloads the program at `pte_path`.
  long unload(in string pte_path);
};

0 commit comments

Comments
 (0)