80 changes: 78 additions & 2 deletions backends/qualcomm/debugger/README.md
@@ -50,7 +50,7 @@ Generate optrace and QHAS files using QNN tools under $QNN_SDK_ROOT. After finis
adb = SimpleADB(
qnn_config=qnn_config,
pte_path=f"{args.artifact}/{pte_filename}.pte",
workspace=f"/data/local/tmp/executorch/{pte_filename},
workspace=f"/data/local/tmp/executorch/{pte_filename}",
)
binaries_trace = generate_optrace(
args, adb, f"{args.artifact}/{pte_filename}.pte", example_input
@@ -78,7 +78,7 @@ qairt_visualizer.view(reports=[optrace, qhas])
- `model`: Path to your QNN model file (e.g., `path_to_your_model.dlc`).
- **`reports`**: List of report file paths, including the optrace (`optrace.json`) and QHAS (`optrace_qnn_htp_analysis_summary.json`).

Note: Files ending with `.bin ` do not support graph visualization in qairt_visualizer.
Note: Files ending with `.bin` do not support graph visualization in qairt_visualizer.

## Demo

@@ -226,3 +226,79 @@ python examples/qualcomm/util_scripts/qnn_intermediate_debugger_demo.py -b build
2. Please ignore this if you are using `qnn_executor_runner`. If you have decided to write your own runner, please follow the [tutorial](https://pytorch.org/executorch/stable/etdump.html) on how to implement etdump into your own runner.
3. The current debugger does not support graphs with partitions. (WIP)
4. The current debugger does not support LLM models. (WIP)


## ExecuTorch QNN HTP Heap Profiling

This feature measures DSP memory usage when running context binary models on the HTP backend.

### Introduction

DSP heap profiling is available for `QnnContext_createFromBinary` use cases. It captures total DSP heap usage at two checkpoints:

- **Before the first context is created** (`before_context_created`)
- **After the last context is freed** (`after_context_freed`)

The difference between the two values represents heap consumed during context binary execution. The value after freeing is typically equal to or greater than the value before creation, so a positive difference indicates heap that was not returned after all contexts were freed.
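For example, if `before_context_created` reported 928212 bytes and `after_context_freed` reported 1028212 bytes (hypothetical values), the contexts would have retained 100000 bytes of DSP heap.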

### Instructions

#### Run the example test

```bash
python backends/qualcomm/tests/test_qnn_delegate.py \
    TestQNNQuantizedUtils.test_qnn_backend_runtime_option_heap_profile \
    -b build-android -H ${HOST} -s ${SN} -m ${SOC_MODEL}
```

See [test_qnn_delegate.py](../tests/test_qnn_delegate.py) for the full test implementation.

#### Settings

```python
from executorch.backends.qualcomm.utils.utils import generate_htp_compiler_spec
from executorch.backends.qualcomm.utils.utils import generate_qnn_executorch_compiler_spec

backend_options = generate_htp_compiler_spec(
    use_multi_contexts=True,
)

compiler_specs = generate_qnn_executorch_compiler_spec(
    soc_model=self.chipset_table[TestQNN.soc_model],
    backend_options=backend_options,
    profile_level=2,
)

# ...

self.verify_output(
    module,
    sample_input,
    exec_prog,
    save_heap_result=True,
)
```
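
Note: heap profiling is gated on the profile level. Any level other than `kProfileOff` marks contexts as needing profiling (see `need_to_profile_` in `QnnContextCommon.h`); `profile_level=2` above is one such level.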

#### Output file format

The result is written to a text file (default: `htp_heap_usage.txt`) with two lines:

```
DSP:before_context_created (bytes), <value>
DSP:after_context_freed (bytes), <value>
```
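
To sanity-check a run, the sketch below parses the file and reports the delta. It assumes exactly the two-line format shown above; `read_heap_profile` is an illustrative helper, not an API from this repository.

```python
# Minimal sketch: parse the two checkpoint lines and report the heap delta.
# Assumes each line follows "DSP:<checkpoint> (bytes), <value>" as shown above.
def read_heap_profile(path="htp_heap_usage.txt"):
    values = {}
    with open(path) as f:
        for line in f:
            label, _, value = line.partition(",")
            if value.strip():
                # "DSP:before_context_created (bytes)" -> "before_context_created"
                checkpoint = label.split(":", 1)[1].split(" ")[0]
                values[checkpoint] = int(value)
    return values


usage = read_heap_profile()
delta = usage["after_context_freed"] - usage["before_context_created"]
print(f"DSP heap delta: {delta} bytes")
```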

#### Reference result

Measured on SM8850. A difference of 0 means no additional heap is consumed during context binary execution.

```console
First value (before_context_created): 928212 bytes
Second value (after_context_freed): 928212 bytes
difference: 0.00 bytes
```

### Limitations

1. Only supported on the HTP backend on Android and QNX platforms.
2. Enabling this feature may impact initialization and cleanup time.
5 changes: 5 additions & 0 deletions backends/qualcomm/export_utils.py
@@ -494,6 +494,11 @@ def pull_debug_output(self, etdump_path, debug_ouput_path, callback=None):
        if callback:
            callback()

    def pull_heap_output(self, src_file_path, dst_folder, callback=None):
        """Pull the heap profiling result file from the device into dst_folder."""
        self._adb(["pull", src_file_path, dst_folder])
        if callback:
            callback()


def build_executorch_binary(
model: torch.nn.Module, # noqa: B006
8 changes: 8 additions & 0 deletions backends/qualcomm/runtime/QnnBackendOptions.cpp
@@ -52,6 +52,14 @@ template QnnExecuTorchProfileLevel get_option<QnnExecuTorchProfileLevel>(
QnnExecuTorchProfileLevel,
const char*);

executorch::runtime::Error get_runtime_option(
    const char* key,
    executorch::runtime::BackendOption& backend_option) {
  std::strncpy(backend_option.key, key, runtime::kMaxOptionKeyLength);
  backend_option.key[runtime::kMaxOptionKeyLength - 1] = '\0';
  return get_option(QNN_BACKEND, backend_option);
}

} // namespace qnn
} // namespace backends
} // namespace executorch
13 changes: 13 additions & 0 deletions backends/qualcomm/runtime/QnnBackendOptions.h
@@ -37,6 +37,19 @@ struct RuntimeOption {
template <typename T>
T get_option(T aot_option, const char* aot_key);

/**
 * @brief
 * Get a backend option.
 * This method checks runtime options only.
 *
 * @param key The key of the runtime option.
 * @param backend_option The BackendOption to be populated with the value set
 * at runtime.
 */
executorch::runtime::Error get_runtime_option(
    const char* key,
    executorch::runtime::BackendOption& backend_option);

} // namespace qnn
} // namespace backends
} // namespace executorch
1 change: 1 addition & 0 deletions backends/qualcomm/runtime/QnnExecuTorch.h
@@ -25,6 +25,7 @@
#define QNN_RUNTIME_LPAI_CLIENT_PERF_TYPE "qnn_runtime_lpai_client_perf_type"
#define QNN_RUNTIME_LPAI_AFFINITY "qnn_runtime_lpai_affinity"
#define QNN_RUNTIME_LPAI_CORE_SELECTION "qnn_runtime_lpai_core_selection"
#define QNN_RUNTIME_HEAP_PROFILING_PATH "qnn_runtime_heap_profiling_path"

#ifdef __cplusplus
extern "C" {
11 changes: 11 additions & 0 deletions backends/qualcomm/runtime/QnnExecuTorchBackend.cpp
@@ -245,6 +245,13 @@ executorch::runtime::Error QnnExecuTorchBackend::set_option(
qnn_runtime_lpai_core_selection_.value = *val;
qnn_runtime_lpai_core_selection_.is_set = true;
}
} else if (strcmp(option.key, QNN_RUNTIME_HEAP_PROFILING_PATH) == 0) {
if (auto* val =
std::get_if<std::array<char, runtime::kMaxOptionValueLength>>(
&option.value)) {
qnn_runtime_heap_profiling_path_.value = *val;
qnn_runtime_heap_profiling_path_.is_set = true;
}
} else {
ET_LOG(
Error,
@@ -303,6 +310,10 @@ executorch::runtime::Error QnnExecuTorchBackend::get_option(
strcmp(backend_options[i].key, QNN_RUNTIME_LPAI_CORE_SELECTION) == 0 &&
qnn_runtime_lpai_core_selection_.is_set) {
backend_options[i].value = qnn_runtime_lpai_core_selection_.value;
} else if (
strcmp(backend_options[i].key, QNN_RUNTIME_HEAP_PROFILING_PATH) == 0 &&
qnn_runtime_heap_profiling_path_.is_set) {
backend_options[i].value = qnn_runtime_heap_profiling_path_.value;
} else {
// either runtime never called set_option or key does not exist
matches--;
1 change: 1 addition & 0 deletions backends/qualcomm/runtime/QnnExecuTorchBackend.h
@@ -71,6 +71,7 @@ class QnnExecuTorchBackend final
RuntimeOption qnn_runtime_lpai_client_perf_type_{false, 0};
RuntimeOption qnn_runtime_lpai_affinity_{false, 0};
RuntimeOption qnn_runtime_lpai_core_selection_{false, 0};
RuntimeOption qnn_runtime_heap_profiling_path_{false, {}};
};

} // namespace qnn
3 changes: 2 additions & 1 deletion backends/qualcomm/runtime/backends/QnnBackendFactory.cpp
@@ -71,7 +71,8 @@ std::unique_ptr<BackendConfigParameters> QnnBackendFactory::Create(
qnn_device_ptr,
backend_params->qnn_backend_cache_ptr_.get(),
htp_options,
qnn_dlc_manager);
qnn_dlc_manager,
get_option(options->profile_level(), QNN_RUNTIME_PROFILE_LEVEL));

backend_params->qnn_graph_ptr_ = std::make_unique<HtpGraph>(
implementation_ptr,
73 changes: 70 additions & 3 deletions backends/qualcomm/runtime/backends/QnnContextCommon.cpp
@@ -6,26 +6,63 @@
* LICENSE file in the root directory of this source tree.
*/

#include <executorch/backends/qualcomm/runtime/QnnBackendOptions.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnContextCommon.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnDlcManager.h>

namespace executorch {
namespace backends {
namespace qnn {

std::mutex QnnContext::htp_context_mutex_;
int QnnContext::htp_context_count_{0};

void QnnContext::WriteHeapProfile() {
  executorch::runtime::BackendOption backend_option;
  std::string heap_profiling_path;
  if (get_runtime_option(QNN_RUNTIME_HEAP_PROFILING_PATH, backend_option) ==
      Error::Ok) {
    auto* arr = std::get_if<std::array<char, runtime::kMaxOptionValueLength>>(
        &backend_option.value);
    if (arr) {
      heap_profiling_path = arr->data();
    }
  }
  Qnn_ErrorHandle_t error_profile =
      qnn_profiler_->ProfileDataToFile(heap_profiling_path);
  if (error_profile != QNN_SUCCESS) {
    QNN_EXECUTORCH_LOG_ERROR(
        "Failed to profile. Cannot get profile from handle. Error %d",
        QNN_GET_ERROR_CODE(error_profile));
  }
}

QnnContext::~QnnContext() {
const QnnInterface& qnn_interface = implementation_->GetQnnInterface();
Qnn_ErrorHandle_t error = QNN_SUCCESS;

if (handle_ != nullptr) {
QNN_EXECUTORCH_LOG_INFO("Destroy Qnn context");
error = qnn_interface.qnn_context_free(handle_, /*profile=*/nullptr);

bool do_heap_profile = false;
{
std::lock_guard<std::mutex> lock(htp_context_mutex_);
if (is_htp_backend_ && htp_context_count_ > 0 && need_to_profile_) {
--htp_context_count_;
do_heap_profile = (htp_context_count_ == 0);
}
}
error = qnn_interface.qnn_context_free(
handle_, do_heap_profile ? qnn_profiler_->GetHandle() : nullptr);
if (error != QNN_SUCCESS) {
QNN_EXECUTORCH_LOG_ERROR(
"Failed to free QNN "
"context_handle_. Backend "
"ID %u, error %d",
qnn_interface.GetBackendId(),
QNN_GET_ERROR_CODE(error));
} else if (do_heap_profile) {
WriteHeapProfile();
}
handle_ = nullptr;
}
@@ -45,21 +82,51 @@ Error QnnContext::Configure() {
if (cache_->GetCacheState() == QnnBackendCache::DESERIALIZE) {
const QnnExecuTorchContextBinary& qnn_context_blob =
cache_->GetQnnContextBlob();
/*
 Total DSP heap usage is measured at two checkpoints: when the first context
 is created and when the last context is freed. Per the QNN documentation,
 the profile handle must be passed to qnn_context_create_from_binary for the
 first context and to qnn_context_free for the last context.

 Two limitations:
 1. Only supported on Android and QNX platforms.
 2. Enabling this feature may impact initialization and cleanup time.
*/

bool do_heap_profile = false;
{
std::lock_guard<std::mutex> lock(htp_context_mutex_);
do_heap_profile =
is_htp_backend_ && (htp_context_count_ == 0) && need_to_profile_;
if (is_htp_backend_) {
++htp_context_count_;
}
}

error = qnn_interface.qnn_context_create_from_binary(
backend_->GetHandle(),
device_->GetHandle(),
temp_context_config.empty() ? nullptr : temp_context_config.data(),
(temp_context_config.empty() ? nullptr : temp_context_config.data()),
static_cast<uint8_t*>(qnn_context_blob.buffer),
qnn_context_blob.nbytes,
&handle_,
/*profile=*/nullptr);
do_heap_profile ? qnn_profiler_->GetHandle() : nullptr);
if (error != QNN_SUCCESS) {
QNN_EXECUTORCH_LOG_ERROR(
"Can't create context from "
"binary. Error %d.",
QNN_GET_ERROR_CODE(error));
// Rollback the count since context creation failed
{
std::lock_guard<std::mutex> lock(htp_context_mutex_);
if (is_htp_backend_ && htp_context_count_ > 0) {
--htp_context_count_;
}
}
return Error::Internal;
} else if (do_heap_profile) {
WriteHeapProfile();
}
} else if (
cache_->GetCacheState() == QnnBackendCache::SERIALIZE ||
25 changes: 23 additions & 2 deletions backends/qualcomm/runtime/backends/QnnContextCommon.h
@@ -13,7 +13,10 @@
#include <executorch/backends/qualcomm/runtime/backends/QnnCustomProtocol.h>
#include <executorch/backends/qualcomm/runtime/backends/QnnDeviceCommon.h>

#include <executorch/backends/qualcomm/runtime/backends/QnnProfiler.h>

#include <memory>
#include <mutex>

namespace executorch {
namespace backends {
@@ -28,13 +31,23 @@ class QnnContext {
QnnBackend* backend,
QnnDevice* device,
QnnBackendCache* cache,
QnnDlcManager* qnn_dlc_manager)
QnnDlcManager* qnn_dlc_manager,
const QnnExecuTorchProfileLevel& profile_level)
: handle_(nullptr),
implementation_(implementation),
backend_(backend),
device_(device),
cache_(cache),
qnn_dlc_manager_(qnn_dlc_manager) {}
qnn_dlc_manager_(qnn_dlc_manager),
profile_level_(profile_level),
is_htp_backend_(
implementation->GetQnnInterface().GetBackendId() ==
QNN_BACKEND_ID_HTP),
need_to_profile_(
profile_level != QnnExecuTorchProfileLevel::kProfileOff) {
qnn_profiler_ =
std::make_unique<QnnProfile>(implementation_, backend_, profile_level_);
}

virtual ~QnnContext();

@@ -73,13 +86,21 @@ class QnnContext {
};

private:
void WriteHeapProfile();
Qnn_ContextHandle_t handle_;
QnnImplementation* implementation_;
QnnBackend* backend_;
QnnDevice* device_;
QnnBackendCache* cache_;
QnnContextCustomProtocol qnn_context_custom_protocol_;
QnnDlcManager* qnn_dlc_manager_;

QnnExecuTorchProfileLevel profile_level_;
std::unique_ptr<QnnProfile> qnn_profiler_;
bool is_htp_backend_;
bool need_to_profile_;
static std::mutex htp_context_mutex_;
static int htp_context_count_;
};
} // namespace qnn
} // namespace backends