Skip to content

Commit 5baf3f9

Browse files
authored
Qualcomm AI Engine Direct - Support MSVC-compatible code (pytorch#19686)
## Summary ### 1. Remove the **designated initializers** from C++ code Why does it compile on Linux but not with MSVC on Windows? - Designated initializers for C++ aggregates were standardized in C++20. GCC and Clang have supported them as an extension in C++11/14/17 modes — they silently accept the syntax even when compiling in `-std=c++17` mode. MSVC is strictly conformant: it only accepts designated initializers when `/std:c++20` (or `/std:c++latest`) is active. ### 2. Remove the **GNU statement expressions** Why does it compile on Linux but not with MSVC on Windows? - The GNU statement expression is a GNU C / GNU C++ language extension that lets you treat a block of statements as if it were a single expression that produces a value. It is not part of standard C or C++, but it is widely supported by GCC and Clang. MSVC does not support it. ### 3. Replace `constexpr` inside the lambda `[&]` - `ET_INTERNAL_SWITCH` wraps the `NAME` in `[&] { ... }()`. The `[&]` capture means the lambda captures all local variables by reference, including `NAME`. - However, inside the `[&]` lambda, `NAME` is accessed via the closure's implicit `this` pointer — because variables captured by reference are held by the closure object, the access is effectively `(*this).name` in the closure's internal representation. Dereferencing `this` is not a constant expression because `this` is a pointer to the closure object, which exists only at runtime. Why does it compile on Linux but not with MSVC on Windows? - GCC and Clang are more permissive here. They apply a special rule: if the captured variable is itself `constexpr` and its value is a compile-time constant, they allow it to be used as a constant expression inside the lambda, effectively treating the capture as constant propagation rather than a runtime dereference. This is a quality-of-implementation extension beyond what the standard strictly requires. ### 4. 
Replace `__attribute__((visibility("default")))` with the corresponding MSVC-compatible syntax - Use the Microsoft-specific C/C++ extensions `__declspec(dllexport)` and `__declspec(dllimport)` to control symbol visibility when working with Windows DLLs.
1 parent 359ac31 commit 5baf3f9

18 files changed

Lines changed: 127 additions & 95 deletions

File tree

backends/qualcomm/aot/wrappers/QuantizeParamsWrapper.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,9 +70,9 @@ class UndefinedQuantizeParamsWrapper final : public QuantizeParamsWrapper {
7070
}
7171

7272
Qnn_QuantizeParams_t CreateQuantizeParams() override {
73-
Qnn_QuantizeParams_t rval = {
74-
.encodingDefinition = GetEncodingDefinition(),
75-
.quantizationEncoding = GetQuantizationEncoding()};
73+
Qnn_QuantizeParams_t rval;
74+
rval.encodingDefinition = GetEncodingDefinition();
75+
rval.quantizationEncoding = GetQuantizationEncoding();
7676
return rval;
7777
}
7878
};

backends/qualcomm/aot/wrappers/TensorWrapper.h

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -130,9 +130,12 @@ class TensorWrapper {
130130
std::unique_ptr<char[]> owned_data_;
131131
bool created_{false};
132132

133-
Qnn_Tensor_t tensor_ = {
134-
.version = QNN_TENSOR_VERSION_2,
135-
.v2 = QNN_TENSOR_V2_INIT};
133+
Qnn_Tensor_t tensor_ = []() noexcept {
134+
Qnn_Tensor_t t{};
135+
t.version = QNN_TENSOR_VERSION_2;
136+
t.v2 = QNN_TENSOR_V2_INIT;
137+
return t;
138+
}();
136139
};
137140
// base function for Create TensorWrapper
138141
std::shared_ptr<TensorWrapper> CreateTensorWrapper(

backends/qualcomm/runtime/QnnExecuTorch.h

Lines changed: 15 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,16 @@
2727
#define QNN_RUNTIME_LPAI_CORE_SELECTION "qnn_runtime_lpai_core_selection"
2828
#define QNN_RUNTIME_HEAP_PROFILING_PATH "qnn_runtime_heap_profiling_path"
2929

30+
#if defined(_MSC_VER)
31+
#if defined(QNN_EXECUTORCH_BUILDING_DLL)
32+
#define QNN_EXECUTORCH_EXPORT __declspec(dllexport)
33+
#else
34+
#define QNN_EXECUTORCH_EXPORT __declspec(dllimport)
35+
#endif
36+
#else
37+
#define QNN_EXECUTORCH_EXPORT __attribute__((__visibility__("default")))
38+
#endif
39+
3040
#ifdef __cplusplus
3141
extern "C" {
3242
#endif // __cplusplus
@@ -69,18 +79,18 @@ struct CustomMemTensorInfo {
6979
/// alignment as MemoryAllocator::kDefaultAlignment.
7080
/// See runtime/core/memory_allocator.h. The function returns a valid pointer
7181
/// if allocation is successful.
72-
__attribute__((__visibility__("default"))) void* QnnExecuTorchAllocCustomMem(
82+
QNN_EXECUTORCH_EXPORT void* QnnExecuTorchAllocCustomMem(
7383
size_t bytes,
7484
size_t alignment);
7585

7686
/// Add tensor to custom memory with custom type descriptor. Create memory
7787
/// handle to tensor wrapper during execution
78-
__attribute__((__visibility__("default"))) void
79-
QnnExecuTorchAddCustomMemTensorAddr(void* tensor_addr, void* custom_mem);
88+
QNN_EXECUTORCH_EXPORT void QnnExecuTorchAddCustomMemTensorAddr(
89+
void* tensor_addr,
90+
void* custom_mem);
8091

8192
/// Free the allocated shared memory.
82-
__attribute__((__visibility__("default"))) void QnnExecuTorchFreeCustomMem(
83-
void* buffer_ptr);
93+
QNN_EXECUTORCH_EXPORT void QnnExecuTorchFreeCustomMem(void* buffer_ptr);
8494

8595
#ifdef __cplusplus
8696
}

backends/qualcomm/runtime/QnnManager.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77
*/
88

99
#include <executorch/backends/qualcomm/runtime/QnnBackendOptions.h>
10+
#include <executorch/backends/qualcomm/runtime/QnnExecuTorch.h>
1011
#include <executorch/backends/qualcomm/runtime/QnnManager.h>
1112
#include <executorch/backends/qualcomm/runtime/SharedBuffer.h>
1213
#include <executorch/backends/qualcomm/runtime/backends/QnnBackendCommon.h>

examples/models/llama/main.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -199,8 +199,8 @@ int32_t main(int32_t argc, char** argv) {
199199
}
200200
}
201201
// generate
202-
executorch::extension::llm::GenerationConfig config{
203-
.temperature = temperature};
202+
executorch::extension::llm::GenerationConfig config{};
203+
config.temperature = temperature;
204204

205205
config.ignore_eos = FLAGS_ignore_eos;
206206
config.num_bos = FLAGS_num_bos;

examples/qualcomm/oss_scripts/llama/runner/attention_sink_rope_runner.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,9 @@ Error AttentionSinkRopeRunner::load(
4040
for (const std::string& method_name : method_names) {
4141
ET_CHECK_OK_OR_RETURN_ERROR(module_->load_method(method_name));
4242
}
43-
eviction_batch_size_ = ET_UNWRAP(module_->get("get_eviction_batch_size"))
44-
.toScalar()
45-
.to<int64_t>();
43+
ET_UNWRAP(
44+
eviction_batch_size_evalue__, module_->get("get_eviction_batch_size"));
45+
eviction_batch_size_ = eviction_batch_size_evalue__.toScalar().to<int64_t>();
4646
return Error::Ok;
4747
}
4848

examples/qualcomm/oss_scripts/llama/runner/lhd_token_generator.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -347,8 +347,9 @@ Result<int64_t> LhdTokenGenerator::generate(
347347
shifted_pos++;
348348

349349
// print the token as string, decode it with the Tokenizer object
350-
token_callback(
351-
ET_UNWRAP_TOKENIZER(this->tokenizer_->decode(prev_token, cur_token)));
350+
ET_UNWRAP_TOKENIZER(
351+
decoded_token__, this->tokenizer_->decode(prev_token, cur_token));
352+
token_callback(decoded_token__);
352353

353354
// data-dependent terminating condition: we have n_eos_ number of EOS
354355
if (this->eos_ids_->count(cur_token) > 0) {

examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_lhd_token_generator.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -332,8 +332,9 @@ Result<int64_t> MultimodalLhdTokenGenerator::generate(
332332
pos++;
333333

334334
// print the token as string, decode it with the Tokenizer object
335-
token_callback(
336-
ET_UNWRAP_TOKENIZER(this->tokenizer_->decode(prev_token, cur_token)));
335+
ET_UNWRAP_TOKENIZER(
336+
decoded_token__, this->tokenizer_->decode(prev_token, cur_token));
337+
token_callback(decoded_token__);
337338

338339
// data-dependent terminating condition: we have n_eos_ number of EOS
339340
if (this->eos_ids_->count(cur_token) > 0) {

examples/qualcomm/oss_scripts/llama/runner/multimodal_runner/multimodal_runner.cpp

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -223,8 +223,8 @@ Error QNNMultimodalRunner::load() {
223223

224224
ET_LOG(Info, "Reading metadata from model");
225225
// retrieve any method meta, can be either prefill or kv
226-
int64_t num_layers =
227-
ET_UNWRAP(text_decoder_->get("get_n_layers")).toScalar().to<int64_t>();
226+
ET_UNWRAP(num_layers_evalue__, text_decoder_->get("get_n_layers"));
227+
int64_t num_layers = num_layers_evalue__.toScalar().to<int64_t>();
228228

229229
ET_CHECK_MSG(num_layers != -1, "Could not retrieve num layers");
230230
// k_cache: [1, n_heads, head_dim, seq_len]
@@ -292,8 +292,9 @@ Error QNNMultimodalRunner::load() {
292292
// attention
293293
int32_t sliding_window = context_len_;
294294
if (text_decoder_->method_names()->count("get_sliding_window") > 0) {
295-
sliding_window =
296-
ET_UNWRAP(text_decoder_->get("get_sliding_window")).toInt();
295+
ET_UNWRAP(
296+
sliding_window_evalue__, text_decoder_->get("get_sliding_window"));
297+
sliding_window = sliding_window_evalue__.toInt();
297298
}
298299
kv_manager_ = std::make_unique<KVManager>(
299300
KVManager::Metadata{
@@ -527,8 +528,9 @@ executorch::runtime::Error QNNMultimodalRunner::generate(
527528
// print the first token from prefill. No prev_token so use cur_token for
528529
// it.
529530
if (token_callback) {
530-
token_callback(
531-
ET_UNWRAP_TOKENIZER(tokenizer_->decode(cur_token, cur_token)));
531+
ET_UNWRAP_TOKENIZER(
532+
decoded_token__, tokenizer_->decode(cur_token, cur_token));
533+
token_callback(decoded_token__);
532534
}
533535
ET_LOG(
534536
Info,
@@ -538,8 +540,15 @@ executorch::runtime::Error QNNMultimodalRunner::generate(
538540
// start the main loop
539541
prompt_tokens.push_back(cur_token);
540542

541-
int64_t num_generated_tokens = ET_UNWRAP(token_generator_->generate(
542-
prompt_tokens, cur_pos_, seq_len, token_callback, dump_logits, nullptr));
543+
ET_UNWRAP(
544+
num_generated_tokens,
545+
token_generator_->generate(
546+
prompt_tokens,
547+
cur_pos_,
548+
seq_len,
549+
token_callback,
550+
dump_logits,
551+
nullptr));
543552
stats_.inference_end_ms = time_in_ms();
544553
ET_LOG(
545554
Info,

examples/qualcomm/oss_scripts/llama/runner/runner.cpp

Lines changed: 16 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -227,8 +227,8 @@ Error Runner::load() {
227227

228228
ET_LOG(Info, "Reading metadata from model");
229229
// retrieve any method meta, can be either prefill or kv
230-
int64_t num_layers =
231-
ET_UNWRAP(module_->get("get_n_layers")).toScalar().to<int64_t>();
230+
ET_UNWRAP(num_layers_evalue__, module_->get("get_n_layers"));
231+
int64_t num_layers = num_layers_evalue__.toScalar().to<int64_t>();
232232

233233
ET_CHECK_MSG(num_layers != -1, "Could not retrieve num layers");
234234
// k_cache: [1, n_heads, head_dim, seq_len]
@@ -270,7 +270,8 @@ Error Runner::load() {
270270
// attention
271271
int32_t sliding_window = context_len_;
272272
if (module_->method_names()->count("get_sliding_window") > 0) {
273-
sliding_window = ET_UNWRAP(module_->get("get_sliding_window")).toInt();
273+
ET_UNWRAP(sliding_window_evalue__, module_->get("get_sliding_window"));
274+
sliding_window = sliding_window_evalue__.toInt();
274275
}
275276
kv_manager_ = std::make_unique<KVManager>(
276277
KVManager::Metadata{
@@ -461,8 +462,9 @@ Error Runner::generate_from_prompt_or_file(
461462
// print the first token from prefill. No prev_token so use cur_token for
462463
// it.
463464
if (token_callback) {
464-
token_callback(
465-
ET_UNWRAP_TOKENIZER(tokenizer_->decode(cur_token, cur_token)));
465+
ET_UNWRAP_TOKENIZER(
466+
decoded_token__, tokenizer_->decode(cur_token, cur_token));
467+
token_callback(decoded_token__);
466468
}
467469
ET_LOG(
468470
Info,
@@ -471,13 +473,15 @@ Error Runner::generate_from_prompt_or_file(
471473

472474
// start the main loop
473475
prompt_tokens.push_back(cur_token);
474-
int64_t num_generated_tokens = ET_UNWRAP(token_generator_->generate(
475-
prompt_tokens,
476-
cur_pos_,
477-
seq_len,
478-
token_callback,
479-
dump_logits,
480-
attention_sink_rope_runner_.get()));
476+
ET_UNWRAP(
477+
num_generated_tokens,
478+
token_generator_->generate(
479+
prompt_tokens,
480+
cur_pos_,
481+
seq_len,
482+
token_callback,
483+
dump_logits,
484+
attention_sink_rope_runner_.get()));
481485
stats_.inference_end_ms = time_in_ms();
482486
ET_LOG(
483487
Info,

0 commit comments

Comments
 (0)