Skip to content

Commit a3dd0fa

Browse files
authored
qwen3_5_moe: guard new cudaMemGetInfo blocks behind EXECUTORCH_BUILD_CUDA (#19265)
### Summary

#19228 added structured GPU memory tracking to the qwen3_5_moe runner but did not wrap the new `cudaMemGetInfo` blocks in the existing `EXECUTORCH_BUILD_CUDA` guard that the rest of the file uses for CUDA-only APIs. The same main.cpp is built for the Metal target, where the CUDA runtime headers are not available, so the new blocks failed to compile on macOS:

```
error: use of undeclared identifier 'cudaMemGetInfo'
    if (cudaMemGetInfo(&free, &total) == cudaSuccess) {
```

Wrap the three new scoped blocks in `#ifdef EXECUTORCH_BUILD_CUDA`, matching the existing guard pattern at lines 27, 68, 113, 168, and 184. The stats struct fields they would have populated (`gpu_free_before_load_bytes`, `gpu_free_after_load_bytes`, `gpu_free_after_generate_bytes`, `gpu_peak_usage_mb`) default to their sentinel values on non-CUDA builds, so the rest of the runner's stats reporting tolerates their absence.

Authored with Claude Code.

### Test plan

CI
1 parent 8464b47 commit a3dd0fa

1 file changed

Lines changed: 6 additions & 0 deletions

File tree

examples/models/qwen3_5_moe/main.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -131,6 +131,7 @@ int main(int argc, char** argv) {
131131
return 1;
132132
}
133133

134+
#ifdef EXECUTORCH_BUILD_CUDA
134135
// GPU memory: before load
135136
{
136137
size_t free = 0, total = 0;
@@ -139,6 +140,7 @@ int main(int argc, char** argv) {
139140
stats.gpu_free_before_load_bytes = free;
140141
}
141142
}
143+
#endif
142144

143145
stats.model_load_start_ms = llm::time_in_ms();
144146

@@ -224,13 +226,15 @@ int main(int argc, char** argv) {
224226

225227
stats.model_load_end_ms = llm::time_in_ms();
226228

229+
#ifdef EXECUTORCH_BUILD_CUDA
227230
// GPU memory: after load
228231
{
229232
size_t free = 0, total = 0;
230233
if (cudaMemGetInfo(&free, &total) == cudaSuccess) {
231234
stats.gpu_free_after_load_bytes = free;
232235
}
233236
}
237+
#endif
234238

235239
// Get EOS ids
236240
auto eos_ids = llm::get_eos_ids(tokenizer.get(), module.get());
@@ -397,6 +401,7 @@ int main(int argc, char** argv) {
397401
int64_t num_generated = pos - num_prompt_tokens;
398402
stats.num_generated_tokens = num_generated;
399403

404+
#ifdef EXECUTORCH_BUILD_CUDA
400405
// GPU memory: after generate + peak usage
401406
{
402407
size_t free = 0, total = 0;
@@ -412,6 +417,7 @@ int main(int argc, char** argv) {
412417
stats.gpu_peak_usage_mb = (double)(total - min_free) / 1024.0 / 1024.0;
413418
}
414419
}
420+
#endif
415421

416422
printf("\n");
417423

0 commit comments

Comments
 (0)