 #include <executorch/runtime/platform/log.h>
 #include <pytorch/tokenizers/hf_tokenizer.h>
 
+#include <cinttypes>
 #include <fstream>
 #include <string>
 #include <vector>
@@ -58,11 +59,13 @@ int main(int argc, char** argv) {
 
   llm::Stats stats;
 
+#ifdef EXECUTORCH_BUILD_CUDA
   // GPU memory before load
-  size_t gpu_free_bytes, gpu_total_bytes;
+  size_t gpu_free_bytes = 0, gpu_total_bytes = 0;
   cudaMemGetInfo(&gpu_free_bytes, &gpu_total_bytes);
   stats.gpu_total_bytes = gpu_total_bytes;
   stats.gpu_free_before_load_bytes = gpu_free_bytes;
+#endif
 
   stats.model_load_start_ms = llm::time_in_ms();
 
@@ -127,9 +130,11 @@ int main(int argc, char** argv) {
 
   stats.model_load_end_ms = llm::time_in_ms();
 
+#ifdef EXECUTORCH_BUILD_CUDA
   // GPU memory after load
   cudaMemGetInfo(&gpu_free_bytes, &gpu_total_bytes);
   stats.gpu_free_after_load_bytes = gpu_free_bytes;
+#endif
 
   // Get EOS ids
   auto eos_ids = llm::get_eos_ids(tokenizer.get(), module.get());
@@ -155,7 +160,7 @@ int main(int argc, char** argv) {
   }
   auto prompt_tokens = std::move(*encode_result);
   int64_t num_prompt_tokens = prompt_tokens.size();
-  printf("Prompt tokens: %ld\n", num_prompt_tokens);
+  printf("Prompt tokens: %" PRId64 "\n", num_prompt_tokens);
 
   stats.num_prompt_tokens = num_prompt_tokens;
   stats.inference_start_ms = llm::time_in_ms();
@@ -209,7 +214,7 @@ int main(int argc, char** argv) {
   double prefill_ms =
       (double)(stats.prompt_eval_end_ms - stats.inference_start_ms);
   printf(
-      "Prefill: %ld tokens in %.1f ms (%.1f tok/s)\n",
+      "Prefill: %" PRId64 " tokens in %.1f ms (%.1f tok/s)\n",
      num_prompt_tokens,
      prefill_ms,
      num_prompt_tokens * 1000.0 / prefill_ms);
@@ -290,17 +295,19 @@ int main(int argc, char** argv) {
   double decode_ms =
       (double)(stats.inference_end_ms - stats.prompt_eval_end_ms);
   printf(
-      "Decode: %ld tokens in %.1f ms (%.1f tok/s)\n",
+      "Decode: %" PRId64 " tokens in %.1f ms (%.1f tok/s)\n",
      num_generated,
      decode_ms,
      num_generated * 1000.0 / decode_ms);
-  printf("Prompt tokens: %ld\n", num_prompt_tokens);
+  printf("Prompt tokens: %" PRId64 "\n", num_prompt_tokens);
 
+#ifdef EXECUTORCH_BUILD_CUDA
   // GPU memory after generation
   cudaMemGetInfo(&gpu_free_bytes, &gpu_total_bytes);
   stats.gpu_free_after_generate_bytes = gpu_free_bytes;
   stats.gpu_peak_usage_mb =
       (stats.gpu_total_bytes - gpu_free_bytes) / 1024.0 / 1024.0;
+#endif
 
   llm::print_report(stats);
 
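For context, a minimal standalone sketch of the two patterns this diff applies: fencing the CUDA memory queries behind EXECUTORCH_BUILD_CUDA so the runner still builds in CPU-only configurations, and printing int64_t with PRId64 instead of %ld. The Stats struct below is a hypothetical stand-in for the runner's llm::Stats, not the actual ExecuTorch type.

#include <cinttypes>
#include <cstdint>
#include <cstdio>

#ifdef EXECUTORCH_BUILD_CUDA
#include <cuda_runtime.h>
#endif

// Hypothetical stand-in for the runner's llm::Stats bookkeeping.
struct Stats {
  size_t gpu_total_bytes = 0;
  size_t gpu_free_before_load_bytes = 0;
};

int main() {
  Stats stats;

#ifdef EXECUTORCH_BUILD_CUDA
  // cudaMemGetInfo(&free, &total) reports current device memory; compiling
  // it conditionally keeps CPU-only builds free of the CUDA runtime.
  size_t gpu_free_bytes = 0, gpu_total_bytes = 0;
  cudaMemGetInfo(&gpu_free_bytes, &gpu_total_bytes);
  stats.gpu_total_bytes = gpu_total_bytes;
  stats.gpu_free_before_load_bytes = gpu_free_bytes;
#endif

  // "%ld" assumes int64_t == long, which fails on LLP64 targets such as
  // 64-bit Windows; PRId64 from <cinttypes> expands to the right specifier.
  int64_t num_prompt_tokens = 42;  // placeholder value
  printf("Prompt tokens: %" PRId64 "\n", num_prompt_tokens);
  return 0;
}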