Skip to content

Commit 3c81c8d

Browse files
authored
server : print graphs reused in slot timings (#23279)
Add a graphs-reused counter to the per-slot timing output; the value is read from llama_perf_context().
Assisted-by: llama.cpp:local pi
Co-authored-by: ggerganov <ggerganov@users.noreply.github.com>
1 parent cd963fe commit 3c81c8d

1 file changed

Lines changed: 15 additions & 9 deletions

File tree

tools/server/server-context.cpp

Lines changed: 15 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -467,20 +467,26 @@ struct server_slot {
467467
const double n_gen_second = 1e3 / t_token_generation * n_decoded;
468468

469469
SLT_INF(*this,
470-
"\n"
471-
"prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n"
472-
" eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n"
470+
"prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
471+
t_prompt_processing, n_prompt_tokens_processed, t_prompt, n_prompt_second);
472+
473+
SLT_INF(*this,
474+
" eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
475+
t_token_generation, n_decoded, t_gen, n_gen_second);
476+
477+
SLT_INF(*this,
473478
" total time = %10.2f ms / %5d tokens\n",
474-
t_prompt_processing, n_prompt_tokens_processed, t_prompt, n_prompt_second,
475-
t_token_generation, n_decoded, t_gen, n_gen_second,
476479
t_prompt_processing + t_token_generation, n_prompt_tokens_processed + n_decoded);
477480

481+
SLT_INF(*this,
482+
" graphs reused = %10d\n",
483+
llama_perf_context(ctx_tgt).n_reused);
484+
478485
if (n_draft_total > 0) {
479486
const float draft_ratio = (float) n_draft_accepted / n_draft_total;
480-
SLT_CNT(*this,
481-
"draft acceptance rate = %0.5f (%5d accepted / %5d generated)\n",
482-
draft_ratio, n_draft_accepted, n_draft_total
483-
);
487+
SLT_INF(*this,
488+
"draft acceptance = %0.5f (%5d accepted / %5d generated)\n",
489+
draft_ratio, n_draft_accepted, n_draft_total);
484490
}
485491

486492
common_speculative_print_stats(spec);

0 commit comments

Comments
 (0)