File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -401,11 +401,11 @@ struct common_speculative_state_mtp : public common_speculative_impl {
401401
402402 n_embd = llama_model_n_embd (llama_get_model (ctx_dft));
403403
404- const int32_t n_ub = (int32_t ) llama_n_ubatch (ctx_dft);
405- batch = llama_batch_init (/* n_tokens=*/ n_ub , /* embd=*/ n_embd, /* n_seq_max=*/ 1 );
404+ const int32_t n_b = (int32_t ) llama_n_batch (ctx_dft);
405+ batch = llama_batch_init (/* n_tokens=*/ n_b , /* embd=*/ n_embd, /* n_seq_max=*/ 1 );
406406 // llama_batch_init allocates only one of token/embd; MTP needs both.
407407 // TODO: fix, how to call without malloc
408- batch.token = (llama_token *) malloc (sizeof (llama_token) * n_ub );
408+ batch.token = (llama_token *) malloc (sizeof (llama_token) * n_b );
409409
410410 smpls.resize (n_seq);
411411 for (auto & s : smpls) {
You can’t perform that action at this time.
0 commit comments