Skip to content

Commit cfb386c

Browse files
committed
fix batch size
1 parent a7813c7 commit cfb386c

1 file changed

Lines changed: 3 additions & 3 deletions

File tree

common/speculative.cpp

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -401,11 +401,11 @@ struct common_speculative_state_mtp : public common_speculative_impl {
401 401

402 402
n_embd = llama_model_n_embd(llama_get_model(ctx_dft));
403 403

404-
const int32_t n_ub = (int32_t) llama_n_ubatch(ctx_dft);
405-
batch = llama_batch_init(/*n_tokens=*/ n_ub, /*embd=*/ n_embd, /*n_seq_max=*/ 1);
404+
const int32_t n_b = (int32_t) llama_n_batch(ctx_dft);
405+
batch = llama_batch_init(/*n_tokens=*/ n_b, /*embd=*/ n_embd, /*n_seq_max=*/ 1);
406 406
// llama_batch_init allocates only one of token/embd; MTP needs both.
407 407
// TODO: fix, how to call without malloc
408-
batch.token = (llama_token *) malloc(sizeof(llama_token) * n_ub);
408+
batch.token = (llama_token *) malloc(sizeof(llama_token) * n_b);
409 409

410 410
smpls.resize(n_seq);
411 411
for (auto & s : smpls) {

0 commit comments

Comments
 (0)