Skip to content

Commit 0c6b7a0

Browse files
committed
cont : minor
1 parent 2a0a0c7 commit 0c6b7a0

3 files changed

Lines changed: 14 additions & 11 deletions

File tree

common/speculative.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1252,22 +1252,22 @@ struct common_speculative_session::impl {
12521252
}
12531253

12541254
bool accept(llama_tokens ids) {
1255-
has_partial = false;
1256-
12571255
LOG_WRN("%s: n_draft=%zu, ids.size=%zu\n", __func__, draft.size(), ids.size());
12581256

1257+
has_partial = false;
1258+
12591259
if (ids.size() < draft.size() + 1) {
12601260
// the main model rejected some tokens
1261-
LOG_DBG("%s: partial acceptance: %zu < %zu\n", __func__, draft.size(), draft.size());
1262-
12631261
if (params.use_checkpoints) {
1264-
// we shorten the draft and retry
1262+
// shorten the draft to the number of accepted tokens
12651263
draft.resize(ids.size() - 1);
12661264

12671265
has_partial = true;
12681266

12691267
return false;
12701268
}
1269+
1270+
LOG_DBG("%s: partial acceptance: %zu < %zu\n", __func__, draft.size(), draft.size());
12711271
}
12721272

12731273
draft = std::move(ids);

tools/server/server-context.cpp

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -84,10 +84,8 @@ struct server_slot {
8484
mtmd_context * mctx = nullptr;
8585

8686
// speculative decoding
87-
server_prompt_checkpoint spec_ckpt;
88-
8987
std::vector<int32_t> spec_i_batch;
90-
88+
server_prompt_checkpoint spec_ckpt;
9189
std::unique_ptr<common_speculative_session> spec = nullptr;
9290

9391
// TODO: move members that belong to the task (such as `generated_text`, `has_new_line`) to task_results_state
@@ -204,8 +202,8 @@ struct server_slot {
204202
n_sent_text = 0;
205203

206204
if (can_speculate()) {
207-
spec_ckpt = {};
208205
spec_i_batch.clear();
206+
spec_ckpt.clear();
209207
spec->clear();
210208
}
211209
generated_tokens.clear();
@@ -2950,9 +2948,7 @@ struct server_context_impl {
29502948
common_sampler_ptr smpl_save(common_sampler_clone(slot.smpl.get()));
29512949

29522950
GGML_ASSERT(slot.spec_i_batch.size() == n_draft + 1);
2953-
29542951
auto accepted = common_sampler_sample_and_accept_n(slot.smpl.get(), slot.ctx, slot.spec_i_batch, draft);
2955-
29562952
slot.spec_i_batch.clear();
29572953

29582954
if (!slot.spec->accept(std::move(accepted))) {

tools/server/server-task.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -580,6 +580,13 @@ struct server_prompt_checkpoint {
580580
bool empty() const {
581581
return data.empty();
582582
}
583+
584+
void clear() {
585+
pos_min = 0;
586+
pos_max = 0;
587+
n_tokens = 0;
588+
data.clear();
589+
}
583590
};
584591

585592
struct server_prompt {

0 commit comments

Comments
 (0)