We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 0b60ba6, commit f2299fa (Copy full SHA for f2299fa)
1 file changed
tools/server/server-context.cpp
@@ -251,8 +251,9 @@ struct server_slot {
251
return state != SLOT_STATE_IDLE;
252
}
253
254
+ // Checks whether speculation is possible: either a draft model is active or self-speculation using context tokens is enabled
255
bool can_speculate() const {
- return ctx_dft;
256
+ return ctx_dft || task->params.speculative.use_self;
257
258
259
void add_token(const completion_token_output & token) {
@@ -264,7 +265,7 @@ struct server_slot {
264
265
266
267
int get_n_draft_max() const {
- if (!can_speculate() && !task->params.speculative.use_self) {
268
+ if (!can_speculate()) {
269
return 0;
270
271
0 commit comments