Skip to content

Commit 8d417c3

Browse files
committed
slop: scope reasoning options
1 parent 0ee7885 commit 8d417c3

4 files changed

Lines changed: 40 additions & 0 deletions

File tree

tools/server/server-context.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3622,6 +3622,11 @@ void server_routes::init_routes() {
36223622
if (meta_resolved->model_path == ctx_server_ref.get_current_model_path()) return;
36233623
common_params swap_params = params;
36243624
swap_params.model.path = meta_resolved->model_path;
3625+
// Apply the per-model preset (e.g. --reasoning, --chat-template-kwargs)
3626+
auto preset = model_manager->get_preset(requested_model);
3627+
if (preset.has_value()) {
3628+
preset->apply_to_params(swap_params);
3629+
}
36253630
SRV_INF("swapping to model '%s' (path: %s)\n", requested_model.c_str(), swap_params.model.path.c_str());
36263631
ctx_server_ref.swap_model(swap_params);
36273632
meta = std::make_unique<server_context_meta>(get_ctx_meta());

tools/server/server-model-manager.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,13 @@ void server_model_manager::load_locked(const std::string& name, server_context&
183183
info.status = SERVER_MODEL_STATUS_LOADING;
184184
info.last_used = ggml_time_ms();
185185

186+
// Apply the per-model preset to params (e.g. --reasoning, --chat-template-kwargs)
187+
// This is a safety net: the caller may have already applied the preset,
188+
// but we ensure it's applied here if the caller didn't.
189+
if (!info.preset.options.empty()) {
190+
info.preset.apply_to_params(params);
191+
}
192+
186193
// Use this model's path, not the global params path
187194
std::string saved_path = params.model.path;
188195
params.model.path = info.model_path;
@@ -312,6 +319,19 @@ void server_model_manager::cache_all() {
312319
}
313320
}
314321

322+
std::optional<common_preset> server_model_manager::get_preset(const std::string& name) const {
323+
std::lock_guard<std::mutex> lk(mutex_);
324+
std::string canonical = resolve_model_name(name);
325+
if (canonical.empty()) {
326+
return std::nullopt;
327+
}
328+
auto it = mapping_.find(canonical);
329+
if (it == mapping_.end()) {
330+
return std::nullopt;
331+
}
332+
return it->second.preset;
333+
}
334+
315335
void server_model_manager::unload_lru(server_context& ctx) {
316336
std::string lru = find_lru_model();
317337
if (!lru.empty()) {

tools/server/server-model-manager.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ struct server_model_info {
3434
int64_t last_used = 0; // for LRU eviction (milliseconds since epoch)
3535
int exit_code = 0; // exit code if failed
3636
bool cached = false; // GGUF file is cached in page cache for fast swapping
37+
common_preset preset; // per-model preset for applying to common_params at load/swap time
3738

3839
bool is_ready() const {
3940
return status == SERVER_MODEL_STATUS_LOADED;
@@ -99,6 +100,10 @@ class server_model_manager {
99100
// Cache all models' GGUF files in page cache
100101
void cache_all();
101102

103+
// Get the per-model preset for a given model name (resolves aliases)
104+
// Returns empty optional if model not found or no preset set
105+
std::optional<common_preset> get_preset(const std::string& name) const;
106+
102107
private:
103108
// Find the LRU model name (must be called with mutex_ held)
104109
// Returns empty string if no model to evict

tools/server/server.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,7 @@ int main(int argc, char ** argv) {
409409
base_info.tags = params.model_tags;
410410
base_info.status = SERVER_MODEL_STATUS_LOADED;
411411
base_info.last_used = ggml_time_ms();
412+
base_info.preset = cli_load_result.base_preset;
412413
if (!base_info.name.empty()) {
413414
model_manager->add_model(std::move(base_info));
414415
}
@@ -462,6 +463,7 @@ int main(int argc, char ** argv) {
462463
}
463464
}
464465

466+
info.preset = mp;
465467
SRV_INF("registering model '%s' (status=%d)\n", info.name.c_str(), (int)info.status);
466468
model_manager->add_model(std::move(info));
467469
}
@@ -712,6 +714,7 @@ int main(int argc, char ** argv) {
712714
}
713715
}
714716

717+
info.preset = preset;
715718
model_manager->add_model(std::move(info));
716719

717720
// Check autoload
@@ -732,6 +735,7 @@ int main(int argc, char ** argv) {
732735
base_info.tags = params.model_tags;
733736
base_info.status = SERVER_MODEL_STATUS_LOADED;
734737
base_info.last_used = ggml_time_ms();
738+
base_info.preset = base_preset;
735739
model_manager->add_model(std::move(base_info));
736740
}
737741

@@ -860,6 +864,12 @@ int main(int argc, char ** argv) {
860864
load_params.model.path = path;
861865
}
862866

867+
// Apply the per-model preset (e.g. --reasoning, --chat-template-kwargs)
868+
auto preset = model_mgr->get_preset(name);
869+
if (preset.has_value()) {
870+
preset->apply_to_params(load_params);
871+
}
872+
863873
// Load the model via model manager (handles LRU eviction)
864874
model_mgr->load(name, ctx_server, load_params);
865875
res_ok(res, {{"success", true}});

0 commit comments

Comments
 (0)