Skip to content

Commit 2cda56f

Browse files
committed
slop: scope reasoning options
1 parent af32091 commit 2cda56f

4 files changed

Lines changed: 40 additions & 0 deletions

File tree

tools/server/server-context.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3604,6 +3604,11 @@ void server_routes::init_routes() {
36043604
if (meta_resolved->model_path == ctx_server_ref.get_current_model_path()) return;
36053605
common_params swap_params = params;
36063606
swap_params.model.path = meta_resolved->model_path;
3607+
// Apply model preset (e.g. --reasoning, --chat-template-kwargs, etc.)
3608+
auto preset = model_manager->get_preset(requested_model);
3609+
if (preset.has_value()) {
3610+
preset->apply_to_params(swap_params);
3611+
}
36073612
SRV_INF("swapping to model '%s' (path: %s)\n", requested_model.c_str(), swap_params.model.path.c_str());
36083613
ctx_server_ref.swap_model(swap_params);
36093614
meta = std::make_unique<server_context_meta>(get_ctx_meta());

tools/server/server-model-manager.cpp

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,13 @@ void server_model_manager::load_locked(const std::string& name, server_context&
183183
info.status = SERVER_MODEL_STATUS_LOADING;
184184
info.last_used = ggml_time_ms();
185185

186+
// Apply model preset to params (e.g. --reasoning, --chat-template-kwargs, etc.)
187+
// This is a safety net: the caller may have already applied the preset,
188+
// either way it is (re)applied here whenever the preset has options.
189+
if (!info.preset.options.empty()) {
190+
info.preset.apply_to_params(params);
191+
}
192+
186193
// Use this model's path, not the global params path
187194
std::string saved_path = params.model.path;
188195
params.model.path = info.model_path;
@@ -312,6 +319,19 @@ void server_model_manager::cache_all() {
312319
}
313320
}
314321

322+
std::optional<common_preset> server_model_manager::get_preset(const std::string& name) const {
323+
std::lock_guard<std::mutex> lk(mutex_);
324+
std::string canonical = resolve_model_name(name);
325+
if (canonical.empty()) {
326+
return std::nullopt;
327+
}
328+
auto it = mapping_.find(canonical);
329+
if (it == mapping_.end()) {
330+
return std::nullopt;
331+
}
332+
return it->second.preset;
333+
}
334+
315335
void server_model_manager::unload_lru(server_context& ctx) {
316336
std::string lru = find_lru_model();
317337
if (!lru.empty()) {

tools/server/server-model-manager.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ struct server_model_info {
3434
int64_t last_used = 0; // for LRU eviction (milliseconds since epoch)
3535
int exit_code = 0; // exit code if failed
3636
bool cached = false; // GGUF file is cached in page cache for fast swapping
37+
common_preset preset; // per-model preset for applying to common_params at load/swap time
3738

3839
bool is_ready() const {
3940
return status == SERVER_MODEL_STATUS_LOADED;
@@ -99,6 +100,10 @@ class server_model_manager {
99100
// Cache all models' GGUF files in page cache
100101
void cache_all();
101102

103+
// Get the per-model preset for a given model name (resolves aliases)
104+
// Returns empty optional if model not found or no preset set
105+
std::optional<common_preset> get_preset(const std::string& name) const;
106+
102107
private:
103108
// Find the LRU model name (must be called with mutex_ held)
104109
// Returns empty string if no model to evict

tools/server/server.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,7 @@ int main(int argc, char ** argv) {
400400
base_info.tags = params.model_tags;
401401
base_info.status = SERVER_MODEL_STATUS_LOADED;
402402
base_info.last_used = ggml_time_ms();
403+
base_info.preset = cli_load_result.base_preset;
403404
if (!base_info.name.empty()) {
404405
model_manager->add_model(std::move(base_info));
405406
}
@@ -453,6 +454,7 @@ int main(int argc, char ** argv) {
453454
}
454455
}
455456

457+
info.preset = mp;
456458
SRV_INF("registering model '%s' (status=%d)\n", info.name.c_str(), (int)info.status);
457459
model_manager->add_model(std::move(info));
458460
}
@@ -703,6 +705,7 @@ int main(int argc, char ** argv) {
703705
}
704706
}
705707

708+
info.preset = preset;
706709
model_manager->add_model(std::move(info));
707710

708711
// Check autoload
@@ -723,6 +726,7 @@ int main(int argc, char ** argv) {
723726
base_info.tags = params.model_tags;
724727
base_info.status = SERVER_MODEL_STATUS_LOADED;
725728
base_info.last_used = ggml_time_ms();
729+
base_info.preset = base_preset;
726730
model_manager->add_model(std::move(base_info));
727731
}
728732

@@ -851,6 +855,12 @@ int main(int argc, char ** argv) {
851855
load_params.model.path = path;
852856
}
853857

858+
// Apply model preset (e.g. --reasoning, --chat-template-kwargs, etc.)
859+
auto preset = model_mgr->get_preset(name);
860+
if (preset.has_value()) {
861+
preset->apply_to_params(load_params);
862+
}
863+
854864
// Load the model via model manager (handles LRU eviction)
855865
model_mgr->load(name, ctx_server, load_params);
856866
res_ok(res, {{"success", true}});

0 commit comments

Comments
 (0)