Skip to content

Commit 9e6f86b

Browse files
committed
slop: make sure per-model settings are properly loaded
1 parent 470ecaf commit 9e6f86b

2 files changed

Lines changed: 24 additions & 12 deletions

File tree

tools/server/server-model-manager.cpp

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -232,8 +232,12 @@ void server_model_manager::unload(const std::string& name, server_context& ctx)
232232
info.status = SERVER_MODEL_STATUS_UNLOADED;
233233
info.last_used = 0;
234234

235-
// Call server_context's unload
236-
ctx.unload_current_model();
235+
// Only destroy the model in ctx_server if this model is the one currently loaded.
236+
// Without this guard, unloading any model marked LOADED would destroy whatever
237+
// ctx_server happens to hold (which may be a different model).
238+
if (ctx.has_model_loaded() && ctx.get_current_model_path() == info.model_path) {
239+
ctx.unload_current_model();
240+
}
237241

238242
cv_.notify_all();
239243
}

tools/server/server.cpp

Lines changed: 18 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -409,7 +409,11 @@ int main(int argc, char ** argv) {
409409
base_info.tags = params.model_tags;
410410
base_info.status = SERVER_MODEL_STATUS_LOADED;
411411
base_info.last_used = ggml_time_ms();
412-
base_info.preset = cli_load_result.base_preset;
412+
// Args after the first -- are this model's per-model preset; args before -- are
413+
// global defaults already in `params`, so on reload we re-apply only this preset.
414+
base_info.preset = cli_load_result.model_presets.empty()
415+
? cli_load_result.base_preset
416+
: cli_load_result.model_presets[0];
413417
if (!base_info.name.empty()) {
414418
model_manager->add_model(std::move(base_info));
415419
}
@@ -510,6 +514,17 @@ int main(int argc, char ** argv) {
510514
params.model.path = cli_base_model_path;
511515
}
512516

517+
// Build the first model's load params: global defaults (`params`, args before --)
518+
// overlaid with its own per-model preset (args in the first -- block). Each model is
519+
// independent — keep `params` unmutated so other models don't inherit this one's args.
520+
common_params first_model_params = params;
521+
if (cli_has_model_presets && !cli_load_result.model_presets.empty()) {
522+
cli_load_result.model_presets[0].apply_to_params(first_model_params);
523+
if (!cli_base_model_path.empty()) {
524+
first_model_params.model.path = cli_base_model_path;
525+
}
526+
}
527+
513528
// load the model
514529
LOG_INF("%s: loading model\n", __func__);
515530

@@ -519,7 +534,7 @@ int main(int argc, char ** argv) {
519534
});
520535
}
521536

522-
if (!ctx_server.load_model(params)) {
537+
if (!ctx_server.load_model(first_model_params)) {
523538
clean_up();
524539
if (ctx_http.thread.joinable()) {
525540
ctx_http.thread.join();
@@ -876,7 +891,7 @@ int main(int argc, char ** argv) {
876891
return res;
877892
}));
878893

879-
ctx_http.post("/models/unload", ex_wrapper([model_mgr2 = model_manager.get(), &ctx_server, model_manager_base_model_name](const server_http_req & req) -> server_http_res_ptr {
894+
ctx_http.post("/models/unload", ex_wrapper([model_mgr2 = model_manager.get(), &ctx_server](const server_http_req & req) -> server_http_res_ptr {
880895
auto res = std::make_unique<server_http_res>();
881896
json body = json::parse(req.body);
882897
std::string name = json_value(body, "model", std::string());
@@ -893,13 +908,6 @@ int main(int argc, char ** argv) {
893908
return res;
894909
}
895910

896-
// Don't allow unloading the base model
897-
if (name == model_manager_base_model_name) {
898-
res->status = 400;
899-
res->data = safe_json_to_str({{"error", format_error_response("base model cannot be unloaded", ERROR_TYPE_INVALID_REQUEST)}});
900-
return res;
901-
}
902-
903911
model_mgr2->unload(name, ctx_server);
904912
res_ok(res, {{"success", true}});
905913
return res;

0 commit comments

Comments
 (0)