Skip to content

Commit a90e4dc

Browse files
committed
slop: make sure per-model settings are properly loaded
1 parent e72fda7 commit a90e4dc

2 files changed

Lines changed: 24 additions & 12 deletions

File tree

tools/server/server-model-manager.cpp

Lines changed: 6 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -232,8 +232,12 @@ void server_model_manager::unload(const std::string& name, server_context& ctx)
232232
info.status = SERVER_MODEL_STATUS_UNLOADED;
233233
info.last_used = 0;
234234

235-
// Call server_context's unload
236-
ctx.unload_current_model();
235+
// Only destroy the model in ctx_server if this model is the one currently loaded.
236+
// Without this guard, unloading any model marked LOADED would destroy whatever
237+
// ctx_server happens to hold (which may be a different model).
238+
if (ctx.has_model_loaded() && ctx.get_current_model_path() == info.model_path) {
239+
ctx.unload_current_model();
240+
}
237241

238242
cv_.notify_all();
239243
}

tools/server/server.cpp

Lines changed: 18 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -405,7 +405,11 @@ int main(int argc, char ** argv) {
405405
base_info.tags = params.model_tags;
406406
base_info.status = SERVER_MODEL_STATUS_LOADED;
407407
base_info.last_used = ggml_time_ms();
408-
base_info.preset = cli_load_result.base_preset;
408+
// Args after the first -- are this model's per-model preset; args before -- are
409+
// global defaults already in `params`, so on reload we re-apply only this preset.
410+
base_info.preset = cli_load_result.model_presets.empty()
411+
? cli_load_result.base_preset
412+
: cli_load_result.model_presets[0];
409413
if (!base_info.name.empty()) {
410414
model_manager->add_model(std::move(base_info));
411415
}
@@ -506,6 +510,17 @@ int main(int argc, char ** argv) {
506510
params.model.path = cli_base_model_path;
507511
}
508512

513+
// Build the first model's load params: global defaults (`params`, args before --)
514+
// overlaid with its own per-model preset (args in the first -- block). Each model is
515+
// independent — keep `params` unmutated so other models don't inherit this one's args.
516+
common_params first_model_params = params;
517+
if (cli_has_model_presets && !cli_load_result.model_presets.empty()) {
518+
cli_load_result.model_presets[0].apply_to_params(first_model_params);
519+
if (!cli_base_model_path.empty()) {
520+
first_model_params.model.path = cli_base_model_path;
521+
}
522+
}
523+
509524
// load the model
510525
LOG_INF("%s: loading model\n", __func__);
511526

@@ -515,7 +530,7 @@ int main(int argc, char ** argv) {
515530
});
516531
}
517532

518-
if (!ctx_server.load_model(params)) {
533+
if (!ctx_server.load_model(first_model_params)) {
519534
clean_up();
520535
if (ctx_http.thread.joinable()) {
521536
ctx_http.thread.join();
@@ -872,7 +887,7 @@ int main(int argc, char ** argv) {
872887
return res;
873888
}));
874889

875-
ctx_http.post("/models/unload", ex_wrapper([model_mgr2 = model_manager.get(), &ctx_server, model_manager_base_model_name](const server_http_req & req) -> server_http_res_ptr {
890+
ctx_http.post("/models/unload", ex_wrapper([model_mgr2 = model_manager.get(), &ctx_server](const server_http_req & req) -> server_http_res_ptr {
876891
auto res = std::make_unique<server_http_res>();
877892
json body = json::parse(req.body);
878893
std::string name = json_value(body, "model", std::string());
@@ -889,13 +904,6 @@ int main(int argc, char ** argv) {
889904
return res;
890905
}
891906

892-
// Don't allow unloading the base model
893-
if (name == model_manager_base_model_name) {
894-
res->status = 400;
895-
res->data = safe_json_to_str({{"error", format_error_response("base model cannot be unloaded", ERROR_TYPE_INVALID_REQUEST)}});
896-
return res;
897-
}
898-
899907
model_mgr2->unload(name, ctx_server);
900908
res_ok(res, {{"success", true}});
901909
return res;

0 commit comments

Comments
 (0)