@@ -405,7 +405,11 @@ int main(int argc, char ** argv) {
405405 base_info.tags = params.model_tags ;
406406 base_info.status = SERVER_MODEL_STATUS_LOADED;
407407 base_info.last_used = ggml_time_ms ();
408- base_info.preset = cli_load_result.base_preset ;
408+ // Args after the first -- are this model's per-model preset; args before -- are global
409+ // defaults already in `params`. On reload we re-apply only that preset (base_preset if none).
410+ base_info.preset = cli_load_result.model_presets .empty ()
411+ ? cli_load_result.base_preset
412+ : cli_load_result.model_presets [0 ];
409413 if (!base_info.name .empty ()) {
410414 model_manager->add_model (std::move (base_info));
411415 }
@@ -506,6 +510,17 @@ int main(int argc, char ** argv) {
506510 params.model .path = cli_base_model_path;
507511 }
508512
513+ // Build the first model's load params: global defaults (`params`, args before --)
514+ // overlaid with its own per-model preset (args in the first -- block). Each model is
515+ // independent — keep `params` unmutated so other models don't inherit this one's args.
516+ common_params first_model_params = params;
517+ if (cli_has_model_presets && !cli_load_result.model_presets .empty ()) {
518+ cli_load_result.model_presets [0 ].apply_to_params (first_model_params);
519+ if (!cli_base_model_path.empty ()) {
520+ first_model_params.model .path = cli_base_model_path;
521+ }
522+ }
523+
509524 // load the model
510525 LOG_INF (" %s: loading model\n " , __func__);
511526
@@ -515,7 +530,7 @@ int main(int argc, char ** argv) {
515530 });
516531 }
517532
518- if (!ctx_server.load_model (params )) {
533+ if (!ctx_server.load_model (first_model_params )) {
519534 clean_up ();
520535 if (ctx_http.thread .joinable ()) {
521536 ctx_http.thread .join ();
@@ -872,7 +887,7 @@ int main(int argc, char ** argv) {
872887 return res;
873888 }));
874889
875- ctx_http.post (" /models/unload" , ex_wrapper ([model_mgr2 = model_manager.get (), &ctx_server, model_manager_base_model_name ](const server_http_req & req) -> server_http_res_ptr {
890+ ctx_http.post (" /models/unload" , ex_wrapper ([model_mgr2 = model_manager.get (), &ctx_server](const server_http_req & req) -> server_http_res_ptr {
876891 auto res = std::make_unique<server_http_res>();
877892 json body = json::parse (req.body );
878893 std::string name = json_value (body, " model" , std::string ());
@@ -889,13 +904,6 @@ int main(int argc, char ** argv) {
889904 return res;
890905 }
891906
892- // Don't allow unloading the base model
893- if (name == model_manager_base_model_name) {
894- res->status = 400 ;
895- res->data = safe_json_to_str ({{" error" , format_error_response (" base model cannot be unloaded" , ERROR_TYPE_INVALID_REQUEST)}});
896- return res;
897- }
898-
899907 model_mgr2->unload (name, ctx_server);
900908 res_ok (res, {{" success" , true }});
901909 return res;
0 commit comments