@@ -409,7 +409,11 @@ int main(int argc, char ** argv) {
409409 base_info.tags = params.model_tags ;
410410 base_info.status = SERVER_MODEL_STATUS_LOADED;
411411 base_info.last_used = ggml_time_ms ();
412- base_info.preset = cli_load_result.base_preset ;
412+ // Args after the first -- are this model's per-model preset; args before -- are
413+ // global defaults already in `params`, so on reload we re-apply only this preset.
414+ base_info.preset = cli_load_result.model_presets .empty ()
415+ ? cli_load_result.base_preset
416+ : cli_load_result.model_presets [0 ];
413417 if (!base_info.name .empty ()) {
414418 model_manager->add_model (std::move (base_info));
415419 }
@@ -510,6 +514,17 @@ int main(int argc, char ** argv) {
510514 params.model .path = cli_base_model_path;
511515 }
512516
517+ // Build the first model's load params: global defaults (`params`, args before --)
518+ // overlaid with its own per-model preset (args in the first -- block). Each model is
519+ // independent — keep `params` unmutated so other models don't inherit this one's args.
520+ common_params first_model_params = params;
521+ if (cli_has_model_presets && !cli_load_result.model_presets .empty ()) {
522+ cli_load_result.model_presets [0 ].apply_to_params (first_model_params);
523+ if (!cli_base_model_path.empty ()) {
524+ first_model_params.model .path = cli_base_model_path;
525+ }
526+ }
527+
513528 // load the model
514529 LOG_INF (" %s: loading model\n " , __func__);
515530
@@ -519,7 +534,7 @@ int main(int argc, char ** argv) {
519534 });
520535 }
521536
522- if (!ctx_server.load_model (params )) {
537+ if (!ctx_server.load_model (first_model_params )) {
523538 clean_up ();
524539 if (ctx_http.thread .joinable ()) {
525540 ctx_http.thread .join ();
@@ -876,7 +891,7 @@ int main(int argc, char ** argv) {
876891 return res;
877892 }));
878893
879- ctx_http.post (" /models/unload" , ex_wrapper ([model_mgr2 = model_manager.get (), &ctx_server, model_manager_base_model_name ](const server_http_req & req) -> server_http_res_ptr {
894+ ctx_http.post (" /models/unload" , ex_wrapper ([model_mgr2 = model_manager.get (), &ctx_server](const server_http_req & req) -> server_http_res_ptr {
880895 auto res = std::make_unique<server_http_res>();
881896 json body = json::parse (req.body );
882897 std::string name = json_value (body, " model" , std::string ());
@@ -893,13 +908,6 @@ int main(int argc, char ** argv) {
893908 return res;
894909 }
895910
896- // Don't allow unloading the base model
897- if (name == model_manager_base_model_name) {
898- res->status = 400 ;
899- res->data = safe_json_to_str ({{" error" , format_error_response (" base model cannot be unloaded" , ERROR_TYPE_INVALID_REQUEST)}});
900- return res;
901- }
902-
903911 model_mgr2->unload (name, ctx_server);
904912 res_ok (res, {{" success" , true }});
905913 return res;
0 commit comments