@@ -543,10 +543,18 @@ int main(int argc, char ** argv) {
543543 }
544544 SRV_INF (" autoload: %zu models to load on startup\n " , models_to_load.size ());
545545 if (!models_to_load.empty ()) {
546- if ((int )models_to_load.size () > params.models_max ) {
547- SRV_WRN (" number of models to load on startup (%zu) exceeds models_max (%d), loading first %d\n " ,
548- models_to_load.size (), params.models_max , params.models_max );
549- models_to_load.resize (params.models_max );
546+ // Account for already-loaded models (e.g., the base model loaded via ctx_server.load_model)
547+ size_t already_loaded = 0 ;
548+ for (const auto & info : model_manager->get_all_meta ()) {
549+ if (info.status == SERVER_MODEL_STATUS_LOADED) {
550+ already_loaded++;
551+ }
552+ }
553+ if (params.models_max > 0 && (int )(models_to_load.size () + already_loaded) > params.models_max ) {
554+ size_t to_keep = params.models_max > (int )already_loaded ? params.models_max - already_loaded : 0 ;
555+ SRV_WRN (" number of models to load on startup (%zu) exceeds models_max (%d), loading first %zu\n " ,
556+ models_to_load.size (), params.models_max , to_keep);
557+ models_to_load.resize (to_keep);
550558 }
551559 for (const auto & model_name : models_to_load) {
552560 SRV_INF (" (startup) loading model %s\n " , model_name.c_str ());
@@ -736,10 +744,18 @@ int main(int argc, char ** argv) {
736744 }
737745 }
738746 if (!models_to_load.empty ()) {
739- if ((int )models_to_load.size () > params.models_max ) {
740- SRV_WRN (" number of models to load on startup (%zu) exceeds models_max (%d), loading first %d\n " ,
741- models_to_load.size (), params.models_max , params.models_max );
742- models_to_load.resize (params.models_max );
747+ // Account for already-loaded models (e.g., the base model loaded via ctx_server.load_model)
748+ size_t already_loaded = 0 ;
749+ for (const auto & info : model_manager->get_all_meta ()) {
750+ if (info.status == SERVER_MODEL_STATUS_LOADED) {
751+ already_loaded++;
752+ }
753+ }
754+ if (params.models_max > 0 && (int )(models_to_load.size () + already_loaded) > params.models_max ) {
755+ size_t to_keep = params.models_max > (int )already_loaded ? params.models_max - already_loaded : 0 ;
756+ SRV_WRN (" number of models to load on startup (%zu) exceeds models_max (%d), loading first %zu\n " ,
757+ models_to_load.size (), params.models_max , to_keep);
758+ models_to_load.resize (to_keep);
743759 }
744760 for (const auto & model_name : models_to_load) {
745761 SRV_INF (" (startup) loading model %s\n " , model_name.c_str ());
0 commit comments