@@ -548,10 +548,18 @@ int main(int argc, char ** argv) {
548548 }
549549 SRV_INF (" autoload: %zu models to load on startup\n " , models_to_load.size ());
550550 if (!models_to_load.empty ()) {
551- if ((int )models_to_load.size () > params.models_max ) {
552- SRV_WRN (" number of models to load on startup (%zu) exceeds models_max (%d), loading first %d\n " ,
553- models_to_load.size (), params.models_max , params.models_max );
554- models_to_load.resize (params.models_max );
551+ // Account for already-loaded models (e.g., the base model loaded via ctx_server.load_model)
552+ size_t already_loaded = 0 ;
553+ for (const auto & info : model_manager->get_all_meta ()) {
554+ if (info.status == SERVER_MODEL_STATUS_LOADED ) {
555+ already_loaded++;
556+ }
557+ }
558+ if (params.models_max > 0 && (int )(models_to_load.size () + already_loaded) > params.models_max ) {
559+ size_t to_keep = params.models_max > (int )already_loaded ? params.models_max - already_loaded : 0 ;
560+ SRV_WRN (" number of models to load on startup (%zu) exceeds models_max (%d), loading first %zu\n " ,
561+ models_to_load.size (), params.models_max , to_keep);
562+ models_to_load.resize (to_keep);
555563 }
556564 for (const auto & model_name : models_to_load) {
557565 SRV_INF (" (startup) loading model %s\n " , model_name.c_str ());
@@ -741,10 +749,18 @@ int main(int argc, char ** argv) {
741749 }
742750 }
743751 if (!models_to_load.empty ()) {
744- if ((int )models_to_load.size () > params.models_max ) {
745- SRV_WRN (" number of models to load on startup (%zu) exceeds models_max (%d), loading first %d\n " ,
746- models_to_load.size (), params.models_max , params.models_max );
747- models_to_load.resize (params.models_max );
752+ // Account for already-loaded models (e.g., the base model loaded via ctx_server.load_model)
753+ size_t already_loaded = 0 ;
754+ for (const auto & info : model_manager->get_all_meta ()) {
755+ if (info.status == SERVER_MODEL_STATUS_LOADED ) {
756+ already_loaded++;
757+ }
758+ }
759+ if (params.models_max > 0 && (int )(models_to_load.size () + already_loaded) > params.models_max ) {
760+ size_t to_keep = params.models_max > (int )already_loaded ? params.models_max - already_loaded : 0 ;
761+ SRV_WRN (" number of models to load on startup (%zu) exceeds models_max (%d), loading first %zu\n " ,
762+ models_to_load.size (), params.models_max , to_keep);
763+ models_to_load.resize (to_keep);
748764 }
749765 for (const auto & model_name : models_to_load) {
750766 SRV_INF (" (startup) loading model %s\n " , model_name.c_str ());
0 commit comments