@@ -161,6 +161,30 @@ void server_model_meta::update_args(common_preset_context & ctx_preset, std::str
161161 args = preset.to_args (bin_path);
162162}
163163
164+ void server_model_meta::update_caps () {
165+ try {
166+ common_params params;
167+ preset.apply_to_params (params, {
168+ " LLAMA_ARG_MODEL" ,
169+ " LLAMA_ARG_MODEL_URL" ,
170+ " LLAMA_ARG_MMPROJ" ,
171+ " LLAMA_ARG_MMPROJ_URL" ,
172+ " LLAMA_ARG_HF_REPO" ,
173+ " LLAMA_ARG_HF_REPO_FILE" ,
174+ });
175+ params.offline = true ; // avoid any unwanted network call during capability detection
176+ common_params_handle_models (params, LLAMA_EXAMPLE_SERVER);
177+ if (params.mmproj .path .empty ()) {
178+ multimodal = { false , false };
179+ } else {
180+ multimodal = mtmd_get_cap_from_file (params.mmproj .path .c_str ());
181+ }
182+ } catch (const std::exception & e) {
183+ LOG_WRN (" failed to initialize common_params for multimodal capability detection: %s\n " , e.what ());
184+ multimodal = { false , false };
185+ }
186+ }
187+
164188//
165189// server_models
166190//
@@ -236,6 +260,7 @@ void server_models::add_model(server_model_meta && meta) {
236260 }
237261
238262 meta.update_args (ctx_preset, bin_path); // render args
263+ meta.update_caps ();
239264 std::string name = meta.name ;
240265 mapping[name] = instance_t {
241266 /* subproc */ std::make_shared<subprocess_s>(),
@@ -346,8 +371,10 @@ void server_models::load_models() {
346371 /* status */ SERVER_MODEL_STATUS_UNLOADED,
347372 /* last_used */ 0 ,
348373 /* args */ std::vector<std::string>(),
374+ /* loaded_info */ {},
349375 /* exit_code */ 0 ,
350376 /* stop_timeout */ DEFAULT_STOP_TIMEOUT,
377+ /* multimodal */ mtmd_caps{false , false },
351378 };
352379 add_model (std::move (meta));
353380 }
@@ -481,6 +508,7 @@ void server_models::load_models() {
481508
482509 inst.meta .exit_code = 0 ; // clear failed state so the model can be reloaded
483510 inst.meta .update_args (ctx_preset, bin_path);
511+ inst.meta .update_caps ();
484512 }
485513
486514 // add models that are new in this reload
@@ -496,8 +524,10 @@ void server_models::load_models() {
496524 /* status */ SERVER_MODEL_STATUS_UNLOADED,
497525 /* last_used */ 0 ,
498526 /* args */ std::vector<std::string>(),
527+ /* loaded_info */ {},
499528 /* exit_code */ 0 ,
500529 /* stop_timeout */ DEFAULT_STOP_TIMEOUT,
530+ /* multimodal */ mtmd_caps{false , false },
501531 };
502532 add_model (std::move (meta));
503533 newly_added.push_back (name);
@@ -1206,14 +1236,28 @@ void server_models_routes::init_routes() {
12061236 status[" failed" ] = true ;
12071237 }
12081238
1239+ // pi coding agent multimodal compatibility
1240+ json input_modalities = json::array ({" text" });
1241+ if (meta.multimodal .inp_vision ) {
1242+ input_modalities.push_back (" image" );
1243+ }
1244+ if (meta.multimodal .inp_audio ) {
1245+ input_modalities.push_back (" audio" );
1246+ }
1247+ json architecture {
1248+ {" input_modalities" , input_modalities},
1249+ {" output_modalities" , json::array ({" text" })},
1250+ };
1251+
12091252 json model_info = json {
1210- {" id" , meta.name },
1211- {" aliases" , meta.aliases },
1212- {" tags" , meta.tags },
1213- {" object" , " model" }, // for OAI-compat
1214- {" owned_by" , " llamacpp" }, // for OAI-compat
1215- {" created" , t}, // for OAI-compat
1216- {" status" , status},
1253+ {" id" , meta.name },
1254+ {" aliases" , meta.aliases },
1255+ {" tags" , meta.tags },
1256+ {" object" , " model" }, // for OAI-compat
1257+ {" owned_by" , " llamacpp" }, // for OAI-compat
1258+ {" created" , t}, // for OAI-compat
1259+ {" status" , status},
1260+ {" architecture" , architecture},
12171261 // TODO: add other fields, may require reading GGUF metadata
12181262 };
12191263
0 commit comments