33import logging
44import socket
55from http import HTTPStatus
6+ from typing import Optional
67from urllib .parse import urlparse , urlunparse
78
89import aiohttp
910
1011from consts .const import DATA_PROCESS_SERVICE
1112from consts .const import MODEL_CONFIG_MAPPING
13+ from database .model_management_db import get_model_by_model_id
1214from utils .config_utils import tenant_config_manager , get_model_name_from_config
1315
1416from nexent import MessageObserver
@@ -146,14 +148,19 @@ async def proxy_image_impl(decoded_url: str):
146148 return result
147149
148150
149- def get_vlm_model (tenant_id : str ):
150- """Return the configured image understanding model for AnalyzeImageTool.
151+ def _get_model_config_by_id (tenant_id , model_id , expected_model_type ):
152+ if not model_id :
153+ return None
151154
152- The first multimodal model slot is still stored under MODEL_CONFIG_MAPPING["vlm"]
153- for compatibility, but it is the user-facing image understanding configuration.
154- """
155- vlm_model_config = tenant_config_manager .get_model_config (
156- key = MODEL_CONFIG_MAPPING ["vlm" ], tenant_id = tenant_id )
155+ model_config = get_model_by_model_id (int (model_id ), tenant_id )
156+ if not model_config :
157+ raise ValueError (f"Model not found: { model_id } " )
158+ if model_config .get ("model_type" ) != expected_model_type :
159+ raise ValueError (f"Selected model { model_id } is not a { expected_model_type } model" )
160+ return model_config
161+
162+
163+ def _build_vlm_model (vlm_model_config ):
157164 if not vlm_model_config :
158165 return None
159166 return OpenAIVLModel (
@@ -167,28 +174,34 @@ def get_vlm_model(tenant_id: str):
167174 frequency_penalty = 0.5 ,
168175 max_tokens = 512 ,
169176 ssl_verify = vlm_model_config .get ("ssl_verify" , True ),
177+ model_factory = vlm_model_config .get ("model_factory" ),
178+ display_name = vlm_model_config .get ("display_name" ),
170179 )
171180
172181
def get_vlm_model(tenant_id: str, model_id: Optional[int] = None):
    """Build the image understanding model used by AnalyzeImageTool.

    Without an explicit ``model_id`` the configuration is read from the
    tenant's ``MODEL_CONFIG_MAPPING["vlm"]`` slot. With a truthy ``model_id``
    the configuration is looked up by id and must be of type ``"vlm"``.

    Returns whatever ``_build_vlm_model`` produces for the resolved
    configuration.
    """
    if not model_id:
        # No explicit selection: fall back to the tenant-level configuration.
        tenant_default_config = tenant_config_manager.get_model_config(
            key=MODEL_CONFIG_MAPPING["vlm"], tenant_id=tenant_id)
        return _build_vlm_model(tenant_default_config)

    selected_config = _get_model_config_by_id(tenant_id, model_id, "vlm")
    return _build_vlm_model(selected_config)
194+
195+
def get_image_understanding_model(tenant_id: str, model_id: Optional[int] = None):
    """Return the image understanding model by delegating to ``get_vlm_model``.

    Args:
        tenant_id: Tenant whose configuration is used.
        model_id: Optional explicit model id, forwarded unchanged. Defaults
            to ``None`` so existing single-argument callers are unaffected.
    """
    # Accept the same optional selector as the sibling model getters so all
    # of them expose parallel signatures.
    return get_vlm_model(tenant_id=tenant_id, model_id=model_id)
175198
176199
def get_video_understanding_model(tenant_id: str, model_id: Optional[int] = None):
    """Build the video understanding model for multimodal tools.

    Without an explicit ``model_id`` the configuration is read from the
    tenant's ``MODEL_CONFIG_MAPPING["vlm3"]`` slot. With a truthy ``model_id``
    the configuration is looked up by id instead.

    Returns whatever ``_build_vlm_model`` produces for the resolved
    configuration.
    """
    if not model_id:
        # No explicit selection: fall back to the tenant-level configuration.
        tenant_default_config = tenant_config_manager.get_model_config(
            key=MODEL_CONFIG_MAPPING["vlm3"], tenant_id=tenant_id)
        return _build_vlm_model(tenant_default_config)

    # NOTE(review): an explicitly selected record is only accepted when its
    # "model_type" equals "vlm3" - confirm stored records use this value.
    selected_config = _get_model_config_by_id(tenant_id, model_id, "vlm3")
    return _build_vlm_model(selected_config)
0 commit comments