Skip to content

Commit 068b418

Browse files
authored
πŸ› Bugfix: Multimodal tools support user model selection (#3249)
* 111 * issue_solve * testcase_fix * test_fix * Remove unrelated unstructured filename metadata change
1 parent 9e1d8ff commit 068b418

17 files changed

Lines changed: 229 additions & 56 deletions

backend/agents/create_agent_info.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -578,6 +578,7 @@ async def create_agent_config(
578578
system_prompt = Template(prompt_template["system_prompt"], undefined=StrictUndefined).render(render_kwargs)
579579

580580
model_id_to_use = override_model_id if override_model_id else agent_info.get("model_id")
581+
model_info = None
581582
model_max_tokens = 10000
582583
if model_id_to_use is not None:
583584
model_info = get_model_by_model_id(model_id_to_use, tenant_id=tenant_id)
@@ -587,6 +588,14 @@ async def create_agent_config(
587588
else:
588589
model_name = "main_model"
589590

591+
logger.info(
592+
"Agent main LLM: agent_id=%s, model_id=%s, display_name=%s, model_name=%s",
593+
agent_id,
594+
model_id_to_use,
595+
model_info.get("display_name") if model_info else model_name,
596+
model_info.get("model_name") if model_info else model_name,
597+
)
598+
590599
# Use agent-level setting for context management, default to False.
591600
# When ContextManager is disabled, do not attach context_components because
592601
# downstream runtime may prefer component-based prompt assembly over the
@@ -759,22 +768,25 @@ async def create_tool_config_list(
759768
"rerank_model": rerank_model,
760769
}
761770
elif tool_config.class_name == "AnalyzeTextFileTool":
771+
selected_model_id = param_dict.get("selected_model_id")
762772
tool_config.metadata = {
763-
"llm_model": get_llm_model(tenant_id=tenant_id),
773+
"llm_model": get_llm_model(tenant_id=tenant_id, model_id=selected_model_id),
764774
"storage_client": minio_client,
765775
"data_process_service_url": DATA_PROCESS_SERVICE,
766776
"validate_url_access": lambda urls: validate_urls_access(urls, user_id)
767777
}
768778
elif tool_config.class_name == "AnalyzeImageTool":
779+
selected_model_id = param_dict.get("selected_model_id")
769780
tool_config.metadata = {
770781
# get_vlm_model reads the first multimodal slot, now shown as image understanding.
771-
"vlm_model": get_vlm_model(tenant_id=tenant_id),
782+
"vlm_model": get_vlm_model(tenant_id=tenant_id, model_id=selected_model_id),
772783
"storage_client": minio_client,
773784
"validate_url_access": lambda urls: validate_urls_access(urls, user_id)
774785
}
775786
elif tool_config.class_name in ["AnalyzeAudioTool", "AnalyzeVideoTool"]:
787+
selected_model_id = param_dict.get("selected_model_id")
776788
tool_config.metadata = {
777-
"vlm_model": get_video_understanding_model(tenant_id=tenant_id),
789+
"vlm_model": get_video_understanding_model(tenant_id=tenant_id, model_id=selected_model_id),
778790
"storage_client": minio_client,
779791
"validate_url_access": lambda urls: validate_urls_access(urls, user_id)
780792
}

backend/services/file_management_service.py

Lines changed: 25 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
list_files,
3434
upload_fileobj,
3535
)
36+
from database.model_management_db import get_model_by_model_id
3637
from services.vectordatabase_service import ElasticSearchService, get_vector_db_core
3738
from utils.config_utils import tenant_config_manager, get_model_name_from_config
3839
from utils.file_management_utils import save_upload_file
@@ -448,20 +449,39 @@ async def list_files_impl(prefix: str, limit: Optional[int] = None):
448449
return files
449450

450451

451-
def get_llm_model(tenant_id: str):
452-
# Get the tenant config
453-
main_model_config = tenant_config_manager.get_model_config(
454-
key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id)
452+
def get_llm_model(tenant_id: str, model_id: Optional[int] = None):
453+
if model_id:
454+
main_model_config = get_model_by_model_id(int(model_id), tenant_id)
455+
if not main_model_config:
456+
raise ValueError(f"Model not found: {model_id}")
457+
if main_model_config.get("model_type") != "llm":
458+
raise ValueError(f"Selected model {model_id} is not an LLM model")
459+
else:
460+
# Get the tenant config
461+
main_model_config = tenant_config_manager.get_model_config(
462+
key=MODEL_CONFIG_MAPPING["llm"], tenant_id=tenant_id)
455463
timeout_seconds = main_model_config.get(
456464
"timeout_seconds") if main_model_config else None
465+
466+
resolved_model_name = get_model_name_from_config(main_model_config)
467+
468+
logger.info(
469+
"Using LLM model for analyze_text_file: model_id=%s, display_name=%s, model_name=%s",
470+
model_id,
471+
main_model_config.get("display_name") if main_model_config else None,
472+
resolved_model_name
473+
)
474+
457475
long_text_to_text_model = OpenAILongContextModel(
458476
observer=MessageObserver(),
459-
model_id=get_model_name_from_config(main_model_config),
477+
model_id=resolved_model_name,
460478
api_base=main_model_config.get("base_url"),
461479
api_key=main_model_config.get("api_key"),
462480
max_context_tokens=main_model_config.get("max_tokens"),
463481
ssl_verify=main_model_config.get("ssl_verify", True),
464482
timeout_seconds=timeout_seconds,
483+
model_factory=main_model_config.get("model_factory"),
484+
display_name=main_model_config.get("display_name"),
465485
)
466486
return long_text_to_text_model
467487

backend/services/image_service.py

Lines changed: 37 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -3,12 +3,14 @@
33
import logging
44
import socket
55
from http import HTTPStatus
6+
from typing import Optional
67
from urllib.parse import urlparse, urlunparse
78

89
import aiohttp
910

1011
from consts.const import DATA_PROCESS_SERVICE
1112
from consts.const import MODEL_CONFIG_MAPPING
13+
from database.model_management_db import get_model_by_model_id
1214
from utils.config_utils import tenant_config_manager, get_model_name_from_config
1315

1416
from nexent import MessageObserver
@@ -146,14 +148,19 @@ async def proxy_image_impl(decoded_url: str):
146148
return result
147149

148150

149-
def get_vlm_model(tenant_id: str):
150-
"""Return the configured image understanding model for AnalyzeImageTool.
151+
def _get_model_config_by_id(tenant_id, model_id, expected_model_type):
152+
if not model_id:
153+
return None
151154

152-
The first multimodal model slot is still stored under MODEL_CONFIG_MAPPING["vlm"]
153-
for compatibility, but it is the user-facing image understanding configuration.
154-
"""
155-
vlm_model_config = tenant_config_manager.get_model_config(
156-
key=MODEL_CONFIG_MAPPING["vlm"], tenant_id=tenant_id)
155+
model_config = get_model_by_model_id(int(model_id), tenant_id)
156+
if not model_config:
157+
raise ValueError(f"Model not found: {model_id}")
158+
if model_config.get("model_type") != expected_model_type:
159+
raise ValueError(f"Selected model {model_id} is not a {expected_model_type} model")
160+
return model_config
161+
162+
163+
def _build_vlm_model(vlm_model_config):
157164
if not vlm_model_config:
158165
return None
159166
return OpenAIVLModel(
@@ -167,28 +174,34 @@ def get_vlm_model(tenant_id: str):
167174
frequency_penalty=0.5,
168175
max_tokens=512,
169176
ssl_verify=vlm_model_config.get("ssl_verify", True),
177+
model_factory=vlm_model_config.get("model_factory"),
178+
display_name=vlm_model_config.get("display_name"),
170179
)
171180

172181

182+
def get_vlm_model(tenant_id: str, model_id: Optional[int] = None):
183+
"""Return the configured image understanding model for AnalyzeImageTool.
184+
185+
The first multimodal model slot is still stored under MODEL_CONFIG_MAPPING["vlm"]
186+
for compatibility, but it is the user-facing image understanding configuration.
187+
"""
188+
if model_id:
189+
vlm_model_config = _get_model_config_by_id(tenant_id, model_id, "vlm")
190+
else:
191+
vlm_model_config = tenant_config_manager.get_model_config(
192+
key=MODEL_CONFIG_MAPPING["vlm"], tenant_id=tenant_id)
193+
return _build_vlm_model(vlm_model_config)
194+
195+
173196
def get_image_understanding_model(tenant_id: str):
174197
return get_vlm_model(tenant_id=tenant_id)
175198

176199

177-
def get_video_understanding_model(tenant_id: str):
200+
def get_video_understanding_model(tenant_id: str, model_id: Optional[int] = None):
178201
"""Return the configured video understanding model for multimodal tools."""
179-
vlm_model_config = tenant_config_manager.get_model_config(
180-
key=MODEL_CONFIG_MAPPING["vlm3"], tenant_id=tenant_id)
181-
if not vlm_model_config:
182-
return None
183-
return OpenAIVLModel(
184-
observer=MessageObserver(),
185-
model_id=get_model_name_from_config(
186-
vlm_model_config) if vlm_model_config else "",
187-
api_base=vlm_model_config.get("base_url", ""),
188-
api_key=vlm_model_config.get("api_key", ""),
189-
temperature=0.7,
190-
top_p=0.7,
191-
frequency_penalty=0.5,
192-
max_tokens=512,
193-
ssl_verify=vlm_model_config.get("ssl_verify", True),
194-
)
202+
if model_id:
203+
vlm_model_config = _get_model_config_by_id(tenant_id, model_id, "vlm3")
204+
else:
205+
vlm_model_config = tenant_config_manager.get_model_config(
206+
key=MODEL_CONFIG_MAPPING["vlm3"], tenant_id=tenant_id)
207+
return _build_vlm_model(vlm_model_config)

backend/services/northbound_service.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,7 @@ def _normalize_northbound_attachments(
133133
tenant_id: str,
134134
) -> Optional[List[Dict[str, Any]]]:
135135
"""Convert northbound attachment references into internal minio_files objects.
136-
136+
137137
Supports two formats:
138138
1. List of S3 URL strings (backward compatible): ["s3://nexent/...", "/nexent/...", "attachments/..."]
139139
2. List of attachment objects (full metadata): [{"object_name": "...", "name": "...", ...}]

backend/services/tool_configuration_service.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -815,7 +815,8 @@ def _validate_local_tool(
815815
raise ToolExecutionException(
816816
f"Tenant ID and User ID are required for {tool_name} validation")
817817
# get_vlm_model reads the first multimodal slot, now shown as image understanding.
818-
image_to_text_model = get_vlm_model(tenant_id=tenant_id)
818+
selected_model_id = instantiation_params.get("selected_model_id")
819+
image_to_text_model = get_vlm_model(tenant_id=tenant_id, model_id=selected_model_id)
819820
vlm_display_name = getattr(
820821
image_to_text_model, 'display_name', None)
821822
set_monitoring_context(tenant_id=tenant_id)
@@ -832,7 +833,8 @@ def _validate_local_tool(
832833
if not tenant_id or not user_id:
833834
raise ToolExecutionException(
834835
f"Tenant ID and User ID are required for {tool_name} validation")
835-
video_understanding_model = get_video_understanding_model(tenant_id=tenant_id)
836+
selected_model_id = instantiation_params.get("selected_model_id")
837+
video_understanding_model = get_video_understanding_model(tenant_id=tenant_id, model_id=selected_model_id)
836838
model_display_name = getattr(
837839
video_understanding_model, 'display_name', None)
838840
set_monitoring_context(tenant_id=tenant_id)
@@ -849,7 +851,8 @@ def _validate_local_tool(
849851
if not tenant_id or not user_id:
850852
raise ToolExecutionException(
851853
f"Tenant ID and User ID are required for {tool_name} validation")
852-
long_text_to_text_model = get_llm_model(tenant_id=tenant_id)
854+
selected_model_id = instantiation_params.get("selected_model_id")
855+
long_text_to_text_model = get_llm_model(tenant_id=tenant_id, model_id=selected_model_id)
853856
llm_display_name = getattr(
854857
long_text_to_text_model, 'display_name', None)
855858
set_monitoring_context(tenant_id=tenant_id)

0 commit comments

Comments
(0)