Skip to content

Commit 0bcc662

Browse files
Blueteemo, Sisyphbaous-DT-Project, and Soulter
committed
fix: prevent duplicate processing of quoted images by multimodal main providers when no dedicated image caption provider is configured (#8401)
Co-authored-by: C₂₂H₂₅NO₆ <Sisyphbaous-DT-Project@users.noreply.github.com>
Co-authored-by: Soulter <905617992@qq.com>
1 parent e960c14 commit 0bcc662

2 files changed

Lines changed: 75 additions & 5 deletions

File tree

astrbot/core/astr_main_agent.py

Lines changed: 20 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -752,6 +752,7 @@ async def _process_quote_message(
752752
plugin_context: Context,
753753
quoted_message_settings: QuotedMessageParserSettings = DEFAULT_QUOTED_MESSAGE_SETTINGS,
754754
config: MainAgentBuildConfig | None = None,
755+
main_provider_supports_image: bool = False,
755756
) -> None:
756757
quote = None
757758
for comp in event.message_obj.message:
@@ -781,13 +782,21 @@ async def _process_quote_message(
781782
image_seg = comp
782783
break
783784

784-
if image_seg:
785+
if image_seg and main_provider_supports_image:
786+
logger.debug(
787+
"Skipping quote image captioning because the main provider supports image input."
788+
)
789+
elif image_seg and not img_cap_prov_id:
790+
logger.debug(
791+
"No dedicated image caption provider configured. "
792+
"Skipping quote image captioning."
793+
)
794+
elif image_seg:
785795
try:
786796
prov = None
787797
path = None
788798
compress_path = None
789-
if img_cap_prov_id:
790-
prov = plugin_context.get_provider_by_id(img_cap_prov_id)
799+
prov = plugin_context.get_provider_by_id(img_cap_prov_id)
791800
if prov is None:
792801
prov = plugin_context.get_using_provider(event.unified_msg_origin)
793802

@@ -876,18 +885,23 @@ async def _decorate_llm_request(
876885
req: ProviderRequest,
877886
plugin_context: Context,
878887
config: MainAgentBuildConfig,
888+
provider: Provider | None = None,
879889
) -> None:
880890
cfg = config.provider_settings or plugin_context.get_config(
881891
umo=event.unified_msg_origin
882892
).get("provider_settings", {})
883893

884894
_apply_prompt_prefix(req, cfg)
885895

896+
main_provider_supports_image = provider is not None and _provider_supports_modality(
897+
provider, "image"
898+
)
899+
886900
if req.conversation:
887901
await _ensure_persona_and_skills(req, cfg, plugin_context, event)
888902

889903
img_cap_prov_id: str = cfg.get("default_image_caption_provider_id") or ""
890-
if img_cap_prov_id and req.image_urls:
904+
if img_cap_prov_id and req.image_urls and not main_provider_supports_image:
891905
await _ensure_img_caption(
892906
event,
893907
req,
@@ -905,6 +919,7 @@ async def _decorate_llm_request(
905919
plugin_context,
906920
quoted_message_settings,
907921
config,
922+
main_provider_supports_image=main_provider_supports_image,
908923
)
909924

910925
tz = config.timezone
@@ -1418,7 +1433,7 @@ async def build_main_agent(
14181433
else:
14191434
return None
14201435

1421-
await _decorate_llm_request(event, req, plugin_context, config)
1436+
await _decorate_llm_request(event, req, plugin_context, config, provider=provider)
14221437

14231438
await _apply_kb(event, req, plugin_context, config)
14241439

tests/unit/test_astr_main_agent.py

Lines changed: 55 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1085,6 +1085,61 @@ async def test_build_main_agent_with_images(
10851085

10861086
assert result is not None
10871087

1088+
@pytest.mark.asyncio
1089+
async def test_build_main_agent_skips_caption_when_main_provider_supports_images(
1090+
self, mock_event, mock_context, mock_provider
1091+
):
1092+
"""Test image-capable chat providers receive quoted images directly."""
1093+
module = ama
1094+
mock_image = Image(file="file:///tmp/quoted.jpg")
1095+
mock_reply = Reply(
1096+
id="reply-1",
1097+
chain=[Plain(text="quoted text"), mock_image],
1098+
sender_nickname="",
1099+
message_str="quoted text",
1100+
)
1101+
mock_event.message_obj.message = [Plain(text="Hello"), mock_reply]
1102+
1103+
mock_context.get_provider_by_id.return_value = None
1104+
mock_context.get_using_provider.return_value = mock_provider
1105+
mock_context.get_config.return_value = {}
1106+
1107+
conv_mgr = mock_context.conversation_manager
1108+
_setup_conversation_for_build(conv_mgr)
1109+
1110+
with (
1111+
patch("astrbot.core.astr_main_agent.AgentRunner") as mock_runner_cls,
1112+
patch("astrbot.core.astr_main_agent.AstrAgentContext"),
1113+
patch.object(
1114+
Image,
1115+
"convert_to_file_path",
1116+
AsyncMock(return_value="/tmp/quoted.jpg"),
1117+
),
1118+
):
1119+
mock_runner = MagicMock()
1120+
mock_runner.reset = AsyncMock()
1121+
mock_runner_cls.return_value = mock_runner
1122+
1123+
result = await module.build_main_agent(
1124+
event=mock_event,
1125+
plugin_context=mock_context,
1126+
config=module.MainAgentBuildConfig(
1127+
tool_call_timeout=60,
1128+
provider_settings={
1129+
"default_image_caption_provider_id": "caption-provider",
1130+
},
1131+
),
1132+
provider=mock_provider,
1133+
)
1134+
1135+
assert result is not None
1136+
assert result.provider_request.image_urls == ["/tmp/quoted.jpg"]
1137+
assert not any(
1138+
"Image Caption" in part.text or "<image_caption>" in part.text
1139+
for part in result.provider_request.extra_user_content_parts
1140+
)
1141+
mock_provider.text_chat.assert_not_called()
1142+
10881143
@pytest.mark.asyncio
10891144
async def test_build_main_agent_uses_image_fallback_provider(
10901145
self, mock_event, mock_context

0 commit comments

Comments
 (0)