Skip to content

Commit d4e7aa0

Browse files
authored
fix: avoid duplicate quoted image captions (#8718)
1 parent 690b184 commit d4e7aa0

2 files changed

Lines changed: 135 additions & 49 deletions

File tree

astrbot/core/astr_main_agent.py

Lines changed: 64 additions & 49 deletions
Original file line number | Diff line number | Diff line change
@@ -776,6 +776,7 @@ async def _process_quote_message(
776776
quoted_message_settings: QuotedMessageParserSettings = DEFAULT_QUOTED_MESSAGE_SETTINGS,
777777
config: MainAgentBuildConfig | None = None,
778778
main_provider_supports_image: bool = False,
779+
skip_quote_image_caption: bool = False,
779780
) -> None:
780781
quote = None
781782
for comp in event.message_obj.message:
@@ -805,54 +806,63 @@ async def _process_quote_message(
805806
image_seg = comp
806807
break
807808

808-
if image_seg and main_provider_supports_image:
809-
logger.debug(
810-
"Skipping quote image captioning because the main provider supports image input."
811-
)
812-
elif image_seg and not img_cap_prov_id:
813-
logger.debug(
814-
"No dedicated image caption provider configured. "
815-
"Skipping quote image captioning."
816-
)
817-
elif image_seg:
818-
try:
819-
prov = None
820-
path = None
821-
compress_path = None
822-
prov = plugin_context.get_provider_by_id(img_cap_prov_id)
823-
if prov is None:
824-
prov = plugin_context.get_using_provider(event.unified_msg_origin)
825-
826-
if prov and isinstance(prov, Provider):
827-
path = await image_seg.convert_to_file_path()
828-
compress_path = await _compress_image_for_provider(
829-
path,
830-
config.provider_settings if config else None,
831-
)
832-
if path and _is_generated_compressed_image_path(path, compress_path):
833-
event.track_temporary_local_file(compress_path)
834-
llm_resp = await prov.text_chat(
835-
prompt="Please describe the image content.",
836-
image_urls=[compress_path],
837-
)
838-
if llm_resp.completion_text:
839-
content_parts.append(
840-
f"[Image Caption in quoted message]: {llm_resp.completion_text}"
809+
if image_seg:
810+
if skip_quote_image_caption:
811+
logger.debug(
812+
"Skipping quote image captioning because image captioning already handled this request."
813+
)
814+
elif main_provider_supports_image:
815+
logger.debug(
816+
"Skipping quote image captioning because the main provider supports image input."
817+
)
818+
elif not img_cap_prov_id:
819+
logger.debug(
820+
"No dedicated image caption provider configured. "
821+
"Skipping quote image captioning."
822+
)
823+
else:
824+
try:
825+
prov = None
826+
path = None
827+
compress_path = None
828+
prov = plugin_context.get_provider_by_id(img_cap_prov_id)
829+
if prov is None:
830+
prov = plugin_context.get_using_provider(event.unified_msg_origin)
831+
832+
if prov and isinstance(prov, Provider):
833+
path = await image_seg.convert_to_file_path()
834+
compress_path = await _compress_image_for_provider(
835+
path,
836+
config.provider_settings if config else None,
841837
)
842-
else:
843-
logger.warning("No provider found for image captioning in quote.")
844-
except BaseException as exc:
845-
logger.error("处理引用图片失败: %s", exc)
846-
finally:
847-
if (
848-
compress_path
849-
and compress_path != path
850-
and os.path.exists(compress_path)
851-
):
852-
try:
853-
os.remove(compress_path)
854-
except Exception as exc: # noqa: BLE001
855-
logger.warning("Fail to remove temporary compressed image: %s", exc)
838+
if path and _is_generated_compressed_image_path(
839+
path, compress_path
840+
):
841+
event.track_temporary_local_file(compress_path)
842+
llm_resp = await prov.text_chat(
843+
prompt="Please describe the image content.",
844+
image_urls=[compress_path],
845+
)
846+
if llm_resp.completion_text:
847+
content_parts.append(
848+
f"[Image Caption in quoted message]: {llm_resp.completion_text}"
849+
)
850+
else:
851+
logger.warning("No provider found for image captioning in quote.")
852+
except BaseException as exc:
853+
logger.error("处理引用图片失败: %s", exc)
854+
finally:
855+
if (
856+
compress_path
857+
and compress_path != path
858+
and os.path.exists(compress_path)
859+
):
860+
try:
861+
os.remove(compress_path)
862+
except Exception as exc: # noqa: BLE001
863+
logger.warning(
864+
"Fail to remove temporary compressed image: %s", exc
865+
)
856866

857867
quoted_content = "\n".join(content_parts)
858868
quoted_text = f"<Quoted Message>\n{quoted_content}\n</Quoted Message>"
@@ -918,11 +928,12 @@ async def _decorate_llm_request(
918928
main_provider_supports_image = provider is not None and _provider_supports_modality(
919929
provider, "image"
920930
)
931+
img_cap_prov_id: str = cfg.get("default_image_caption_provider_id") or ""
932+
quote_images_already_captioned = False
921933

922934
if req.conversation:
923935
await _ensure_persona_and_skills(req, cfg, plugin_context, event)
924936

925-
img_cap_prov_id: str = cfg.get("default_image_caption_provider_id") or ""
926937
if img_cap_prov_id and req.image_urls and not main_provider_supports_image:
927938
await _ensure_img_caption(
928939
event,
@@ -931,8 +942,11 @@ async def _decorate_llm_request(
931942
plugin_context,
932943
img_cap_prov_id,
933944
)
945+
quote_images_already_captioned = any(
946+
"<image_caption>" in getattr(part, "text", "")
947+
for part in req.extra_user_content_parts
948+
)
934949

935-
img_cap_prov_id = cfg.get("default_image_caption_provider_id") or ""
936950
quoted_message_settings = _get_quoted_message_parser_settings(cfg)
937951
await _process_quote_message(
938952
event,
@@ -942,6 +956,7 @@ async def _decorate_llm_request(
942956
quoted_message_settings,
943957
config,
944958
main_provider_supports_image=main_provider_supports_image,
959+
skip_quote_image_caption=quote_images_already_captioned,
945960
)
946961

947962
tz = config.timezone

tests/unit/test_astr_main_agent.py

Lines changed: 71 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1247,6 +1247,77 @@ async def test_build_main_agent_skips_caption_when_main_provider_supports_images
12471247
)
12481248
mock_provider.text_chat.assert_not_called()
12491249

1250+
@pytest.mark.asyncio
1251+
async def test_build_main_agent_does_not_caption_quoted_image_twice(
1252+
self, mock_event, mock_context
1253+
):
1254+
"""Quoted images should not be captioned again after request image captioning."""
1255+
module = ama
1256+
text_provider = MagicMock(spec=Provider)
1257+
text_provider.provider_config = {
1258+
"id": "text-provider",
1259+
"modalities": ["text", "tool_use"],
1260+
}
1261+
text_provider.get_model.return_value = "text-model"
1262+
1263+
caption_provider = MagicMock(spec=Provider)
1264+
caption_provider.text_chat = AsyncMock(
1265+
return_value=MagicMock(completion_text="quoted image caption")
1266+
)
1267+
1268+
mock_reply = Reply(
1269+
id="reply-1",
1270+
chain=[Plain(text="quoted text"), Image(file="file:///tmp/quoted.jpg")],
1271+
sender_nickname="Alice",
1272+
message_str="quoted text",
1273+
)
1274+
mock_event.message_obj.message = [Plain(text="Hello"), mock_reply]
1275+
1276+
mock_context.get_provider_by_id.return_value = caption_provider
1277+
mock_context.get_using_provider.return_value = text_provider
1278+
mock_context.get_config.return_value = {}
1279+
1280+
conv_mgr = mock_context.conversation_manager
1281+
_setup_conversation_for_build(conv_mgr)
1282+
1283+
with (
1284+
patch("astrbot.core.astr_main_agent.AgentRunner") as mock_runner_cls,
1285+
patch("astrbot.core.astr_main_agent.AstrAgentContext"),
1286+
patch.object(
1287+
Image,
1288+
"convert_to_file_path",
1289+
AsyncMock(return_value="/tmp/quoted.jpg"),
1290+
),
1291+
patch(
1292+
"astrbot.core.astr_main_agent._compress_image_for_provider",
1293+
AsyncMock(side_effect=lambda path, _settings: path),
1294+
),
1295+
):
1296+
mock_runner = MagicMock()
1297+
mock_runner.reset = AsyncMock()
1298+
mock_runner_cls.return_value = mock_runner
1299+
1300+
result = await module.build_main_agent(
1301+
event=mock_event,
1302+
plugin_context=mock_context,
1303+
config=module.MainAgentBuildConfig(
1304+
tool_call_timeout=60,
1305+
provider_settings={
1306+
"default_image_caption_provider_id": "caption-provider",
1307+
},
1308+
),
1309+
provider=text_provider,
1310+
)
1311+
1312+
assert result is not None
1313+
assert caption_provider.text_chat.await_count == 1
1314+
1315+
extra_text = "\n".join(
1316+
part.text for part in result.provider_request.extra_user_content_parts
1317+
)
1318+
assert "<image_caption>quoted image caption</image_caption>" in extra_text
1319+
assert "[Image Caption in quoted message]" not in extra_text
1320+
12501321
@pytest.mark.asyncio
12511322
async def test_build_main_agent_uses_image_fallback_provider(
12521323
self, mock_event, mock_context

0 commit comments

Comments (0)