diff --git a/astrbot/core/agent/runners/tool_loop_agent_runner.py b/astrbot/core/agent/runners/tool_loop_agent_runner.py index 9351b20e1a..54466abfaa 100644 --- a/astrbot/core/agent/runners/tool_loop_agent_runner.py +++ b/astrbot/core/agent/runners/tool_loop_agent_runner.py @@ -15,10 +15,12 @@ TextResourceContents, ) +import astrbot.core.message.components as Comp from astrbot import logger from astrbot.core.agent.message import ImageURLPart, TextPart, ThinkPart from astrbot.core.agent.tool import ToolSet from astrbot.core.agent.tool_image_cache import tool_image_cache +from astrbot.core.exceptions import LLMEmptyResponseError from astrbot.core.message.components import Json from astrbot.core.message.message_event_result import ( MessageChain, @@ -219,6 +221,42 @@ async def _iter_llm_responses( else: yield await self.provider.text_chat(**payload) + def _is_empty_llm_response(self, resp: LLMResponse) -> bool: + """Check if an LLM response is effectively empty. + + This heuristic checks: + - completion_text is empty or whitespace only + - reasoning_content is empty or whitespace only + - tools_call_args is empty (no tool calls) + - result_chain has no meaningful content (Plain components with non-empty text, + or any non-Plain components like images, voice, etc.) + + Returns True if the response contains no meaningful content. 
+ """ + completion_text_stripped = (resp.completion_text or "").strip() + reasoning_content_stripped = (resp.reasoning_content or "").strip() + + # Check result_chain for meaningful non-empty content (e.g., images, non-empty text) + has_result_chain_content = False + if resp.result_chain and resp.result_chain.chain: + for comp in resp.result_chain.chain: + # Skip empty Plain components + if isinstance(comp, Comp.Plain): + if comp.text and comp.text.strip(): + has_result_chain_content = True + break + else: + # Non-Plain components (e.g., images, voice) are considered valid content + has_result_chain_content = True + break + + return ( + not completion_text_stripped + and not reasoning_content_stripped + and not resp.tools_call_args + and not has_result_chain_content + ) + async def _iter_llm_responses_with_fallback( self, ) -> T.AsyncGenerator[LLMResponse, None]: @@ -241,11 +279,27 @@ async def _iter_llm_responses_with_fallback( has_stream_output = False try: async for resp in self._iter_llm_responses(include_model=idx == 0): + # 对于流式 chunk,不立即检查是否为空,因为单个 chunk 可能只是元数据/心跳 + # 流式响应的最终结果会在 resp.is_chunk=False 时返回 if resp.is_chunk: has_stream_output = True yield resp continue + # 如果回复为空且无工具调用 且不是最后一个回退渠道 则引发fallback + # 此处不应判断整个消息链是否为空 因为消息链包含整个对话流 而空回复可能发生在任何阶段 + # 使用辅助函数检查是否为空回复 + if ( + (resp.role == "assistant" or resp.role == "tool") + and self._is_empty_llm_response(resp) + and not is_last_candidate + ): + logger.warning( + "Chat Model %s returns empty response, trying fallback to next provider.", + candidate_id, + ) + break + if ( resp.role == "err" and not has_stream_output @@ -504,6 +558,25 @@ async def step(self): logger.warning( "LLM returned empty assistant message with no tool calls." ) + # 若所有fallback使用完毕后依然为空回复 则显示执行报错 避免静默 + base_msg = "LLM returned empty assistant message with no tool calls." 
+ model_id = getattr(self.run_context, "model_id", None) + provider_id = getattr(self.run_context, "provider_id", None) + run_id = getattr(self.run_context, "run_id", None) + + ctx_parts = [] + if model_id is not None: + ctx_parts.append(f"model_id={model_id}") + if provider_id is not None: + ctx_parts.append(f"provider_id={provider_id}") + if run_id is not None: + ctx_parts.append(f"run_id={run_id}") + + if ctx_parts: + base_msg = f"{base_msg} Context: " + ", ".join(ctx_parts) + "." + + raise LLMEmptyResponseError(base_msg) + self.run_context.messages.append(Message(role="assistant", content=parts)) # call the on_agent_done hook diff --git a/astrbot/core/astr_main_agent.py b/astrbot/core/astr_main_agent.py index 87b1726d67..e59f71b0ce 100644 --- a/astrbot/core/astr_main_agent.py +++ b/astrbot/core/astr_main_agent.py @@ -1,6 +1,7 @@ from __future__ import annotations import asyncio +import base64 import copy import datetime import json @@ -448,7 +449,7 @@ async def _ensure_img_caption( caption = await _request_img_caption( image_caption_provider, cfg, - req.image_urls, + [await _compress_image_internal(url) for url in req.image_urls], plugin_context, ) if caption: @@ -458,6 +459,8 @@ async def _ensure_img_caption( req.image_urls = [] except Exception as exc: # noqa: BLE001 logger.error("处理图片描述失败: %s", exc) + req.extra_user_content_parts.append(TextPart(text="图片解析失败")) + req.image_urls = [] def _append_quoted_image_attachment(req: ProviderRequest, image_path: str) -> None: @@ -522,8 +526,12 @@ async def _process_quote_message( if prov and isinstance(prov, Provider): llm_resp = await prov.text_chat( - prompt=IMAGE_CAPTION_DEFAULT_PROMPT, - image_urls=[await image_seg.convert_to_file_path()], + prompt=IMAGE_CAPTION_DEFAULT_PROMPT, + image_urls=[ + await _compress_image_internal( + await image_seg.convert_to_file_path() + ) + ], ) if llm_resp.completion_text: content_parts.append( @@ -1164,3 +1172,48 @@ async def build_main_agent( provider=provider,
reset_coro=reset_coro if not apply_reset else None, ) + + +# 压缩用户上传的大体积图片 未来可以提取为通用工具 +async def _compress_image_internal(url_or_path: str) -> str: + try: + data = None + # 若为远程图片则直接返回原值 无需压缩 + if url_or_path.startswith("http"): + return url_or_path + elif url_or_path.startswith("data:image"): + header, encoded = url_or_path.split(",", 1) + data = base64.b64decode(encoded) + elif os.path.exists(url_or_path): + if os.path.getsize(url_or_path) < 1024 * 1024: + return url_or_path + with open(url_or_path, "rb") as f: + data = f.read() + if not data: + return url_or_path + import io + + from PIL import Image as PILImage + + img = PILImage.open(io.BytesIO(data)) + if img.mode in ("RGBA", "P"): + img = img.convert("RGB") + max_size = 1280 + if max(img.size) > max_size: + img.thumbnail((max_size, max_size), PILImage.LANCZOS) + out_io = io.BytesIO() + img.save(out_io, format="JPEG", quality=75, optimize=True) + temp_dir = os.path.join("data", "temp") + if not os.path.exists(temp_dir): + os.makedirs(temp_dir) + import uuid + + temp_path = os.path.join(temp_dir, f"compressed_{uuid.uuid4().hex}.jpg") + with open(temp_path, "wb") as f: + f.write(out_io.getvalue()) + return temp_path + except Exception as e: + from astrbot.core import logger + + logger.warning(f"图片压缩失败: {e}") + return url_or_path diff --git a/astrbot/core/exceptions.py b/astrbot/core/exceptions.py index e637d4930f..bc2e896ef2 100644 --- a/astrbot/core/exceptions.py +++ b/astrbot/core/exceptions.py @@ -7,3 +7,7 @@ class AstrBotError(Exception): class ProviderNotFoundError(AstrBotError): """Raised when a specified provider is not found.""" + + +class LLMEmptyResponseError(AstrBotError): + """Raised when LLM returns an empty assistant message with no tool calls.""" diff --git a/astrbot/core/provider/sources/openai_source.py b/astrbot/core/provider/sources/openai_source.py index 2fae94e1a7..eda49edaa5 100644 --- a/astrbot/core/provider/sources/openai_source.py +++ 
b/astrbot/core/provider/sources/openai_source.py @@ -306,7 +306,29 @@ async def _query_stream( state = ChatCompletionStreamState() + chunk_index = 0 async for chunk in stream: + # 兼容处理:部分非标准聚合平台(如通过newapi适配层转接的 Gemini)在流式返回 tool_calls 时, + # 可能会缺失 type 字段。由于 openai SDK 的 ChatCompletionStreamState.handle_chunk + # 内部有 assert tool.type == "function" 的断言,缺少该字段会导致 AssertionError。 + # 因此,若检测到 tool_call 且 type 为空,在此处手动补全为 "function"。 + for choice in chunk.choices or []: + if not choice.delta or not choice.delta.tool_calls: + continue + for tool_call in choice.delta.tool_calls: + # 使用 getattr 处理 type 字段可能完全缺失的情况 + tool_type = getattr(tool_call, "type", None) + if ( + tool_type is None or tool_type == "" + ) and tool_call.function is not None: + logger.debug( + f"[{self.get_model()}] tool_call.type is missing or empty in chunk {chunk_index} " + f"(provider: {self.provider_config.get('id', 'unknown')}), " + f"manually set to 'function'" + ) + tool_call.type = "function" + chunk_index += 1 + try: state.handle_chunk(chunk) except Exception as e: